Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2013-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * collationsettings.h
      7 *
      8 * created on: 2013feb07
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #ifndef __COLLATIONSETTINGS_H__
     13 #define __COLLATIONSETTINGS_H__
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_COLLATION
     18 
     19 #include "unicode/ucol.h"
     20 #include "collation.h"
     21 #include "sharedobject.h"
     22 #include "umutex.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 /**
     27  * Collation settings/options/attributes.
     28  * These are the values that can be changed via API.
     29  */
     30 struct U_I18N_API CollationSettings : public SharedObject {
     31     /**
     32      * Options bit 0: Perform the FCD check on the input text and deliver normalized text.
     33      */
     34     static const int32_t CHECK_FCD = 1;
     35     /**
     36      * Options bit 1: Numeric collation.
     37      * Also known as CODAN = COllate Digits As Numbers.
     38      *
     39      * Treat digit sequences as numbers with CE sequences in numeric order,
     40      * rather than returning a normal CE for each digit.
     41      */
     42     static const int32_t NUMERIC = 2;
     43     /**
     44      * "Shifted" alternate handling, see ALTERNATE_MASK.
     45      */
     46     static const int32_t SHIFTED = 4;
     47     /**
     48      * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable.
     49      * Reserve values 8 and 0xc for shift-trimmed and blanked.
     50      */
     51     static const int32_t ALTERNATE_MASK = 0xc;
     52     /**
     53      * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value.
     54      */
     55     static const int32_t MAX_VARIABLE_SHIFT = 4;
     56     /** maxVariable options bit mask before shifting. */
     57     static const int32_t MAX_VARIABLE_MASK = 0x70;
     58     /** Options bit 7: Reserved/unused/0. */
     59     /**
     60      * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on.
     61      */
     62     static const int32_t UPPER_FIRST = 0x100;
     63     /**
     64      * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values)
     65      * unless case level is on (when they are *moved* into the separate case level).
     66      * By default, the case bits are removed from the tertiary weight (ignored).
     67      *
     68      * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to
     69      * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST.
     70      */
     71     static const int32_t CASE_FIRST = 0x200;
     72     /**
     73      * Options bit mask for caseFirst and upperFirst, before shifting.
     74      * Same value as caseFirst==upperFirst.
     75      */
     76     static const int32_t CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST;
     77     /**
     78      * Options bit 10: Insert the case level between the secondary and tertiary levels.
     79      */
     80     static const int32_t CASE_LEVEL = 0x400;
     81     /**
     82      * Options bit 11: Compare secondary weights backwards. ("French secondary")
     83      */
     84     static const int32_t BACKWARD_SECONDARY = 0x800;
     85     /**
     86      * Options bits 15..12: The 4-bit strength value bit field is shifted by this value.
     87      * It is the top used bit field in the options. (No need to mask after shifting.)
     88      */
     89     static const int32_t STRENGTH_SHIFT = 12;
     90     /** Strength options bit mask before shifting. */
     91     static const int32_t STRENGTH_MASK = 0xf000;
     92 
     93     /** maxVariable values */
     94     enum MaxVariable {
     95         MAX_VAR_SPACE,
     96         MAX_VAR_PUNCT,
     97         MAX_VAR_SYMBOL,
     98         MAX_VAR_CURRENCY
     99     };
    100 
    101     CollationSettings()
    102             : options((UCOL_DEFAULT_STRENGTH << STRENGTH_SHIFT) |
    103                       (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT)),
    104               variableTop(0),
    105               reorderTable(NULL),
    106               reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
    107               fastLatinOptions(-1) {}
    108 
    109     CollationSettings(const CollationSettings &other);
    110     virtual ~CollationSettings();
    111 
    112     UBool operator==(const CollationSettings &other) const;
    113 
    114     inline UBool operator!=(const CollationSettings &other) const {
    115         return !operator==(other);
    116     }
    117 
    118     int32_t hashCode() const;
    119 
    120     void resetReordering();
    121     void aliasReordering(const int32_t *codes, int32_t length, const uint8_t *table);
    122     UBool setReordering(const int32_t *codes, int32_t length, const uint8_t table[256]);
    123 
    124     void setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode);
    125 
    126     static int32_t getStrength(int32_t options) {
    127         return options >> STRENGTH_SHIFT;
    128     }
    129 
    130     int32_t getStrength() const {
    131         return getStrength(options);
    132     }
    133 
    134     /** Sets the options bit for an on/off attribute. */
    135     void setFlag(int32_t bit, UColAttributeValue value,
    136                  int32_t defaultOptions, UErrorCode &errorCode);
    137 
    138     UColAttributeValue getFlag(int32_t bit) const {
    139         return ((options & bit) != 0) ? UCOL_ON : UCOL_OFF;
    140     }
    141 
    142     void setCaseFirst(UColAttributeValue value, int32_t defaultOptions, UErrorCode &errorCode);
    143 
    144     UColAttributeValue getCaseFirst() const {
    145         int32_t option = options & CASE_FIRST_AND_UPPER_MASK;
    146         return (option == 0) ? UCOL_OFF :
    147                 (option == CASE_FIRST) ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST;
    148     }
    149 
    150     void setAlternateHandling(UColAttributeValue value,
    151                               int32_t defaultOptions, UErrorCode &errorCode);
    152 
    153     UColAttributeValue getAlternateHandling() const {
    154         return ((options & ALTERNATE_MASK) == 0) ? UCOL_NON_IGNORABLE : UCOL_SHIFTED;
    155     }
    156 
    157     void setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode);
    158 
    159     MaxVariable getMaxVariable() const {
    160         return (MaxVariable)((options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT);
    161     }
    162 
    163     /**
    164      * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off.
    165      */
    166     static inline UBool isTertiaryWithCaseBits(int32_t options) {
    167         return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST;
    168     }
    169     static uint32_t getTertiaryMask(int32_t options) {
    170         // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
    171         return isTertiaryWithCaseBits(options) ?
    172                 Collation::CASE_AND_TERTIARY_MASK : Collation::ONLY_TERTIARY_MASK;
    173     }
    174 
    175     static UBool sortsTertiaryUpperCaseFirst(int32_t options) {
    176         // On tertiary level, consider case bits and sort uppercase first
    177         // if caseLevel is off and caseFirst==upperFirst.
    178         return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK;
    179     }
    180 
    181     inline UBool dontCheckFCD() const {
    182         return (options & CHECK_FCD) == 0;
    183     }
    184 
    185     inline UBool hasBackwardSecondary() const {
    186         return (options & BACKWARD_SECONDARY) != 0;
    187     }
    188 
    189     inline UBool isNumeric() const {
    190         return (options & NUMERIC) != 0;
    191     }
    192 
    193     /** CHECK_FCD etc. */
    194     int32_t options;
    195     /** Variable-top primary weight. */
    196     uint32_t variableTop;
    197     /** 256-byte table for reordering permutation of primary lead bytes; NULL if no reordering. */
    198     const uint8_t *reorderTable;
    199     /** Array of reorder codes; ignored if reorderCodesLength == 0. */
    200     const int32_t *reorderCodes;
    201     /** Number of reorder codes; 0 if no reordering. */
    202     int32_t reorderCodesLength;
    203     /**
    204      * Capacity of reorderCodes.
    205      * If 0, then the table and codes are aliases.
    206      * Otherwise, this object owns the memory via the reorderCodes pointer;
    207      * the table and the codes are in the same memory block, with the codes first.
    208      */
    209     int32_t reorderCodesCapacity;
    210 
    211     /** Options for CollationFastLatin. Negative if disabled. */
    212     int32_t fastLatinOptions;
    213     uint16_t fastLatinPrimaries[0x180];
    214 };
    215 
    216 U_NAMESPACE_END
    217 
    218 #endif  // !UCONFIG_NO_COLLATION
    219 #endif  // __COLLATIONSETTINGS_H__
    220