Home | History | Annotate | Download | only in unicode
      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 // stringoptions.h
      5 // created: 2017jun08 Markus W. Scherer
      6 
      7 #ifndef __STRINGOPTIONS_H__
      8 #define __STRINGOPTIONS_H__
      9 
     10 #include "unicode/utypes.h"
     11 
     12 /**
     13  * \file
     14  * \brief C API: Bit set option bit constants for various string and character processing functions.
     15  */
     16 
     17 /**
     18  * Option value for case folding: Use the default mappings defined in CaseFolding.txt
     19  * (value 0, i.e. U_FOLD_CASE_EXCLUDE_SPECIAL_I is not selected).
     20  * @stable ICU 2.0
     21  */
     22 #define U_FOLD_CASE_DEFAULT 0
     23 
     24 /**
     25  * Option value for case folding:
     26  *
     27  * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
     28  * and dotless i appropriately for Turkic languages (tr, az).
     29  *
     30  * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
     31  * were to be included for default mappings and
     32  * excluded for the Turkic-specific mappings.
     33  *
     34  * Since Unicode 3.2, CaseFolding.txt instead contains mappings marked with 'T' that
     35  * are to be excluded for default mappings and
     36  * included for the Turkic-specific mappings.
     37  *
     38  * @stable ICU 2.0
     39  */
     40 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
     41 
     42 #ifndef U_HIDE_DRAFT_API
     43 
     44 /**
     45  * Titlecase the string as a whole rather than each word.
     46  * (Titlecase only the character at index 0, possibly adjusted.)
     47  * Option bits value for titlecasing APIs that take an options bit set.
     48  *
     49  * It is an error to specify multiple titlecasing iterator options together
     50  * (for example, this one and U_TITLECASE_SENTENCES), including both an options bit and an explicit BreakIterator.
     51  *
     52  * @see U_TITLECASE_ADJUST_TO_CASED
     53  * @draft ICU 60
     54  */
     55 #define U_TITLECASE_WHOLE_STRING 0x20
     56 
     57 /**
     58  * Titlecase sentences rather than words.
     59  * (Titlecase only the first character of each sentence, possibly adjusted.)
     60  * Option bits value for titlecasing APIs that take an options bit set.
     61  *
     62  * It is an error to specify multiple titlecasing iterator options together
     63  * (for example, this one and U_TITLECASE_WHOLE_STRING), including both an options bit and an explicit BreakIterator.
     64  *
     65  * @see U_TITLECASE_ADJUST_TO_CASED
     66  * @draft ICU 60
     67  */
     68 #define U_TITLECASE_SENTENCES 0x40
     69 
     70 #endif  // U_HIDE_DRAFT_API
     71 
     72 /**
     73  * Do not lowercase non-initial parts of words when titlecasing.
     74  * Option bit for titlecasing APIs that take an options bit set.
     75  *
     76  * By default, titlecasing will titlecase the character at each
     77  * (possibly adjusted) BreakIterator index and
     78  * lowercase all other characters up to the next iterator index.
     79  * With this option, only the lowercasing is suppressed: the other characters will not be modified.
     80  *
     81  * @see U_TITLECASE_ADJUST_TO_CASED
     82  * @see UnicodeString::toTitle
     83  * @see CaseMap::toTitle
     84  * @see ucasemap_setOptions
     85  * @see ucasemap_toTitle
     86  * @see ucasemap_utf8ToTitle
     87  * @stable ICU 3.8
     88  */
     89 #define U_TITLECASE_NO_LOWERCASE 0x100
     90 
     91 /**
     92  * Do not adjust the titlecasing BreakIterator indexes;
     93  * titlecase exactly the characters at breaks from the iterator.
     94  * Option bit for titlecasing APIs that take an options bit set.
     95  *
     96  * By default, titlecasing will take each BreakIterator index,
     97  * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
     98  * and titlecase that one.
     99  *
    100  * Other characters are lowercased (see U_TITLECASE_NO_LOWERCASE for suppressing that).
    101  *
    102  * It is an error to specify multiple titlecasing adjustment options together.
    103  *
    104  * @see U_TITLECASE_ADJUST_TO_CASED
    105  * @see U_TITLECASE_NO_LOWERCASE
    106  * @see UnicodeString::toTitle
    107  * @see CaseMap::toTitle
    108  * @see ucasemap_setOptions
    109  * @see ucasemap_toTitle
    110  * @see ucasemap_utf8ToTitle
    111  * @stable ICU 3.8
    112  */
    113 #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
    114 
    115 #ifndef U_HIDE_DRAFT_API
    116 
    117 /**
    118  * Adjust each titlecasing BreakIterator index to the next cased character.
    119  * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
    120  * Option bit for titlecasing APIs that take an options bit set.
    121  *
    122  * This used to be the default index adjustment in ICU.
    123  * Since ICU 60, the default index adjustment is to the next character that is
    124  * a letter, number, symbol, or private use code point.
    125  * (Uncased modifier letters are skipped.)
    126  * The difference in behavior is small for word titlecasing,
    127  * but the new adjustment is much better for whole-string and sentence titlecasing:
    128  * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
    129  *
    130  * It is an error to specify multiple titlecasing adjustment options together.
    131  *
    132  * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
    133  * @draft ICU 60
    134  */
    135 #define U_TITLECASE_ADJUST_TO_CASED 0x400
    136 
    137 /**
    138  * Option for string transformation functions: Do not first reset the Edits object.
    139  * Used, for example, in some case-mapping and normalization functions.
    140  *
    141  * @see CaseMap
    142  * @see Edits
    143  * @see Normalizer2
    144  * @draft ICU 60
    145  */
    146 #define U_EDITS_NO_RESET 0x2000
    147 
    148 /**
    149  * Option for string transformation functions: Omit unchanged text when recording
    150  * how source substrings relate to changed and unchanged result substrings.
    151  * Used, for example, in some case-mapping and normalization functions.
    152  *
    153  * @see CaseMap
    154  * @see Edits
    155  * @see Normalizer2
    156  * @draft ICU 60
    157  */
    158 #define U_OMIT_UNCHANGED_TEXT 0x4000
    159 
    160 #endif  // U_HIDE_DRAFT_API
    161 
    162 /**
    163  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.:
    164  * Compare strings in code point order instead of code unit order.
    165  * @stable ICU 2.2
    166  */
    167 #define U_COMPARE_CODE_POINT_ORDER  0x8000
    168 
    169 /**
    170  * Option bit for unorm_compare:
    171  * Perform case-insensitive comparison.
    172  * @stable ICU 2.2
    173  */
    174 #define U_COMPARE_IGNORE_CASE       0x10000
    175 
    176 /**
    177  * Option bit for unorm_compare:
    178  * Both input strings are assumed to fulfill FCD conditions.
    179  * @stable ICU 2.2
    180  */
    181 #define UNORM_INPUT_IS_FCD          0x20000
    182 
    183 // Related definitions elsewhere.
    184 // Options that are not meaningful in the same functions
    185 // can share the same bits.
    186 //
    187 // Public:
    188 // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
    189 //
    190 // Internal: (may change or be removed)
    191 // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
    192 // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
    193 // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
    194 // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
    195 // ustr_imp.h #define _STRNCMP_STYLE 0x1000
    196 // unormcmp.cpp #define _COMPARE_EQUIV 0x80000
    197 
    198 #endif  // __STRINGOPTIONS_H__
    199