Home | History | Annotate | Download | only in common
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 1999-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *   file name:  ustr_imp.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2001jan30
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #ifndef __USTR_IMP_H__
     18 #define __USTR_IMP_H__
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/uiter.h"
     22 #include "ucase.h"
     23 
     24 /**
         * Forward declaration of UBreakIterator, so that this header does not
         * need to include unicode/ubrk.h.
         * The typedef is skipped when UBRK_TYPEDEF_UBREAK_ITERATOR is already
         * defined.
         */
     25 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
     26 #   define UBRK_TYPEDEF_UBREAK_ITERATOR
     27     typedef struct UBreakIterator UBreakIterator;
     28 #endif
     29 
     30 #ifndef U_COMPARE_IGNORE_CASE
     31 /* Defined here only if not already defined; see also unorm.h */
     32 /**
     33  * Option bit for unorm_compare:
     34  * Perform case-insensitive comparison.
     35  */
     36 #define U_COMPARE_IGNORE_CASE       0x10000
     37 #endif
     38 
     39 /**
     40  * Internal option for unorm_cmpEquivFold() for strncmp style.
     41  * If set, checks for both string length and terminating NUL.
         * The value 0x1000 is a different bit than U_COMPARE_IGNORE_CASE (0x10000).
         * NOTE(review): identifiers that start with an underscore followed by an
         * uppercase letter are reserved in C; left unchanged because the users
         * of this macro are not visible from this header.
     42  */
     43 #define _STRNCMP_STYLE 0x1000
     44 
     45 /**
     46  * Compare two strings in code point order or code unit order.
     47  * Works in strcmp style (both lengths -1),
     48  * strncmp style (lengths equal and >=0, strncmpStyle TRUE),
     49  * and memcmp/UnicodeString style (at least one length >=0).
         *
         * @param s1             first string
         * @param length1        length of s1, or -1 (see the styles above)
         * @param s2             second string
         * @param length2        length of s2, or -1 (see the styles above)
         * @param strncmpStyle   the flag that selects strncmp style
         * @param codePointOrder presumably TRUE for code point order and FALSE
         *                       for code unit order -- confirm in the implementation
         * @return comparison result; presumably <0, 0 or >0 like strcmp -- confirm
     50  */
     51 U_CFUNC int32_t U_EXPORT2
     52 uprv_strCompare(const UChar *s1, int32_t length1,
     53                 const UChar *s2, int32_t length2,
     54                 UBool strncmpStyle, UBool codePointOrder);
     55 
     56 /**
     57  * Internal API, used by u_strcasecmp() etc.
     58  * Compare strings case-insensitively,
     59  * in code point order or code unit order.
         *
         * @param s1          first string
         * @param length1     length of s1 (presumably -1 for NUL-terminated -- confirm)
         * @param s2          second string
         * @param length2     length of s2 (presumably -1 for NUL-terminated -- confirm)
         * @param options     option bits; presumably they select the comparison
         *                    order and case folding variant -- confirm in the
         *                    implementation
         * @param pErrorCode  ICU error code
         * @return comparison result; presumably <0, 0 or >0 -- confirm
     60  */
     61 U_CFUNC int32_t
     62 u_strcmpFold(const UChar *s1, int32_t length1,
     63              const UChar *s2, int32_t length2,
     64              uint32_t options,
     65              UErrorCode *pErrorCode);
     66 
     67 /**
     68  * Internal API, used for detecting the length of
     69  * the shared prefix of two strings, case-insensitively.
     70  * @param s1            input string 1
     71  * @param length1       length of string 1, or -1 (NUL-terminated)
     72  * @param s2            input string 2
     73  * @param length2       length of string 2, or -1 (NUL-terminated)
     74  * @param options       compare options
     75  * @param matchLen1     (output) length of partial prefix match in s1
     76  * @param matchLen2     (output) length of partial prefix match in s2
     77  * @param pErrorCode    receives error status
     78  */
     79 U_CAPI void
     80 u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
     81                              const UChar *s2, int32_t length2,
     82                              uint32_t options,
     83                              int32_t *matchLen1, int32_t *matchLen2,
     84                              UErrorCode *pErrorCode);
     85 
     86 /**
     87  * Are the Unicode properties loaded?
     88  * This must be used before internal functions are called that do
     89  * not perform this check themselves.
     90  * Generates a debug assertion failure if the data is not loaded.
         *
         * @param pErrorCode ICU error code
         * @return presumably TRUE if the property data is available -- confirm
         *         in the implementation
     91  */
     92 U_CFUNC UBool
     93 uprv_haveProperties(UErrorCode *pErrorCode);
     94 
     95 /**
     96   * Load the Unicode property data (the declaration below is commented out).
     97   * Intended primarily for use from u_init().
     98   * Has no effect if property data is already loaded.
     99   * NOT thread safe.
    100   */
    101 /*U_CFUNC int8_t
    102 uprv_loadPropsData(UErrorCode *errorCode);*/
    103 
    104 /*
    105  * Internal string casing functions implementing
    106  * ustring.h/ustrcase.c and UnicodeString case mapping functions.
    107  */
    108 
    /*
     * State passed to the string case mapping functions declared in this header.
     * The member order must stay in sync with UCASEMAP_INITIALIZER, which
     * initializes the members positionally.
     */
    109 struct UCaseMap {
    110     const UCaseProps *csp;  /* case properties; UCaseProps presumably comes from ucase.h -- confirm */
    111 #if !UCONFIG_NO_BREAK_ITERATION
    112     UBreakIterator *iter;  /* We adopt the iterator, so we own it. Member exists only with break iteration. */
    113 #endif
    114     char locale[32];  /* locale ID buffer, 32 chars including any terminator */
    115     int32_t locCache;  /* NOTE(review): presumably a cached value derived from the locale -- confirm */
    116     uint32_t options;  /* option bits; their meaning is not defined in this header */
    117 };
    118 
    /*
     * Provide the UCaseMap typedef unless __UCASEMAP_H__ is defined.
     * NOTE(review): presumably the header guarded by __UCASEMAP_H__ declares the
     * same typedef -- confirm.
     */
    119 #ifndef __UCASEMAP_H__
    120 typedef struct UCaseMap UCaseMap;
    121 #endif
    122 
    /*
     * Positional initializer for struct UCaseMap, in member order:
     * csp, [iter,] locale, locCache, options.
     * The iter slot is present only when break iteration is compiled in,
     * matching the conditional member in the struct.
     */
    123 #if UCONFIG_NO_BREAK_ITERATION
    124 #   define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 }
    125 #else
    126 #   define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
    127 #endif
    128 
    /*
     * NOTE(review): undocumented here. Judging by the name and signature, this
     * presumably sets the locale of a temporary UCaseMap from a locale ID
     * string -- confirm in the implementation.
     */
    129 U_CFUNC void
    130 ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
    131 
    132 #ifndef U_STRING_CASE_MAPPER_DEFINED
    133 #define U_STRING_CASE_MAPPER_DEFINED
    134 
    135 /**
    136  * String case mapping function type, used by ustrcase_map().
    137  * All error checking must be done (presumably by the caller, before the
         * mapper is invoked -- confirm).
    138  * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
    139  * src and dest must not overlap.
         * The typedef is skipped when U_STRING_CASE_MAPPER_DEFINED is already defined.
    140  */
    141 typedef int32_t U_CALLCONV
    142 UStringCaseMapper(const UCaseMap *csm,
    143                   UChar *dest, int32_t destCapacity,
    144                   const UChar *src, int32_t srcLength,
    145                   UErrorCode *pErrorCode);
    146 
    147 #endif
    148 
    149 /** Implements UStringCaseMapper; judging by the name, the lowercasing mapper. */
    150 U_CFUNC int32_t U_CALLCONV
    151 ustrcase_internalToLower(const UCaseMap *csm,
    152                          UChar *dest, int32_t destCapacity,
    153                          const UChar *src, int32_t srcLength,
    154                          UErrorCode *pErrorCode);
    155 
    156 /** Implements UStringCaseMapper; judging by the name, the uppercasing mapper. */
    157 U_CFUNC int32_t U_CALLCONV
    158 ustrcase_internalToUpper(const UCaseMap *csm,
    159                          UChar *dest, int32_t destCapacity,
    160                          const UChar *src, int32_t srcLength,
    161                          UErrorCode *pErrorCode);
    162 
    163 #if !UCONFIG_NO_BREAK_ITERATION
    164 
    165 /** Implements UStringCaseMapper; judging by the name, the titlecasing mapper. Declared only when break iteration is compiled in. */
    166 U_CFUNC int32_t U_CALLCONV
    167 ustrcase_internalToTitle(const UCaseMap *csm,
    168                          UChar *dest, int32_t destCapacity,
    169                          const UChar *src, int32_t srcLength,
    170                          UErrorCode *pErrorCode);
    171 
    172 #endif
    173 
    174 /** Implements UStringCaseMapper; judging by the name, the case folding mapper. */
    175 U_CFUNC int32_t U_CALLCONV
    176 ustrcase_internalFold(const UCaseMap *csm,
    177                       UChar *dest, int32_t destCapacity,
    178                       const UChar *src, int32_t srcLength,
    179                       UErrorCode *pErrorCode);
    180 
    181 /**
    182  * Implements argument checking and buffer handling
    183  * for string case mapping as a common function.
         *
         * @param csm               case map state handed to the mapper
         * @param dest              destination buffer
         * @param destCapacity      capacity of dest, in UChars
         * @param src               source string
         * @param srcLength         length of src
         * @param stringCaseMapper  the UStringCaseMapper to use; presumably it is
         *                          called for the actual mapping once the
         *                          arguments have been checked -- confirm
         * @param pErrorCode        ICU error code
         * @return presumably the length of the mapped string -- confirm
    184  */
    185 U_CFUNC int32_t
    186 ustrcase_map(const UCaseMap *csm,
    187              UChar *dest, int32_t destCapacity,
    188              const UChar *src, int32_t srcLength,
    189              UStringCaseMapper *stringCaseMapper,
    190              UErrorCode *pErrorCode);
    191 
    192 /**
    193  * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
    194  * UTF-8 version of UStringCaseMapper: same parameters, but with
         * uint8_t buffers instead of UChar buffers.
    195  * All error checking must be done (presumably by the caller, before the
         * mapper is invoked -- confirm).
    196  * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
    197  * src and dest must not overlap.
    198  */
    199 typedef int32_t U_CALLCONV
    200 UTF8CaseMapper(const UCaseMap *csm,
    201                uint8_t *dest, int32_t destCapacity,
    202                const uint8_t *src, int32_t srcLength,
    203                UErrorCode *pErrorCode);
    204 
    205 /**
         * Implements UTF8CaseMapper; judging by the name, the titlecasing mapper.
         * NOTE(review): unlike ustrcase_internalToTitle(), this declaration is not
         * wrapped in #if !UCONFIG_NO_BREAK_ITERATION -- confirm that this is intended.
         */
    206 U_CFUNC int32_t U_CALLCONV
    207 ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
    208          uint8_t *dest, int32_t destCapacity,
    209          const uint8_t *src, int32_t srcLength,
    210          UErrorCode *pErrorCode);
    211 
    212 /**
    213  * Implements argument checking and buffer handling
    214  * for UTF-8 string case mapping as a common function.
         *
         * @param csm               case map state handed to the mapper
         * @param dest              destination buffer
         * @param destCapacity      capacity of dest, in bytes
         * @param src               source string
         * @param srcLength         length of src
         * @param stringCaseMapper  the UTF8CaseMapper to use; presumably it is
         *                          called for the actual mapping once the
         *                          arguments have been checked -- confirm
         * @param pErrorCode        ICU error code
         * @return presumably the length of the mapped string -- confirm
    215  */
    216 U_CFUNC int32_t
    217 ucasemap_mapUTF8(const UCaseMap *csm,
    218                  uint8_t *dest, int32_t destCapacity,
    219                  const uint8_t *src, int32_t srcLength,
    220                  UTF8CaseMapper *stringCaseMapper,
    221                  UErrorCode *pErrorCode);
    222 
    223 #ifdef __cplusplus
    224 
    225 U_NAMESPACE_BEGIN
        // C++-only constants and lookup functions; judging by the namespace name,
        // they support Greek uppercasing -- confirm against the users of this namespace.
    226 namespace GreekUpper {
    227 
    228 // Data bits.
        // UPPER_MASK covers the low 10 bits of a data value; each HAS_* flag below
        // is a single bit above that mask.
    229 static const uint32_t UPPER_MASK = 0x3ff;
    230 static const uint32_t HAS_VOWEL = 0x1000;
    231 static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
    232 static const uint32_t HAS_ACCENT = 0x4000;
    233 static const uint32_t HAS_DIALYTIKA = 0x8000;
    234 // Further bits during data building and processing, not stored in the data map.
    235 static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
    236 static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
    237 
        // Combinations of the single-bit flags above.
    238 static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
    239 static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
    240         HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
    241 static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
    242 
    243 // State bits.
    244 static const uint32_t AFTER_CASED = 1;
    245 static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
    246 
        // NOTE(review): undocumented here; presumably returns the data bits above
        // (an UPPER_MASK value combined with HAS_* flags) for c -- confirm in the
        // implementation.
    247 uint32_t getLetterData(UChar32 c);
    248 
    249 /**
    250  * Returns a non-zero value for each of the Greek combining diacritics
    251  * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
    252  * plus some perispomeni look-alikes.
    253  */
    254 uint32_t getDiacriticData(UChar32 c);
    255 
    256 }  // namespace GreekUpper
    257 U_NAMESPACE_END
    258 
    259 #endif  // __cplusplus
    260 
    /*
     * Hash functions taking a string pointer and a length.
     * NOTE(review): undocumented here. Judging by the names, these presumably
     * hash a UChar string, a char string, and a char string case-insensitively,
     * each over `length` units -- confirm in the implementation.
     */
    261 U_CAPI int32_t U_EXPORT2
    262 ustr_hashUCharsN(const UChar *str, int32_t length);
    263 
    264 U_CAPI int32_t U_EXPORT2
    265 ustr_hashCharsN(const char *str, int32_t length);
    266 
    267 U_CAPI int32_t U_EXPORT2
    268 ustr_hashICharsN(const char *str, int32_t length);
    269 
    270 /**
    271  * NUL-terminate a UChar * string if possible.
    272  * If length  < destCapacity then NUL-terminate.
    273  * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING.
    274  * If length  > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR.
    275  *
    276  * @param dest Destination buffer, can be NULL if destCapacity==0.
    277  * @param destCapacity Number of UChars available at dest.
    278  * @param length Number of UChars that were (to be) written to dest.
    279  * @param pErrorCode ICU error code; receives the warning or error described above.
    280  * @return length
    281  */
    282 U_CAPI int32_t U_EXPORT2
    283 u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
    284 
    285 /**
    286  * NUL-terminate a char * string if possible.
    287  * Same as u_terminateUChars() but for char strings.
    288  */
    289 U_CAPI int32_t U_EXPORT2
    290 u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
    291 
    292 /**
    293  * NUL-terminate a UChar32 * string if possible.
    294  * Same as u_terminateUChars() but for UChar32 strings.
    295  */
    296 U_CAPI int32_t U_EXPORT2
    297 u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
    298 
    299 /**
    300  * NUL-terminate a wchar_t * string if possible.
    301  * Same as u_terminateUChars() but for wchar_t strings.
    302  */
    303 U_CAPI int32_t U_EXPORT2
    304 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
    305 
    306 #endif
    307