Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2004-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  ucase.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2004aug30
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Low-level Unicode character/string case mapping code.
     17 *   Much code moved here (and modified) from uchar.c.
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/uset.h"
     22 #include "unicode/udata.h" /* UDataInfo */
     23 #include "ucmndata.h" /* DataHeader */
     24 #include "udatamem.h"
     25 #include "umutex.h"
     26 #include "uassert.h"
     27 #include "cmemory.h"
     28 #include "utrie2.h"
     29 #include "ucase.h"
     30 #include "ucln_cmn.h"
     31 
     /*
      * Case properties data set: pointers into the case mapping data plus the
      * trie that maps each code point to its 16-bit properties word.
      */
     struct UCaseProps {
         UDataMemory *mem;            /* NOTE(review): presumably the backing memory of loaded data -- confirm */
         const int32_t *indexes;
         const uint16_t *exceptions;  /* base of the exceptions words, indexed via GET_EXCEPTIONS() */
         const UChar *unfold;         /* reverse case folding table; NULL is tolerated by ucase_addStringCaseClosure() */

         UTrie2 trie;                 /* read with UTRIE2_GET16() to get a code point's properties word */
         uint8_t formatVersion[4];
     };
     41 
     42 /* ucase_props_data.c is machine-generated by gencase --csource */
     43 #include "ucase_props_data.c"
     44 
     45 /* UCaseProps singleton ----------------------------------------------------- */
     46 
     47 U_CAPI const UCaseProps * U_EXPORT2
     48 ucase_getSingleton() {
     49     return &ucase_props_singleton;
     50 }
     51 
     52 /* set of property starts for UnicodeSet ------------------------------------ */
     53 
     54 static UBool U_CALLCONV
     55 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
     56     /* add the start code point to the USet */
     57     const USetAdder *sa=(const USetAdder *)context;
     58     sa->add(sa->set, start);
     59     return TRUE;
     60 }
     61 
     62 U_CFUNC void U_EXPORT2
     63 ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode) {
     64     if(U_FAILURE(*pErrorCode)) {
     65         return;
     66     }
     67 
     68     /* add the start code point of each same-value range of the trie */
     69     utrie2_enum(&csp->trie, NULL, _enumPropertyStartsRange, sa);
     70 
     71     /* add code points with hardcoded properties, plus the ones following them */
     72 
     73     /* (none right now, see comment below) */
     74 
     75     /*
     76      * Omit code points with hardcoded specialcasing properties
     77      * because we do not build property UnicodeSets for them right now.
     78      */
     79 }
     80 
     81 /* data access primitives --------------------------------------------------- */
     82 
     /*
      * Pointer to the first exceptions word for a properties word:
      * the bits of props above UCASE_EXC_SHIFT are an index into csp->exceptions.
      * Only meaningful when PROPS_HAS_EXCEPTION(props) is nonzero.
      */
     #define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))

     /* Nonzero if the properties word has the UCASE_EXCEPTION bit set. */
     #define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
     86 
     /*
      * flagsOffset[b] is the number of 1 bits in the 8-bit value b.
      * SLOT_OFFSET() indexes this table with the flag bits below a slot index,
      * which yields the number of slots that precede that slot.
      */
     static const uint8_t flagsOffset[256]={
         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
         4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
     };
    106 
    /* Nonzero if bit idx is set in flags, i.e. the optional slot idx is present. */
    #define HAS_SLOT(flags, idx) ((flags)&(1<<(idx)))
    /* Number of flag bits set below bit idx: the count of slots stored before slot idx. */
    #define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)]
    109 
    /*
     * Get the value of an optional-value slot where HAS_SLOT(excWord, idx).
     *
     * Slots are single uint16_t values unless excWord has UCASE_EXC_DOUBLE_SLOTS
     * set, in which case every slot is two uint16_t: high half first, then low half.
     *
     * The macro is wrapped in do { } while(0) so that it expands to exactly one
     * statement and is safe in an unbraced if/else; invoke it with a trailing
     * semicolon. pExc16 and value are evaluated more than once, so pass plain
     * lvalues without side effects.
     *
     * @param excWord (in) initial exceptions word
     * @param idx (in) desired slot index
     * @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++;
     *               moved to the last uint16_t of the value, use +1 for beginning of next slot
     * @param value (out) int32_t or uint32_t output; always assigned by this macro,
     *               so check HAS_SLOT() before invoking it
     */
    #define GET_SLOT_VALUE(excWord, idx, pExc16, value) \
        do { \
            if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \
                (pExc16)+=SLOT_OFFSET(excWord, idx); \
                (value)=*(pExc16); \
            } else { \
                (pExc16)+=2*SLOT_OFFSET(excWord, idx); \
                (value)=*(pExc16)++; \
                (value)=((value)<<16)|*(pExc16); \
            } \
        } while(0)
    128 
    129 /* simple case mappings ----------------------------------------------------- */
    130 
    131 U_CAPI UChar32 U_EXPORT2
    132 ucase_tolower(const UCaseProps *csp, UChar32 c) {
    133     uint16_t props=UTRIE2_GET16(&csp->trie, c);
    134     if(!PROPS_HAS_EXCEPTION(props)) {
    135         if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
    136             c+=UCASE_GET_DELTA(props);
    137         }
    138     } else {
    139         const uint16_t *pe=GET_EXCEPTIONS(csp, props);
    140         uint16_t excWord=*pe++;
    141         if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
    142             GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
    143         }
    144     }
    145     return c;
    146 }
    147 
    148 U_CAPI UChar32 U_EXPORT2
    149 ucase_toupper(const UCaseProps *csp, UChar32 c) {
    150     uint16_t props=UTRIE2_GET16(&csp->trie, c);
    151     if(!PROPS_HAS_EXCEPTION(props)) {
    152         if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
    153             c+=UCASE_GET_DELTA(props);
    154         }
    155     } else {
    156         const uint16_t *pe=GET_EXCEPTIONS(csp, props);
    157         uint16_t excWord=*pe++;
    158         if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
    159             GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
    160         }
    161     }
    162     return c;
    163 }
    164 
    165 U_CAPI UChar32 U_EXPORT2
    166 ucase_totitle(const UCaseProps *csp, UChar32 c) {
    167     uint16_t props=UTRIE2_GET16(&csp->trie, c);
    168     if(!PROPS_HAS_EXCEPTION(props)) {
    169         if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
    170             c+=UCASE_GET_DELTA(props);
    171         }
    172     } else {
    173         const uint16_t *pe=GET_EXCEPTIONS(csp, props);
    174         uint16_t excWord=*pe++;
    175         int32_t idx;
    176         if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
    177             idx=UCASE_EXC_TITLE;
    178         } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
    179             idx=UCASE_EXC_UPPER;
    180         } else {
    181             return c;
    182         }
    183         GET_SLOT_VALUE(excWord, idx, pe, c);
    184     }
    185     return c;
    186 }
    187 
    188 static const UChar iDot[2] = { 0x69, 0x307 };
    189 static const UChar jDot[2] = { 0x6a, 0x307 };
    190 static const UChar iOgonekDot[3] = { 0x12f, 0x307 };
    191 static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 };
    192 static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 };
    193 static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
    194 
    195 
    /*
     * Adds the case closure items of c to the set behind sa:
     * - for I, i, dotted I (U+0130) and dotless i (U+0131): hardcoded items,
     *   the data file data is ignored for these four code points;
     * - without exception data: the one code point reached via the simple
     *   mapping delta, if the delta is nonzero;
     * - with exception data: all simple mappings (lower/fold/upper/title slots),
     *   the full case folding string, and every code point of the closure string.
     *
     * @param csp case properties (trie and exceptions data are read)
     * @param c   code point whose closure is added; reused as scratch below
     * @param sa  set adder; sa->add() and sa->addString() are called with sa->set
     */
    U_CFUNC void U_EXPORT2
    ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) {
        uint16_t props;

        /*
         * Hardcode the case closure of i and its relatives and ignore the
         * data file data for these characters.
         * The Turkic dotless i and dotted I with their case mapping conditions
         * and case folding option make the related characters behave specially.
         * This code matches their closure behavior to their case folding behavior.
         */

        switch(c) {
        case 0x49:
            /* regular i and I are in one equivalence class */
            sa->add(sa->set, 0x69);
            return;
        case 0x69:
            sa->add(sa->set, 0x49);
            return;
        case 0x130:
            /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
            sa->addString(sa->set, iDot, 2);
            return;
        case 0x131:
            /* dotless i is in a class by itself */
            return;
        default:
            /* otherwise use the data file data */
            break;
        }

        props=UTRIE2_GET16(&csp->trie, c);
        if(!PROPS_HAS_EXCEPTION(props)) {
            if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
                /* add the one simple case mapping, no matter what type it is */
                int32_t delta=UCASE_GET_DELTA(props);
                if(delta!=0) {
                    sa->add(sa->set, c+delta);
                }
            }
        } else {
            /*
             * c has exceptions, so there may be multiple simple and/or
             * full case mappings. Add them all.
             */
            const uint16_t *pe0, *pe=GET_EXCEPTIONS(csp, props);
            const UChar *closure;
            uint16_t excWord=*pe++;
            int32_t idx, closureLength, fullLength, length;

            /*
             * pe0 remembers the position right after the exceptions word.
             * GET_SLOT_VALUE() advances its pointer argument, so pe is reset
             * to pe0 before each slot lookup.
             */
            pe0=pe;

            /* add all simple case mappings */
            for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
                if(HAS_SLOT(excWord, idx)) {
                    pe=pe0;
                    /* c is overwritten here; the original input is no longer needed */
                    GET_SLOT_VALUE(excWord, idx, pe, c);
                    sa->add(sa->set, c);
                }
            }

            /* get the closure string pointer & length */
            if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
                pe=pe0;
                GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
                closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
                closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */
            } else {
                closureLength=0;
                closure=NULL;
            }

            /* add the full case folding */
            if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
                pe=pe0;
                GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);

                /* start of full case mapping strings */
                ++pe;

                /*
                 * fullLength is consumed 4 bits at a time below, in the order
                 * lowercase, case folding, uppercase, titlecase string length.
                 */
                fullLength&=0xffff; /* bits 16 and higher are reserved */

                /* skip the lowercase result string */
                pe+=fullLength&UCASE_FULL_LOWER;
                fullLength>>=4;

                /* add the full case folding string */
                length=fullLength&0xf;
                if(length!=0) {
                    sa->addString(sa->set, (const UChar *)pe, length);
                    pe+=length;
                }

                /* skip the uppercase and titlecase strings */
                fullLength>>=4;
                pe+=fullLength&0xf;
                fullLength>>=4;
                pe+=fullLength;

                /* replaces the pointer computed from the closure slot above */
                closure=(const UChar *)pe; /* behind full case mappings */
            }

            /* add each code point in the closure string; closureLength is 0 if there is none */
            for(idx=0; idx<closureLength;) {
                U16_NEXT_UNSAFE(closure, idx, c);
                sa->add(sa->set, c);
            }
        }
    }
    306 
    307 /*
    308  * compare s, which has a length, with t, which has a maximum length or is NUL-terminated
    309  * must be length>0 and max>0 and length<=max
    310  */
    311 static U_INLINE int32_t
    312 strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
    313     int32_t c1, c2;
    314 
    315     max-=length; /* we require length<=max, so no need to decrement max in the loop */
    316     do {
    317         c1=*s++;
    318         c2=*t++;
    319         if(c2==0) {
    320             return 1; /* reached the end of t but not of s */
    321         }
    322         c1-=c2;
    323         if(c1!=0) {
    324             return c1; /* return difference result */
    325         }
    326     } while(--length>0);
    327     /* ends with length==0 */
    328 
    329     if(max==0 || *t==0) {
    330         return 0; /* equal to length of both strings */
    331     } else {
    332         return -max; /* return lengh difference */
    333     }
    334 }
    335 
    336 U_CFUNC UBool U_EXPORT2
    337 ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) {
    338     const UChar *unfold, *p;
    339     int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
    340 
    341     if(csp->unfold==NULL || s==NULL) {
    342         return FALSE; /* no reverse case folding data, or no string */
    343     }
    344     if(length<=1) {
    345         /* the string is too short to find any match */
    346         /*
    347          * more precise would be:
    348          * if(!u_strHasMoreChar32Than(s, length, 1))
    349          * but this does not make much practical difference because
    350          * a single supplementary code point would just not be found
    351          */
    352         return FALSE;
    353     }
    354 
    355     unfold=csp->unfold;
    356     unfoldRows=unfold[UCASE_UNFOLD_ROWS];
    357     unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
    358     unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
    359     unfold+=unfoldRowWidth;
    360 
    361     if(length>unfoldStringWidth) {
    362         /* the string is too long to find any match */
    363         return FALSE;
    364     }
    365 
    366     /* do a binary search for the string */
    367     start=0;
    368     limit=unfoldRows;
    369     while(start<limit) {
    370         i=(start+limit)/2;
    371         p=unfold+(i*unfoldRowWidth);
    372         result=strcmpMax(s, length, p, unfoldStringWidth);
    373 
    374         if(result==0) {
    375             /* found the string: add each code point, and its case closure */
    376             UChar32 c;
    377 
    378             for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
    379                 U16_NEXT_UNSAFE(p, i, c);
    380                 sa->add(sa->set, c);
    381                 ucase_addCaseClosure(csp, c, sa);
    382             }
    383             return TRUE;
    384         } else if(result<0) {
    385             limit=i;
    386         } else /* result>0 */ {
    387             start=i+1;
    388         }
    389     }
    390 
    391     return FALSE; /* string not found */
    392 }
    393 
    394 /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
    395 U_CAPI int32_t U_EXPORT2
    396 ucase_getType(const UCaseProps *csp, UChar32 c) {
    397     uint16_t props=UTRIE2_GET16(&csp->trie, c);
    398     return UCASE_GET_TYPE(props);
    399 }
    400 
    401 /** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
    402 U_CAPI int32_t U_EXPORT2
    403 ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c) {
    404     uint16_t props=UTRIE2_GET16(&csp->trie, c);
    405     int32_t type=UCASE_GET_TYPE(props);
    406     if(props&UCASE_EXCEPTION) {
    407         const uint16_t *pe=GET_EXCEPTIONS(csp, props);
    408         if(*pe&UCASE_EXC_CASE_IGNORABLE) {
    409             type|=4;
    410         }
    411     } else if(type==UCASE_NONE && (props&UCASE_CASE_IGNORABLE)) {
    412         type|=4;
    413     }
    414     return type;
    415 }
    416 
    417 /** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */
    418 static U_INLINE int32_t
    419 getDotType(const UCaseProps *csp, UChar32 c) {
    420     uint16_t props=UTRIE2_GET16(&csp->trie, c);
    421     if(!PROPS_HAS_EXCEPTION(props)) {
    422         return props&UCASE_DOT_MASK;
    423     } else {
    424         const uint16_t *pe=GET_EXCEPTIONS(csp, props);
    425         return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
    426     }
    427 }
    428 
    429 U_CAPI UBool U_EXPORT2
    430 ucase_isSoftDotted(const UCaseProps *csp, UChar32 c) {
    431     return (UBool)(getDotType(csp, c)==UCASE_SOFT_DOTTED);
    432 }
    433 
    434 U_CAPI UBool U_EXPORT2
    435 ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) {
    436     uint16_t props=UTRIE2_GET16(&csp->trie, c);
    437     return (UBool)((props&UCASE_SENSITIVE)!=0);
    438 }
    439 
    440 /* string casing ------------------------------------------------------------ */
    441 
    442 /*
    443  * These internal functions form the core of string case mappings.
    444  * They map single code points to result code points or strings and take
    445  * all necessary conditions (context, locale ID, options) into account.
    446  *
    447  * They do not iterate over the source or write to the destination
    448  * so that the same functions are useful for non-standard string storage,
    449  * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
    450  * For the same reason, the "surrounding text" context is passed in as a
    451  * UCaseContextIterator which does not make any assumptions about
    452  * the underlying storage.
    453  *
    454  * This section contains helper functions that check for conditions
    455  * in the input text surrounding the current code point
    456  * according to SpecialCasing.txt.
    457  *
    458  * Each helper function gets the index
    459  * - after the current code point if it looks at following text
    460  * - before the current code point if it looks at preceding text
    461  *
    462  * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
    463  *
    464  * Final_Sigma
    465  *   C is preceded by a sequence consisting of
    466  *     a cased letter and a case-ignorable sequence,
    467  *   and C is not followed by a sequence consisting of
    468  *     an ignorable sequence and then a cased letter.
    469  *
    470  * More_Above
    471  *   C is followed by one or more characters of combining class 230 (ABOVE)
    472  *   in the combining character sequence.
    473  *
    474  * After_Soft_Dotted
    475  *   The last preceding character with combining class of zero before C
    476  *   was Soft_Dotted,
    477  *   and there is no intervening combining character class 230 (ABOVE).
    478  *
    479  * Before_Dot
    480  *   C is followed by combining dot above (U+0307).
    481  *   Any sequence of characters with a combining class that is neither 0 nor 230
    482  *   may intervene between the current character and the combining dot above.
    483  *
    484  * The erratum from 2002-10-31 adds the condition
    485  *
    486  * After_I
    487  *   The last preceding base character was an uppercase I, and there is no
    488  *   intervening combining character class 230 (ABOVE).
    489  *
    490  *   (See Jitterbug 2344 and the comments on After_I below.)
    491  *
    492  * Helper definitions in Unicode 3.2 UAX 21:
    493  *
    494  * D1. A character C is defined to be cased
    495  *     if it meets any of the following criteria:
    496  *
    497  *   - The general category of C is Titlecase Letter (Lt)
    498  *   - In [CoreProps], C has one of the properties Uppercase, or Lowercase
    499  *   - Given D = NFD(C), then it is not the case that:
    500  *     D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
    501  *     (This third criterion does not add any characters to the list
    502  *      for Unicode 3.2. Ignored.)
    503  *
    504  * D2. A character C is defined to be case-ignorable
    505  *     if it meets either of the following criteria:
    506  *
    507  *   - The general category of C is
    508  *     Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
    509  *     Letter Modifier (Lm), or Symbol Modifier (Sk)
    510  *   - C is one of the following characters
    511  *     U+0027 APOSTROPHE
    512  *     U+00AD SOFT HYPHEN (SHY)
    513  *     U+2019 RIGHT SINGLE QUOTATION MARK
    514  *            (the preferred character for apostrophe)
    515  *
    516  * D3. A case-ignorable sequence is a sequence of
    517  *     zero or more case-ignorable characters.
    518  */
    519 
    /*
     * Case-insensitive tests for single ASCII letters, used by
     * ucase_getCaseLocale() to recognize language codes.
     * Each macro evaluates its argument twice, so pass a plain variable.
     */
    #define is_a(c) ((c)=='a' || (c)=='A')
    #define is_d(c) ((c)=='d' || (c)=='D')
    #define is_e(c) ((c)=='e' || (c)=='E')
    #define is_i(c) ((c)=='i' || (c)=='I')
    #define is_l(c) ((c)=='l' || (c)=='L')
    #define is_n(c) ((c)=='n' || (c)=='N')
    #define is_r(c) ((c)=='r' || (c)=='R')
    #define is_t(c) ((c)=='t' || (c)=='T')
    #define is_u(c) ((c)=='u' || (c)=='U')
    #define is_z(c) ((c)=='z' || (c)=='Z')

    /* separator? ('_', '-', or the terminating NUL of the locale ID) */
    #define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0)
    533 
    534 /**
    535  * Requires non-NULL locale ID but otherwise does the equivalent of
    536  * checking for language codes as if uloc_getLanguage() were called:
    537  * Accepts both 2- and 3-letter codes and accepts case variants.
    538  */
    539 U_CFUNC int32_t
    540 ucase_getCaseLocale(const char *locale, int32_t *locCache) {
    541     int32_t result;
    542     char c;
    543 
    544     if(locCache!=NULL && (result=*locCache)!=UCASE_LOC_UNKNOWN) {
    545         return result;
    546     }
    547 
    548     result=UCASE_LOC_ROOT;
    549 
    550     /*
    551      * This function used to use uloc_getLanguage(), but the current code
    552      * removes the dependency of this low-level code on uloc implementation code
    553      * and is faster because not the whole locale ID has to be
    554      * examined and copied/transformed.
    555      *
    556      * Because this code does not want to depend on uloc, the caller must
    557      * pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
    558      */
    559     c=*locale++;
    560     if(is_t(c)) {
    561         /* tr or tur? */
    562         c=*locale++;
    563         if(is_u(c)) {
    564             c=*locale++;
    565         }
    566         if(is_r(c)) {
    567             c=*locale;
    568             if(is_sep(c)) {
    569                 result=UCASE_LOC_TURKISH;
    570             }
    571         }
    572     } else if(is_a(c)) {
    573         /* az or aze? */
    574         c=*locale++;
    575         if(is_z(c)) {
    576             c=*locale++;
    577             if(is_e(c)) {
    578                 c=*locale;
    579             }
    580             if(is_sep(c)) {
    581                 result=UCASE_LOC_TURKISH;
    582             }
    583         }
    584     } else if(is_l(c)) {
    585         /* lt or lit? */
    586         c=*locale++;
    587         if(is_i(c)) {
    588             c=*locale++;
    589         }
    590         if(is_t(c)) {
    591             c=*locale;
    592             if(is_sep(c)) {
    593                 result=UCASE_LOC_LITHUANIAN;
    594             }
    595         }
    596     } else if(is_n(c)) {
    597         /* nl or nld? */
    598         c=*locale++;
    599         if(is_l(c)) {
    600             c=*locale++;
    601             if(is_d(c)) {
    602                 c=*locale;
    603             }
    604             if(is_sep(c)) {
    605                 result=UCASE_LOC_DUTCH;
    606             }
    607         }
    608     }
    609 
    610     if(locCache!=NULL) {
    611         *locCache=result;
    612     }
    613     return result;
    614 }
    615 
    616 /*
    617  * Is followed by
    618  *   {case-ignorable}* cased
    619  * ?
    620  * (dir determines looking forward/backward)
    621  * If a character is case-ignorable, it is skipped regardless of whether
    622  * it is also cased or not.
    623  */
    624 static UBool
    625 isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void *context, int8_t dir) {
    626     UChar32 c;
    627 
    628     if(iter==NULL) {
    629         return FALSE;
    630     }
    631 
    632     for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
    633         int32_t type=ucase_getTypeOrIgnorable(csp, c);
    634         if(type&4) {
    635             /* case-ignorable, continue with the loop */
    636         } else if(type!=UCASE_NONE) {
    637             return TRUE; /* followed by cased letter */
    638         } else {
    639             return FALSE; /* uncased and not case-ignorable */
    640         }
    641     }
    642 
    643     return FALSE; /* not followed by cased letter */
    644 }
    645 
    646 /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
    647 static UBool
    648 isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void *context) {
    649     UChar32 c;
    650     int32_t dotType;
    651     int8_t dir;
    652 
    653     if(iter==NULL) {
    654         return FALSE;
    655     }
    656 
    657     for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
    658         dotType=getDotType(csp, c);
    659         if(dotType==UCASE_SOFT_DOTTED) {
    660             return TRUE; /* preceded by TYPE_i */
    661         } else if(dotType!=UCASE_OTHER_ACCENT) {
    662             return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
    663         }
    664     }
    665 
    666     return FALSE; /* not preceded by TYPE_i */
    667 }
    668 
    669 /*
    670  * See Jitterbug 2344:
    671  * The condition After_I for Turkic-lowercasing of U+0307 combining dot above
    672  * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
    673  * we made those releases compatible with Unicode 3.2 which had not fixed
    674  * a related bug in SpecialCasing.txt.
    675  *
    676  * From the Jitterbug 2344 text:
    677  * ... this bug is listed as a Unicode erratum
    678  * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
    679  * <quote>
    680  * There are two errors in SpecialCasing.txt.
    681  * 1. Missing semicolons on two lines. ... [irrelevant for ICU]
    682  * 2. An incorrect context definition. Correct as follows:
    683  * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
    684  * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
    685  * ---
    686  * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
    687  * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
    688  * where the context After_I is defined as:
    689  * The last preceding base character was an uppercase I, and there is no
    690  * intervening combining character class 230 (ABOVE).
    691  * </quote>
    692  *
    693  * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
    694  *
    695  * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
    696  * # This matches the behavior of the canonically equivalent I-dot_above
    697  *
    698  * See also the description in this place in older versions of uchar.c (revision 1.100).
    699  *
    700  * Markus W. Scherer 2003-feb-15
    701  */
    702 
    703 /* Is preceded by base character 'I' with no intervening cc=230 ? */
    704 static UBool
    705 isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context) {
    706     UChar32 c;
    707     int32_t dotType;
    708     int8_t dir;
    709 
    710     if(iter==NULL) {
    711         return FALSE;
    712     }
    713 
    714     for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
    715         if(c==0x49) {
    716             return TRUE; /* preceded by I */
    717         }
    718         dotType=getDotType(csp, c);
    719         if(dotType!=UCASE_OTHER_ACCENT) {
    720             return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
    721         }
    722     }
    723 
    724     return FALSE; /* not preceded by I */
    725 }
    726 
    727 /* Is followed by one or more cc==230 ? */
    728 static UBool
    729 isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) {
    730     UChar32 c;
    731     int32_t dotType;
    732     int8_t dir;
    733 
    734     if(iter==NULL) {
    735         return FALSE;
    736     }
    737 
    738     for(dir=1; (c=iter(context, dir))>=0; dir=0) {
    739         dotType=getDotType(csp, c);
    740         if(dotType==UCASE_ABOVE) {
    741             return TRUE; /* at least one cc==230 following */
    742         } else if(dotType!=UCASE_OTHER_ACCENT) {
    743             return FALSE; /* next base character, no more cc==230 following */
    744         }
    745     }
    746 
    747     return FALSE; /* no more cc==230 following */
    748 }
    749 
    750 /* Is followed by a dot above (without cc==230 in between) ? */
    751 static UBool
    752 isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) {
    753     UChar32 c;
    754     int32_t dotType;
    755     int8_t dir;
    756 
    757     if(iter==NULL) {
    758         return FALSE;
    759     }
    760 
    761     for(dir=1; (c=iter(context, dir))>=0; dir=0) {
    762         if(c==0x307) {
    763             return TRUE;
    764         }
    765         dotType=getDotType(csp, c);
    766         if(dotType!=UCASE_OTHER_ACCENT) {
    767             return FALSE; /* next base character or cc==230 in between */
    768         }
    769     }
    770 
    771     return FALSE; /* no dot above following */
    772 }
    773 
    774 U_CAPI int32_t U_EXPORT2
    775 ucase_toFullLower(const UCaseProps *csp, UChar32 c,
    776                   UCaseContextIterator *iter, void *context,
    777                   const UChar **pString,
    778                   const char *locale, int32_t *locCache)
    779 {
            /*
             * Full lowercase mapping of c, including the hardcoded conditional
             * (locale- and context-dependent) special cases below.
             *
             * Return value as produced in this function:
             *   ~c (negative)    c maps to itself
             *   0                c is removed (Turkic: dot above after I)
             *   string length    *pString was set to the result string
             *   other            the single mapped code point
             * NOTE(review): how callers tell a string length from a code point
             * is not visible here - confirm in ucase.h.
             *
             * iter and context are only handed on to the context tests;
             * locale and locCache are only handed to ucase_getCaseLocale().
             * *pString is written only on the paths that return a string length.
             */
    780     UChar32 result=c;
    781     uint16_t props=UTRIE2_GET16(&csp->trie, c);
    782     if(!PROPS_HAS_EXCEPTION(props)) {
                /* no exception data: only types >=UCASE_UPPER get the delta applied */
    783         if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
    784             result=c+UCASE_GET_DELTA(props);
    785         }
    786     } else {
    787         const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2;
    788         uint16_t excWord=*pe++;
    789         int32_t full;
    790
                /* keep the position right after excWord for the simple-mapping slot lookup at the end */
    791         pe2=pe;
    792
    793         if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
    794             /* use hardcoded conditions and mappings */
    795             int32_t loc=ucase_getCaseLocale(locale, locCache);
    796
    797             /*
    798              * Test for conditional mappings first
    799              *   (otherwise the unconditional default mappings are always taken),
    800              * then test for characters that have unconditional mappings in SpecialCasing.txt,
    801              * then get the UnicodeData.txt mappings.
    802              */
    803             if( loc==UCASE_LOC_LITHUANIAN &&
    804                     /* base characters, find accents above */
    805                     (((c==0x49 || c==0x4a || c==0x12e) &&
    806                         isFollowedByMoreAbove(csp, iter, context)) ||
    807                     /* precomposed with accent above, no need to find one */
    808                     (c==0xcc || c==0xcd || c==0x128))
    809             ) {
    810                 /*
    811                     # Lithuanian
    812
    813                     # Lithuanian retains the dot in a lowercase i when followed by accents.
    814
    815                     # Introduce an explicit dot above when lowercasing capital I's and J's
    816                     # whenever there are more accents above.
    817                     # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
    818
    819                     0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
    820                     004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
    821                     012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
    822                     00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
    823                     00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
    824                     0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
    825                  */
                        /* each case returns the length of the hardcoded string stored in *pString */
    826                 switch(c) {
    827                 case 0x49:  /* LATIN CAPITAL LETTER I */
    828                     *pString=iDot;
    829                     return 2;
    830                 case 0x4a:  /* LATIN CAPITAL LETTER J */
    831                     *pString=jDot;
    832                     return 2;
    833                 case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
    834                     *pString=iOgonekDot;
    835                     return 2;
    836                 case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
    837                     *pString=iDotGrave;
    838                     return 3;
    839                 case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
    840                     *pString=iDotAcute;
    841                     return 3;
    842                 case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
    843                     *pString=iDotTilde;
    844                     return 3;
    845                 default:
    846                     return 0; /* will not occur */
    847                 }
    848             /* # Turkish and Azeri */
    849             } else if(loc==UCASE_LOC_TURKISH && c==0x130) {
    850                 /*
    851                     # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
    852                     # The following rules handle those cases.
    853
    854                     0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
    855                     0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
    856                  */
    857                 return 0x69;
    858             } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(csp, iter, context)) {
    859                 /*
    860                     # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
    861                     # This matches the behavior of the canonically equivalent I-dot_above
    862
    863                     0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
    864                     0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
    865                  */
    866                 return 0; /* remove the dot (continue without output) */
    867             } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(csp, iter, context)) {
    868                 /*
    869                     # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
    870
    871                     0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
    872                     0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
    873                  */
    874                 return 0x131;
    875             } else if(c==0x130) {
    876                 /*
    877                     # Preserve canonical equivalence for I with dot. Turkic is handled below.
    878
    879                     0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
    880                  */
    881                 *pString=iDot;
    882                 return 2;
    883             } else if(  c==0x3a3 &&
    884                         !isFollowedByCasedLetter(csp, iter, context, 1) &&
    885                         isFollowedByCasedLetter(csp, iter, context, -1) /* -1=preceded */
    886             ) {
    887                 /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
    888                 /*
    889                     # Special case for final form of sigma
    890
    891                     03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
    892                  */
    893                 return 0x3c2; /* greek small final sigma */
    894             } else {
    895                 /* no known conditional special case mapping, use a normal mapping */
    896             }
    897         } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
    898             GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
                    /* keep only the lowercase-string length bits of the full-mappings value */
    899             full&=UCASE_FULL_LOWER;
    900             if(full!=0) {
    901                 /* set the output pointer to the lowercase mapping */
                        /* NOTE(review): relies on pe addressing the full-mappings slot after GET_SLOT_VALUE, */
                        /* with the strings (lowercase first) starting one unit later - confirm at the macro */
    902                 *pString=pe+1;
    903
    904                 /* return the string length */
    905                 return full;
    906             }
    907         }
    908
                /* reached when no hardcoded case and no full lowercase string applied: */
                /* use the simple lowercase slot if there is one, else result stays c */
    909         if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
    910             GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result);
    911         }
    912     }
    913
            /* an unchanged code point is reported as its bitwise complement (negative) */
    914     return (result==c) ? ~result : result;
    915 }
    916 
    917 /*
         * internal
         *
         * Shared implementation of ucase_toFullUpper() (upperNotTitle=TRUE) and
         * ucase_toFullTitle() (upperNotTitle=FALSE).
         *
         * Return value as produced in this function:
         *   ~c (negative)    c maps to itself
         *   0                c is removed (Lithuanian: dot above after a soft-dotted character)
         *   string length    *pString was set to the result string
         *   other            the single mapped code point
         * NOTE(review): how callers tell a string length from a code point
         * is not visible here - confirm in ucase.h.
         */
    918 static int32_t
    919 toUpperOrTitle(const UCaseProps *csp, UChar32 c,
    920                UCaseContextIterator *iter, void *context,
    921                const UChar **pString,
    922                const char *locale, int32_t *locCache,
    923                UBool upperNotTitle) {
    924     UChar32 result=c;
    925     uint16_t props=UTRIE2_GET16(&csp->trie, c);
    926     if(!PROPS_HAS_EXCEPTION(props)) {
                /* no exception data: only type UCASE_LOWER gets the delta applied */
    927         if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
    928             result=c+UCASE_GET_DELTA(props);
    929         }
    930     } else {
    931         const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2;
    932         uint16_t excWord=*pe++;
    933         int32_t full, idx;
    934
                /* keep the position right after excWord for the simple-mapping slot lookup at the end */
    935         pe2=pe;
    936
    937         if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
    938             /* use hardcoded conditions and mappings */
    939             int32_t loc=ucase_getCaseLocale(locale, locCache);
    940
    941             if(loc==UCASE_LOC_TURKISH && c==0x69) {
    942                 /*
    943                     # Turkish and Azeri
    944
    945                     # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
    946                     # The following rules handle those cases.
    947
    948                     # When uppercasing, i turns into a dotted capital I
    949
    950                     0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
    951                     0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
    952                 */
    953                 return 0x130;
    954             } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(csp, iter, context)) {
    955                 /*
    956                     # Lithuanian
    957
    958                     # Lithuanian retains the dot in a lowercase i when followed by accents.
    959
    960                     # Remove DOT ABOVE after "i" with upper or titlecase
    961
    962                     0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
    963                  */
    964                 return 0; /* remove the dot (continue without output) */
    965             } else {
    966                 /* no known conditional special case mapping, use a normal mapping */
    967             }
    968         } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
    969             GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
    970
    971             /* start of full case mapping strings */
    972             ++pe;
    973
                    /*
                     * full holds one string length per mapping: the lowercase length under
                     * UCASE_FULL_LOWER, then 4-bit fields for case folding, uppercase and
                     * titlecase in successively higher bits. The strings are stored in that
                     * same order, so skipping a string means advancing pe by its length.
                     */
    974             /* skip the lowercase and case-folding result strings */
    975             pe+=full&UCASE_FULL_LOWER;
    976             full>>=4;
    977             pe+=full&0xf;
    978             full>>=4;
    979
    980             if(upperNotTitle) {
    981                 full&=0xf;
    982             } else {
    983                 /* skip the uppercase result string */
    984                 pe+=full&0xf;
    985                 full=(full>>4)&0xf;
    986             }
    987
    988             if(full!=0) {
    989                 /* set the output pointer to the result string */
    990                 *pString=pe;
    991
    992                 /* return the string length */
    993                 return full;
    994             }
    995         }
    996
                /* simple mapping: the titlecase slot is used only for titlecasing; */
                /* without any usable slot the code point is reported as unchanged */
    997         if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
    998             idx=UCASE_EXC_TITLE;
    999         } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
   1000             /* here, titlecase is same as uppercase */
   1001             idx=UCASE_EXC_UPPER;
   1002         } else {
   1003             return ~c;
   1004         }
   1005         GET_SLOT_VALUE(excWord, idx, pe2, result);
   1006     }
   1007
            /* an unchanged code point is reported as its bitwise complement (negative) */
   1008     return (result==c) ? ~result : result;
   1009 }
   1010 
   1011 U_CAPI int32_t U_EXPORT2
   1012 ucase_toFullUpper(const UCaseProps *csp, UChar32 c,
   1013                   UCaseContextIterator *iter, void *context,
   1014                   const UChar **pString,
   1015                   const char *locale, int32_t *locCache) {
            /* full uppercase mapping: forwards every argument to toUpperOrTitle() with upperNotTitle=TRUE */
   1016     return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, TRUE);
   1017 }
   1018 
   1019 U_CAPI int32_t U_EXPORT2
   1020 ucase_toFullTitle(const UCaseProps *csp, UChar32 c,
   1021                   UCaseContextIterator *iter, void *context,
   1022                   const UChar **pString,
   1023                   const char *locale, int32_t *locCache) {
            /* full titlecase mapping: forwards every argument to toUpperOrTitle() with upperNotTitle=FALSE */
   1024     return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, FALSE);
   1025 }
   1026 
   1027 /* case folding ------------------------------------------------------------- */
   1028 
   1029 /*
   1030  * Case folding is similar to lowercasing.
   1031  * The result may be a simple mapping, i.e., a single code point, or
   1032  * a full mapping, i.e., a string.
   1033  * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
   1034  * then only the lowercase mapping is stored.
   1035  *
   1036  * Some special cases are hardcoded because their conditions cannot be
   1037  * parsed and processed from CaseFolding.txt.
   1038  *
   1039  * Unicode 3.2 CaseFolding.txt specifies for its status field:
   1040 
   1041 # C: common case folding, common mappings shared by both simple and full mappings.
   1042 # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
   1043 # S: simple case folding, mappings to single characters where different from F.
   1044 # T: special case for uppercase I and dotted uppercase I
   1045 #    - For non-Turkic languages, this mapping is normally not used.
   1046 #    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
   1047 #
   1048 # Usage:
   1049 #  A. To do a simple case folding, use the mappings with status C + S.
   1050 #  B. To do a full case folding, use the mappings with status C + F.
   1051 #
   1052 #    The mappings with status T can be used or omitted depending on the desired case-folding
   1053 #    behavior. (The default option is to exclude them.)
   1054 
   1055  * Unicode 3.2 has 'T' mappings as follows:
   1056 
   1057 0049; T; 0131; # LATIN CAPITAL LETTER I
   1058 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
   1059 
   1060  * while the default mappings for these code points are:
   1061 
   1062 0049; C; 0069; # LATIN CAPITAL LETTER I
   1063 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
   1064 
   1065  * U+0130 has no simple case folding (simple-case-folds to itself).
   1066  */
   1067 
   1068 /*
         * return the simple case folding mapping for c
         *
         * Always returns a code point; c itself is returned when no folding applies.
         * For code points flagged UCASE_EXC_CONDITIONAL_FOLD, U+0049 and U+0130 are
         * hardcoded: when (options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT the
         * default mappings are used, otherwise the Turkic ("T") mappings.
         */
   1069 U_CAPI UChar32 U_EXPORT2
   1070 ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) {
   1071     uint16_t props=UTRIE2_GET16(&csp->trie, c);
   1072     if(!PROPS_HAS_EXCEPTION(props)) {
                /* no exception data: only types >=UCASE_UPPER get the delta applied */
   1073         if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
   1074             c+=UCASE_GET_DELTA(props);
   1075         }
   1076     } else {
   1077         const uint16_t *pe=GET_EXCEPTIONS(csp, props);
   1078         uint16_t excWord=*pe++;
   1079         int32_t idx;
   1080         if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
   1081             /* special case folding mappings, hardcoded */
   1082             if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
   1083                 /* default mappings */
   1084                 if(c==0x49) {
   1085                     /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
   1086                     return 0x69;
   1087                 } else if(c==0x130) {
   1088                     /* no simple case folding for U+0130 */
   1089                     return c;
   1090                 }
   1091             } else {
   1092                 /* Turkic mappings */
   1093                 if(c==0x49) {
   1094                     /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
   1095                     return 0x131;
   1096                 } else if(c==0x130) {
   1097                     /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
   1098                     return 0x69;
   1099                 }
   1100             }
                    /* any other flagged code point falls through to the slot lookup below */
   1101         }
                /* prefer the case folding slot, fall back to the simple lowercase slot */
   1102         if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
   1103             idx=UCASE_EXC_FOLD;
   1104         } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
   1105             idx=UCASE_EXC_LOWER;
   1106         } else {
   1107             return c;
   1108         }
   1109         GET_SLOT_VALUE(excWord, idx, pe, c);
   1110     }
   1111     return c;
   1112 }
   1113 
   1114 /*
   1115  * Issue for canonical caseless match (UAX #21):
   1116  * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
   1117  * canonical equivalence, unlike default-option casefolding.
   1118  * For example, I-grave and I + grave fold to strings that are not canonically
   1119  * equivalent.
   1120  * For more details, see the comment in unorm_compare() in unorm.cpp
   1121  * and the intermediate prototype changes for Jitterbug 2021.
   1122  * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
   1123  *
   1124  * This did not get fixed because it appears that it is not possible to fix
   1125  * it for uppercase and lowercase characters (I-grave vs. i-grave)
   1126  * together in a way that they still fold to common result strings.
   1127  */
   1128 
   1129 U_CAPI int32_t U_EXPORT2
   1130 ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
   1131                     const UChar **pString,
   1132                     uint32_t options)
   1133 {
            /*
             * Full case folding of c.
             *
             * Return value as produced in this function:
             *   ~c (negative)    c folds to itself
             *   string length    *pString was set to the result string
             *   other            the single folded code point
             * NOTE(review): how callers tell a string length from a code point
             * is not visible here - confirm in ucase.h.
             *
             * options selects between the default and the Turkic hardcoded mappings
             * for U+0049 and U+0130; it is not used anywhere else in this function.
             */
   1134     UChar32 result=c;
   1135     uint16_t props=UTRIE2_GET16(&csp->trie, c);
   1136     if(!PROPS_HAS_EXCEPTION(props)) {
                /* no exception data: only types >=UCASE_UPPER get the delta applied */
   1137         if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
   1138             result=c+UCASE_GET_DELTA(props);
   1139         }
   1140     } else {
   1141         const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2;
   1142         uint16_t excWord=*pe++;
   1143         int32_t full, idx;
   1144
                /* keep the position right after excWord for the simple-mapping slot lookup at the end */
   1145         pe2=pe;
   1146
   1147         if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
   1148             /* use hardcoded conditions and mappings */
   1149             if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
   1150                 /* default mappings */
   1151                 if(c==0x49) {
   1152                     /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
   1153                     return 0x69;
   1154                 } else if(c==0x130) {
   1155                     /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
   1156                     *pString=iDot;
   1157                     return 2;
   1158                 }
   1159             } else {
   1160                 /* Turkic mappings */
   1161                 if(c==0x49) {
   1162                     /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
   1163                     return 0x131;
   1164                 } else if(c==0x130) {
   1165                     /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
   1166                     return 0x69;
   1167                 }
   1168             }
   1169         } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
   1170             GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
   1171
   1172             /* start of full case mapping strings */
   1173             ++pe;
   1174
   1175             /* skip the lowercase result string */
   1176             pe+=full&UCASE_FULL_LOWER;
                    /* the case folding string length is the next 4-bit field of full */
   1177             full=(full>>4)&0xf;
   1178
   1179             if(full!=0) {
   1180                 /* set the output pointer to the result string */
   1181                 *pString=pe;
   1182
   1183                 /* return the string length */
   1184                 return full;
   1185             }
   1186         }
   1187
                /* simple mapping: prefer the case folding slot, fall back to the simple lowercase slot; */
                /* without either slot the code point is reported as unchanged */
   1188         if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
   1189             idx=UCASE_EXC_FOLD;
   1190         } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
   1191             idx=UCASE_EXC_LOWER;
   1192         } else {
   1193             return ~c;
   1194         }
   1195         GET_SLOT_VALUE(excWord, idx, pe2, result);
   1196     }
   1197
            /* an unchanged code point is reported as its bitwise complement (negative) */
   1198     return (result==c) ? ~result : result;
   1199 }
   1200 
   1201 /* case mapping properties API ---------------------------------------------- */
   1202 
   1203 #define GET_CASE_PROPS() (&ucase_props_singleton) /* built-in case properties; parenthesized so the expansion stays a single operand in any expression */
   1204 
   1205 /* public API (see uchar.h) */
   1206 
   1207 U_CAPI UBool U_EXPORT2
   1208 u_isULowercase(UChar32 c) {
            /* TRUE exactly when ucase_getType() reports UCASE_LOWER for c in the built-in case properties */
   1209     return (UBool)(UCASE_LOWER==ucase_getType(GET_CASE_PROPS(), c));
   1210 }
   1211 
   1212 U_CAPI UBool U_EXPORT2
   1213 u_isUUppercase(UChar32 c) {
            /* TRUE exactly when ucase_getType() reports UCASE_UPPER for c in the built-in case properties */
   1214     return (UBool)(UCASE_UPPER==ucase_getType(GET_CASE_PROPS(), c));
   1215 }
   1216 
   1217 /* Transforms the Unicode character to its lower case equivalent: returns ucase_tolower() applied to c with the built-in case properties. */
   1218 U_CAPI UChar32 U_EXPORT2
   1219 u_tolower(UChar32 c) {
   1220     return ucase_tolower(GET_CASE_PROPS(), c);
   1221 }
   1222 
   1223 /* Transforms the Unicode character to its upper case equivalent: returns ucase_toupper() applied to c with the built-in case properties. */
   1224 U_CAPI UChar32 U_EXPORT2
   1225 u_toupper(UChar32 c) {
   1226     return ucase_toupper(GET_CASE_PROPS(), c);
   1227 }
   1228 
   1229 /* Transforms the Unicode character to its title case equivalent: returns ucase_totitle() applied to c with the built-in case properties. */
   1230 U_CAPI UChar32 U_EXPORT2
   1231 u_totitle(UChar32 c) {
   1232     return ucase_totitle(GET_CASE_PROPS(), c);
   1233 }
   1234 
   1235 /* return the simple case folding mapping for c: ucase_fold() on the built-in case properties, with options passed through unchanged */
   1236 U_CAPI UChar32 U_EXPORT2
   1237 u_foldCase(UChar32 c, uint32_t options) {
   1238     return ucase_fold(GET_CASE_PROPS(), c, options);
   1239 }
   1240 
   1241 U_CFUNC int32_t U_EXPORT2
   1242 ucase_hasBinaryProperty(UChar32 c, UProperty which) {
            /*
             * Returns the value of the case-related binary property "which" for c,
             * evaluated against the built-in case properties.
             * Properties not listed in the switch yield FALSE.
             * The declared return type is int32_t, but most paths return a value cast to UBool.
             */
   1243     /* case mapping properties */
   1244     const UChar *resultString;
   1245     int32_t locCache;
   1246     const UCaseProps *csp=GET_CASE_PROPS();
            /* NOTE(review): GET_CASE_PROPS() expands to the address of ucase_props_singleton, */
            /* so this check cannot trigger as the macro is currently defined */
   1247     if(csp==NULL) {
   1248         return FALSE;
   1249     }
   1250     switch(which) {
   1251     case UCHAR_LOWERCASE:
   1252         return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
   1253     case UCHAR_UPPERCASE:
   1254         return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
   1255     case UCHAR_SOFT_DOTTED:
   1256         return ucase_isSoftDotted(csp, c);
   1257     case UCHAR_CASE_SENSITIVE:
   1258         return ucase_isCaseSensitive(csp, c);
   1259     case UCHAR_CASED:
   1260         return (UBool)(UCASE_NONE!=ucase_getType(csp, c));
   1261     case UCHAR_CASE_IGNORABLE:
                /* NOTE(review): presumably the two low bits are the case type and the ignorable flag sits above them - confirm in ucase.h */
   1262         return (UBool)(ucase_getTypeOrIgnorable(csp, c)>>2);
   1263     /*
   1264      * Note: The following Changes_When_Xyz are defined as testing whether
   1265      * the NFD form of the input changes when Xyz-case-mapped.
   1266      * However, this simpler implementation of these properties,
   1267      * ignoring NFD, passes the tests.
   1268      * The implementation needs to be changed if the tests start failing.
   1269      * When that happens, optimizations should be used to work with the
   1270      * per-single-code point ucase_toFullXyz() functions unless
   1271      * the NFD form has more than one code point,
   1272      * and the property starts set needs to be the union of the
   1273      * start sets for normalization and case mappings.
   1274      */
            /*
             * In the cases below the mappings are requested without a context iterator,
             * with an empty locale string and locCache preset to UCASE_LOC_ROOT.
             * ucase_toFullXyz() returns a negative value (~c) only for an unchanged
             * code point, so ">=0" means "the mapping changes c".
             * resultString only receives the output pointer and is never read.
             */
   1275     case UCHAR_CHANGES_WHEN_LOWERCASED:
   1276         locCache=UCASE_LOC_ROOT;
   1277         return (UBool)(ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
   1278     case UCHAR_CHANGES_WHEN_UPPERCASED:
   1279         locCache=UCASE_LOC_ROOT;
   1280         return (UBool)(ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
   1281     case UCHAR_CHANGES_WHEN_TITLECASED:
   1282         locCache=UCASE_LOC_ROOT;
   1283         return (UBool)(ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
   1284     /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
   1285     case UCHAR_CHANGES_WHEN_CASEMAPPED:
   1286         locCache=UCASE_LOC_ROOT;
                /* changes under at least one of the lower, upper or title mappings */
   1287         return (UBool)(
   1288             ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 ||
   1289             ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 ||
   1290             ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
   1291     default:
   1292         return FALSE;
   1293     }
   1294 }
   1295
   1295