Home | History | Annotate | Download | only in impl
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  *
      7  *   Copyright (C) 2004-2015, International Business Machines
      8  *   Corporation and others.  All Rights Reserved.
      9  *
     10  *******************************************************************************
     11  *   file name:  UCaseProps.java
     12  *   encoding:   US-ASCII
     13  *   tab size:   8 (not used)
     14  *   indentation:4
     15  *
     16  *   created on: 2005jan29
     17  *   created by: Markus W. Scherer
     18  *
     19  *   Low-level Unicode character/string case mapping code.
     20  *   Java port of ucase.h/.c.
     21  */
     22 
     23 package android.icu.impl;
     24 
     25 import java.io.IOException;
     26 import java.nio.ByteBuffer;
     27 import java.util.Iterator;
     28 import java.util.Locale;
     29 
     30 import android.icu.lang.UCharacter;
     31 import android.icu.lang.UProperty;
     32 import android.icu.text.UTF16;
     33 import android.icu.text.UnicodeSet;
     34 import android.icu.util.ICUUncheckedIOException;
     35 import android.icu.util.ULocale;
     36 
     37 /**
     38  * @hide Only a subset of ICU is exposed in Android
     39  */
     40 public final class UCaseProps {
     41 
     42     // constructors etc. --------------------------------------------------- ***
     43 
     44     // port of ucase_openProps()
     45     private UCaseProps() throws IOException {
     46         ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME);
     47         readData(bytes);
     48     }
     49 
     50     private final void readData(ByteBuffer bytes) throws IOException {
     51         // read the header
     52         ICUBinary.readHeader(bytes, FMT, new IsAcceptable());
     53 
     54         // read indexes[]
     55         int count=bytes.getInt();
     56         if(count<IX_TOP) {
     57             throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
     58         }
     59         indexes=new int[count];
     60 
     61         indexes[0]=count;
     62         for(int i=1; i<count; ++i) {
     63             indexes[i]=bytes.getInt();
     64         }
     65 
     66         // read the trie
     67         trie=Trie2_16.createFromSerialized(bytes);
     68         int expectedTrieLength=indexes[IX_TRIE_SIZE];
     69         int trieLength=trie.getSerializedLength();
     70         if(trieLength>expectedTrieLength) {
     71             throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie");
     72         }
     73         // skip padding after trie bytes
     74         ICUBinary.skipBytes(bytes, expectedTrieLength-trieLength);
     75 
     76         // read exceptions[]
     77         count=indexes[IX_EXC_LENGTH];
     78         if(count>0) {
     79             exceptions=ICUBinary.getString(bytes, count, 0);
     80         }
     81 
     82         // read unfold[]
     83         count=indexes[IX_UNFOLD_LENGTH];
     84         if(count>0) {
     85             unfold=ICUBinary.getChars(bytes, count, 0);
     86         }
     87     }
     88 
     89     // implement ICUBinary.Authenticate
     90     private final static class IsAcceptable implements ICUBinary.Authenticate {
     91         @Override
     92         public boolean isDataVersionAcceptable(byte version[]) {
     93             return version[0]==3;
     94         }
     95     }
     96 
     97     // set of property starts for UnicodeSet ------------------------------- ***
     98 
     99     public final void addPropertyStarts(UnicodeSet set) {
    100         /* add the start code point of each same-value range of the trie */
    101         Iterator<Trie2.Range> trieIterator=trie.iterator();
    102         Trie2.Range range;
    103         while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
    104             set.add(range.startCodePoint);
    105         }
    106 
    107         /* add code points with hardcoded properties, plus the ones following them */
    108 
    109         /* (none right now, see comment below) */
    110 
    111         /*
    112          * Omit code points with hardcoded specialcasing properties
    113          * because we do not build property UnicodeSets for them right now.
    114          */
    115     }
    116 
    117     // data access primitives ---------------------------------------------- ***
    118     private static final int getExceptionsOffset(int props) {
    119         return props>>EXC_SHIFT;
    120     }
    121 
    122     private static final boolean propsHasException(int props) {
    123         return (props&EXCEPTION)!=0;
    124     }
    125 
    /*
     * Population-count table: flagsOffset[b] is the number of 1 bits in the
     * 8-bit value b (0..255).
     * slotOffset() indexes it with the flag bits below a given slot index,
     * which yields the number of slots present before that slot.
     */
    private static final byte flagsOffset[/*256*/]={
        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
    };
    145 
    146     private static final boolean hasSlot(int flags, int index) {
    147         return (flags&(1<<index))!=0;
    148     }
    149     private static final byte slotOffset(int flags, int index) {
    150         return flagsOffset[flags&((1<<index)-1)];
    151     }
    152 
    153     /*
    154      * Get the value of an optional-value slot where hasSlot(excWord, index).
    155      *
    156      * @param excWord (in) initial exceptions word
    157      * @param index (in) desired slot index
    158      * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++);
    159      * @return bits 31..0: slot value
    160      *             63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
    161      */
    162     private final long getSlotValueAndOffset(int excWord, int index, int excOffset) {
    163         long value;
    164         if((excWord&EXC_DOUBLE_SLOTS)==0) {
    165             excOffset+=slotOffset(excWord, index);
    166             value=exceptions.charAt(excOffset);
    167         } else {
    168             excOffset+=2*slotOffset(excWord, index);
    169             value=exceptions.charAt(excOffset++);
    170             value=(value<<16)|exceptions.charAt(excOffset);
    171         }
    172         return value |((long)excOffset<<32);
    173     }
    174 
    175     /* same as getSlotValueAndOffset() but does not return the slot offset */
    176     private final int getSlotValue(int excWord, int index, int excOffset) {
    177         int value;
    178         if((excWord&EXC_DOUBLE_SLOTS)==0) {
    179             excOffset+=slotOffset(excWord, index);
    180             value=exceptions.charAt(excOffset);
    181         } else {
    182             excOffset+=2*slotOffset(excWord, index);
    183             value=exceptions.charAt(excOffset++);
    184             value=(value<<16)|exceptions.charAt(excOffset);
    185         }
    186         return value;
    187     }
    188 
    189     // simple case mappings ------------------------------------------------ ***
    190 
    191     public final int tolower(int c) {
    192         int props=trie.get(c);
    193         if(!propsHasException(props)) {
    194             if(getTypeFromProps(props)>=UPPER) {
    195                 c+=getDelta(props);
    196             }
    197         } else {
    198             int excOffset=getExceptionsOffset(props);
    199             int excWord=exceptions.charAt(excOffset++);
    200             if(hasSlot(excWord, EXC_LOWER)) {
    201                 c=getSlotValue(excWord, EXC_LOWER, excOffset);
    202             }
    203         }
    204         return c;
    205     }
    206 
    207     public final int toupper(int c) {
    208         int props=trie.get(c);
    209         if(!propsHasException(props)) {
    210             if(getTypeFromProps(props)==LOWER) {
    211                 c+=getDelta(props);
    212             }
    213         } else {
    214             int excOffset=getExceptionsOffset(props);
    215             int excWord=exceptions.charAt(excOffset++);
    216             if(hasSlot(excWord, EXC_UPPER)) {
    217                 c=getSlotValue(excWord, EXC_UPPER, excOffset);
    218             }
    219         }
    220         return c;
    221     }
    222 
    223     public final int totitle(int c) {
    224         int props=trie.get(c);
    225         if(!propsHasException(props)) {
    226             if(getTypeFromProps(props)==LOWER) {
    227                 c+=getDelta(props);
    228             }
    229         } else {
    230             int excOffset=getExceptionsOffset(props);
    231             int excWord=exceptions.charAt(excOffset++);
    232             int index;
    233             if(hasSlot(excWord, EXC_TITLE)) {
    234                 index=EXC_TITLE;
    235             } else if(hasSlot(excWord, EXC_UPPER)) {
    236                 index=EXC_UPPER;
    237             } else {
    238                 return c;
    239             }
    240             c=getSlotValue(excWord, index, excOffset);
    241         }
    242         return c;
    243     }
    244 
    /**
     * Adds all simple case mappings and the full case folding for c to sa,
     * and also adds special case closure mappings.
     * c itself is not added.
     * For example, the mappings
     * - for s include long s
     * - for sharp s include ss
     * - for k include the Kelvin sign
     *
     * @param c the code point whose case closure is wanted
     * @param set the set that receives the closure code points and strings
     */
    public final void addCaseClosure(int c, UnicodeSet set) {
        /*
         * Hardcode the case closure of i and its relatives and ignore the
         * data file data for these characters.
         * The Turkic dotless i and dotted I with their case mapping conditions
         * and case folding option make the related characters behave specially.
         * This code matches their closure behavior to their case folding behavior.
         */

        switch(c) {
        case 0x49:
            /* regular i and I are in one equivalence class */
            set.add(0x69);
            return;
        case 0x69:
            set.add(0x49);
            return;
        case 0x130:
            /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
            set.add(iDot);
            return;
        case 0x131:
            /* dotless i is in a class by itself */
            return;
        default:
            /* otherwise use the data file data */
            break;
        }

        int props=trie.get(c);
        if(!propsHasException(props)) {
            if(getTypeFromProps(props)!=NONE) {
                /* add the one simple case mapping, no matter what type it is */
                int delta=getDelta(props);
                if(delta!=0) {
                    set.add(c+delta);
                }
            }
        } else {
            /*
             * c has exceptions, so there may be multiple simple and/or
             * full case mappings. Add them all.
             */
            int excOffset0, excOffset=getExceptionsOffset(props);
            int closureOffset;
            int excWord=exceptions.charAt(excOffset++);
            int index, closureLength, fullLength, length;

            /* remember the offset of the first slot, just after excWord */
            excOffset0=excOffset;

            /*
             * add all simple case mappings
             * (from here on c is reused as a scratch variable; the original
             * code point is not needed any more)
             */
            for(index=EXC_LOWER; index<=EXC_TITLE; ++index) {
                if(hasSlot(excWord, index)) {
                    excOffset=excOffset0;
                    c=getSlotValue(excWord, index, excOffset);
                    set.add(c);
                }
            }

            /* get the closure string pointer & length */
            if(hasSlot(excWord, EXC_CLOSURE)) {
                excOffset=excOffset0;
                long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset);
                /* low 32 bits: slot value; high 32 bits: offset of the slot's last char */
                closureLength=(int)value&CLOSURE_MAX_LENGTH; /* higher bits are reserved */
                closureOffset=(int)(value>>32)+1; /* behind this slot, unless there are full case mappings */
            } else {
                closureLength=0;
                closureOffset=0;
            }

            /* add the full case folding */
            if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
                excOffset=excOffset0;
                long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
                fullLength=(int)value;

                /* start of full case mapping strings */
                excOffset=(int)(value>>32)+1;

                fullLength&=0xffff; /* bits 16 and higher are reserved */

                /*
                 * fullLength packs one string length per 4-bit group, consumed
                 * from the low bits upward: lowercase, case folding, uppercase,
                 * titlecase (FULL_LOWER is presumably the 0xf mask -- confirm)
                 */

                /* skip the lowercase result string */
                excOffset+=fullLength&FULL_LOWER;
                fullLength>>=4;

                /* add the full case folding string */
                length=fullLength&0xf;
                if(length!=0) {
                    set.add(exceptions.substring(excOffset, excOffset+length));
                    excOffset+=length;
                }

                /* skip the uppercase and titlecase strings */
                fullLength>>=4;
                excOffset+=fullLength&0xf;
                fullLength>>=4;
                excOffset+=fullLength;

                closureOffset=excOffset; /* behind full case mappings */
            }

            /* add each code point in the closure string */
            int limit=closureOffset+closureLength;
            /* advance by 1 or 2 chars depending on the code point just read */
            for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) {
                c=exceptions.codePointAt(index);
                set.add(c);
            }
        }
    }
    363 
    364     /*
    365      * compare s, which has a length, with t=unfold[unfoldOffset..], which has a maximum length or is NUL-terminated
    366      * must be s.length()>0 and max>0 and s.length()<=max
    367      */
    368     private final int strcmpMax(String s, int unfoldOffset, int max) {
    369         int i1, length, c1, c2;
    370 
    371         length=s.length();
    372         max-=length; /* we require length<=max, so no need to decrement max in the loop */
    373         i1=0;
    374         do {
    375             c1=s.charAt(i1++);
    376             c2=unfold[unfoldOffset++];
    377             if(c2==0) {
    378                 return 1; /* reached the end of t but not of s */
    379             }
    380             c1-=c2;
    381             if(c1!=0) {
    382                 return c1; /* return difference result */
    383             }
    384         } while(--length>0);
    385         /* ends with length==0 */
    386 
    387         if(max==0 || unfold[unfoldOffset]==0) {
    388             return 0; /* equal to length of both strings */
    389         } else {
    390             return -max; /* return lengh difference */
    391         }
    392     }
    393 
    394     /**
    395      * Maps the string to single code points and adds the associated case closure
    396      * mappings.
    397      * The string is mapped to code points if it is their full case folding string.
    398      * In other words, this performs a reverse full case folding and then
    399      * adds the case closure items of the resulting code points.
    400      * If the string is found and its closure applied, then
    401      * the string itself is added as well as part of its code points' closure.
    402      *
    403      * @return true if the string was found
    404      */
    405     public final boolean addStringCaseClosure(String s, UnicodeSet set) {
    406         int i, length, start, limit, result, unfoldOffset, unfoldRows, unfoldRowWidth, unfoldStringWidth;
    407 
    408         if(unfold==null || s==null) {
    409             return false; /* no reverse case folding data, or no string */
    410         }
    411         length=s.length();
    412         if(length<=1) {
    413             /* the string is too short to find any match */
    414             /*
    415              * more precise would be:
    416              * if(!u_strHasMoreChar32Than(s, length, 1))
    417              * but this does not make much practical difference because
    418              * a single supplementary code point would just not be found
    419              */
    420             return false;
    421         }
    422 
    423         unfoldRows=unfold[UNFOLD_ROWS];
    424         unfoldRowWidth=unfold[UNFOLD_ROW_WIDTH];
    425         unfoldStringWidth=unfold[UNFOLD_STRING_WIDTH];
    426         //unfoldCPWidth=unfoldRowWidth-unfoldStringWidth;
    427 
    428         if(length>unfoldStringWidth) {
    429             /* the string is too long to find any match */
    430             return false;
    431         }
    432 
    433         /* do a binary search for the string */
    434         start=0;
    435         limit=unfoldRows;
    436         while(start<limit) {
    437             i=(start+limit)/2;
    438             unfoldOffset=((i+1)*unfoldRowWidth); // +1 to skip the header values above
    439             result=strcmpMax(s, unfoldOffset, unfoldStringWidth);
    440 
    441             if(result==0) {
    442                 /* found the string: add each code point, and its case closure */
    443                 int c;
    444 
    445                 for(i=unfoldStringWidth; i<unfoldRowWidth && unfold[unfoldOffset+i]!=0; i+=UTF16.getCharCount(c)) {
    446                     c=UTF16.charAt(unfold, unfoldOffset, unfold.length, i);
    447                     set.add(c);
    448                     addCaseClosure(c, set);
    449                 }
    450                 return true;
    451             } else if(result<0) {
    452                 limit=i;
    453             } else /* result>0 */ {
    454                 start=i+1;
    455             }
    456         }
    457 
    458         return false; /* string not found */
    459     }
    460 
    461     /** @return NONE, LOWER, UPPER, TITLE */
    462     public final int getType(int c) {
    463         return getTypeFromProps(trie.get(c));
    464     }
    465 
    466     /** @return like getType() but also sets IGNORABLE if c is case-ignorable */
    467     public final int getTypeOrIgnorable(int c) {
    468         return getTypeAndIgnorableFromProps(trie.get(c));
    469     }
    470 
    471     /** @return NO_DOT, SOFT_DOTTED, ABOVE, OTHER_ACCENT */
    472     public final int getDotType(int c) {
    473         int props=trie.get(c);
    474         if(!propsHasException(props)) {
    475             return props&DOT_MASK;
    476         } else {
    477             return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK;
    478         }
    479     }
    480 
    481     public final boolean isSoftDotted(int c) {
    482         return getDotType(c)==SOFT_DOTTED;
    483     }
    484 
    485     public final boolean isCaseSensitive(int c) {
    486         return (trie.get(c)&SENSITIVE)!=0;
    487     }
    488 
    489     // string casing ------------------------------------------------------- ***
    490 
    491     /*
    492      * These internal functions form the core of string case mappings.
    493      * They map single code points to result code points or strings and take
    494      * all necessary conditions (context, locale ID, options) into account.
    495      *
    496      * They do not iterate over the source or write to the destination
    497      * so that the same functions are useful for non-standard string storage,
    498      * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
    499      * For the same reason, the "surrounding text" context is passed in as a
    500      * ContextIterator which does not make any assumptions about
    501      * the underlying storage.
    502      *
    503      * This section contains helper functions that check for conditions
    504      * in the input text surrounding the current code point
    505      * according to SpecialCasing.txt.
    506      *
    507      * Each helper function gets the index
    508      * - after the current code point if it looks at following text
    509      * - before the current code point if it looks at preceding text
    510      *
    511      * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
    512      *
    513      * Final_Sigma
    514      *   C is preceded by a sequence consisting of
    515      *     a cased letter and a case-ignorable sequence,
    516      *   and C is not followed by a sequence consisting of
    517      *     an ignorable sequence and then a cased letter.
    518      *
    519      * More_Above
    520      *   C is followed by one or more characters of combining class 230 (ABOVE)
    521      *   in the combining character sequence.
    522      *
    523      * After_Soft_Dotted
    524      *   The last preceding character with combining class of zero before C
    525      *   was Soft_Dotted,
    526      *   and there is no intervening combining character class 230 (ABOVE).
    527      *
    528      * Before_Dot
    529      *   C is followed by combining dot above (U+0307).
    530      *   Any sequence of characters with a combining class that is neither 0 nor 230
    531      *   may intervene between the current character and the combining dot above.
    532      *
    533      * The erratum from 2002-10-31 adds the condition
    534      *
    535      * After_I
    536      *   The last preceding base character was an uppercase I, and there is no
    537      *   intervening combining character class 230 (ABOVE).
    538      *
    539      *   (See Jitterbug 2344 and the comments on After_I below.)
    540      *
    541      * Helper definitions in Unicode 3.2 UAX 21:
    542      *
    543      * D1. A character C is defined to be cased
    544      *     if it meets any of the following criteria:
    545      *
    546      *   - The general category of C is Titlecase Letter (Lt)
    547      *   - In [CoreProps], C has one of the properties Uppercase, or Lowercase
    548      *   - Given D = NFD(C), then it is not the case that:
    549      *     D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
     *     (This third criterion does not add any characters to the list
    551      *      for Unicode 3.2. Ignored.)
    552      *
    553      * D2. A character C is defined to be case-ignorable
    554      *     if it meets either of the following criteria:
    555      *
    556      *   - The general category of C is
    557      *     Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
    558      *     Letter Modifier (Lm), or Symbol Modifier (Sk)
    559      *   - C is one of the following characters
    560      *     U+0027 APOSTROPHE
    561      *     U+00AD SOFT HYPHEN (SHY)
    562      *     U+2019 RIGHT SINGLE QUOTATION MARK
    563      *            (the preferred character for apostrophe)
    564      *
    565      * D3. A case-ignorable sequence is a sequence of
    566      *     zero or more case-ignorable characters.
    567      */
    568 
    /**
     * Gives string case mappings access to the text surrounding the code
     * point that is being mapped, as needed for conditional mappings.
     *
     * Iteration only ever moves away from that code point, either backward
     * or forward. No indexes are used on this interface, and neither random
     * access nor an arbitrary change of iteration direction is supported.
     *
     * The code point being case-mapped itself is never returned by
     * this iterator.
     */
    public interface ContextIterator {
        /**
         * Prepares the iterator for forward or backward iteration.
         *
         * @param dir &gt;0: start iterating forward from the first code point
         *            after the one that is being case-mapped;
         *            &lt;0: start iterating backward from the first code point
         *            before the one that is being case-mapped
         */
        void reset(int dir);

        /**
         * Returns the next code point in the direction selected by the last
         * reset() call and moves on.
         *
         * @return the next code point, or a value &lt;0 when the iteration is done
         */
        int next();
    }
    597 
    /**
     * For string case mappings, a single character (a code point) is mapped
     * either to itself (in which case in-place mapping functions do nothing),
     * or to another single code point, or to a string.
     * Aside from the string contents, these are indicated with a single int
     * value as follows:
     *
     * Mapping to self: Negative values (~self instead of -self to support U+0000)
     *
     * Mapping to another code point: Positive values >MAX_STRING_LENGTH
     *
     * Mapping to a string: The string length (0..MAX_STRING_LENGTH) is
     * returned. Note that the string result may indeed have zero length.
     *
     * MAX_STRING_LENGTH itself is therefore the largest string length that
     * can be reported this way (0x1f = 31).
     */
    public static final int MAX_STRING_LENGTH=0x1f;
    613 
    // Case-locale identifiers, as returned by getCaseLocale().
    //private static final int LOC_UNKNOWN=0;   (commented out)
    public static final int LOC_ROOT=1;         // default for every language not listed below
    private static final int LOC_TURKISH=2;     // "tr"/"tur" and "az"/"aze"
    private static final int LOC_LITHUANIAN=3;  // "lt"/"lit"
    static final int LOC_GREEK=4;               // "el"/"ell"
    public static final int LOC_DUTCH=5;        // "nl"/"nld"
    620 
    621     public static final int getCaseLocale(Locale locale) {
    622         return getCaseLocale(locale.getLanguage());
    623     }
    624     public static final int getCaseLocale(ULocale locale) {
    625         return getCaseLocale(locale.getLanguage());
    626     }
    627     /** Accepts both 2- and 3-letter language subtags. */
    628     private static final int getCaseLocale(String language) {
    629         // Check the subtag length to reduce the number of comparisons
    630         // for locales without special behavior.
    631         // Fastpath for English "en" which is often used for default (=root locale) case mappings,
    632         // and for Chinese "zh": Very common but no special case mapping behavior.
    633         if(language.length()==2) {
    634             if(language.equals("en") || language.charAt(0)>'t') {
    635                 return LOC_ROOT;
    636             } else if(language.equals("tr") || language.equals("az")) {
    637                 return LOC_TURKISH;
    638             } else if(language.equals("el")) {
    639                 return LOC_GREEK;
    640             } else if(language.equals("lt")) {
    641                 return LOC_LITHUANIAN;
    642             } else if(language.equals("nl")) {
    643                 return LOC_DUTCH;
    644             }
    645         } else if(language.length()==3) {
    646             if(language.equals("tur") || language.equals("aze")) {
    647                 return LOC_TURKISH;
    648             } else if(language.equals("ell")) {
    649                 return LOC_GREEK;
    650             } else if(language.equals("lit")) {
    651                 return LOC_LITHUANIAN;
    652             } else if(language.equals("nld")) {
    653                 return LOC_DUTCH;
    654             }
    655         }
    656         return LOC_ROOT;
    657     }
    658 
    659     /* Is followed by {case-ignorable}* cased  ? (dir determines looking forward/backward) */
    660     private final boolean isFollowedByCasedLetter(ContextIterator iter, int dir) {
    661         int c;
    662 
    663         if(iter==null) {
    664             return false;
    665         }
    666 
    667         for(iter.reset(dir); (c=iter.next())>=0;) {
    668             int type=getTypeOrIgnorable(c);
    669             if((type&4)!=0) {
    670                 /* case-ignorable, continue with the loop */
    671             } else if(type!=NONE) {
    672                 return true; /* followed by cased letter */
    673             } else {
    674                 return false; /* uncased and not case-ignorable */
    675             }
    676         }
    677 
    678         return false; /* not followed by cased letter */
    679     }
    680 
    681     /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
    682     private final boolean isPrecededBySoftDotted(ContextIterator iter) {
    683         int c;
    684         int dotType;
    685 
    686         if(iter==null) {
    687             return false;
    688         }
    689 
    690         for(iter.reset(-1); (c=iter.next())>=0;) {
    691             dotType=getDotType(c);
    692             if(dotType==SOFT_DOTTED) {
    693                 return true; /* preceded by TYPE_i */
    694             } else if(dotType!=OTHER_ACCENT) {
    695                 return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
    696             }
    697         }
    698 
    699         return false; /* not preceded by TYPE_i */
    700     }
    701 
    702     /*
    703      * See Jitterbug 2344:
    704      * The condition After_I for Turkic-lowercasing of U+0307 combining dot above
    705      * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
    706      * we made those releases compatible with Unicode 3.2 which had not fixed
    707      * a related bug in SpecialCasing.txt.
    708      *
    709      * From the Jitterbug 2344 text:
    710      * ... this bug is listed as a Unicode erratum
    711      * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
    712      * <quote>
    713      * There are two errors in SpecialCasing.txt.
    714      * 1. Missing semicolons on two lines. ... [irrelevant for ICU]
    715      * 2. An incorrect context definition. Correct as follows:
    716      * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
    717      * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
    718      * ---
    719      * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
    720      * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
    721      * where the context After_I is defined as:
    722      * The last preceding base character was an uppercase I, and there is no
    723      * intervening combining character class 230 (ABOVE).
    724      * </quote>
    725      *
    726      * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
    727      *
    728      * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
    729      * # This matches the behavior of the canonically equivalent I-dot_above
    730      *
    731      * See also the description in this place in older versions of uchar.c (revision 1.100).
    732      *
    733      * Markus W. Scherer 2003-feb-15
    734      */
    735 
    736     /* Is preceded by base character 'I' with no intervening cc=230 ? */
    737     private final boolean isPrecededBy_I(ContextIterator iter) {
    738         int c;
    739         int dotType;
    740 
    741         if(iter==null) {
    742             return false;
    743         }
    744 
    745         for(iter.reset(-1); (c=iter.next())>=0;) {
    746             if(c==0x49) {
    747                 return true; /* preceded by I */
    748             }
    749             dotType=getDotType(c);
    750             if(dotType!=OTHER_ACCENT) {
    751                 return false; /* preceded by different base character (not I), or intervening cc==230 */
    752             }
    753         }
    754 
    755         return false; /* not preceded by I */
    756     }
    757 
    758     /* Is followed by one or more cc==230 ? */
    759     private final boolean isFollowedByMoreAbove(ContextIterator iter) {
    760         int c;
    761         int dotType;
    762 
    763         if(iter==null) {
    764             return false;
    765         }
    766 
    767         for(iter.reset(1); (c=iter.next())>=0;) {
    768             dotType=getDotType(c);
    769             if(dotType==ABOVE) {
    770                 return true; /* at least one cc==230 following */
    771             } else if(dotType!=OTHER_ACCENT) {
    772                 return false; /* next base character, no more cc==230 following */
    773             }
    774         }
    775 
    776         return false; /* no more cc==230 following */
    777     }
    778 
    779     /* Is followed by a dot above (without cc==230 in between) ? */
    780     private final boolean isFollowedByDotAbove(ContextIterator iter) {
    781         int c;
    782         int dotType;
    783 
    784         if(iter==null) {
    785             return false;
    786         }
    787 
    788         for(iter.reset(1); (c=iter.next())>=0; ) {
    789             if(c==0x307) {
    790                 return true;
    791             }
    792             dotType=getDotType(c);
    793             if(dotType!=OTHER_ACCENT) {
    794                 return false; /* next base character or cc==230 in between */
    795             }
    796         }
    797 
    798         return false; /* no dot above following */
    799     }
    800 
    801     private static final String
    802         iDot=       "i\u0307",
    803         jDot=       "j\u0307",
    804         iOgonekDot= "\u012f\u0307",
    805         iDotGrave=  "i\u0307\u0300",
    806         iDotAcute=  "i\u0307\u0301",
    807         iDotTilde=  "i\u0307\u0303";
    808 
    809     /**
    810      * Get the full lowercase mapping for c.
    811      *
    812      * @param c Character to be mapped.
    813      * @param iter Character iterator, used for context-sensitive mappings.
    814      *             See ContextIterator for details.
    815      *             If iter==null then a context-independent result is returned.
    816      * @param out If the mapping result is a string, then it is appended to out.
    817      * @param caseLocale Case locale value from ucase_getCaseLocale().
    818      * @return Output code point or string length, see MAX_STRING_LENGTH.
    819      *
    820      * @see ContextIterator
    821      * @see #MAX_STRING_LENGTH
    822      * @hide draft / provisional / internal are hidden on Android
    823      */
    824     public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
    825         int result, props;
    826 
    827         result=c;
    828         props=trie.get(c);
    829         if(!propsHasException(props)) {
    830             if(getTypeFromProps(props)>=UPPER) {
    831                 result=c+getDelta(props);
    832             }
    833         } else {
    834             int excOffset=getExceptionsOffset(props), excOffset2;
    835             int excWord=exceptions.charAt(excOffset++);
    836             int full;
    837 
    838             excOffset2=excOffset;
    839 
    840             if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
    841                 /* use hardcoded conditions and mappings */
    842                 /*
    843                  * Test for conditional mappings first
    844                  *   (otherwise the unconditional default mappings are always taken),
    845                  * then test for characters that have unconditional mappings in SpecialCasing.txt,
    846                  * then get the UnicodeData.txt mappings.
    847                  */
    848                 if( caseLocale==LOC_LITHUANIAN &&
    849                         /* base characters, find accents above */
    850                         (((c==0x49 || c==0x4a || c==0x12e) &&
    851                             isFollowedByMoreAbove(iter)) ||
    852                         /* precomposed with accent above, no need to find one */
    853                         (c==0xcc || c==0xcd || c==0x128))
    854                 ) {
    855                     /*
    856                         # Lithuanian
    857 
    858                         # Lithuanian retains the dot in a lowercase i when followed by accents.
    859 
    860                         # Introduce an explicit dot above when lowercasing capital I's and J's
    861                         # whenever there are more accents above.
    862                         # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
    863 
    864                         0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
    865                         004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
    866                         012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
    867                         00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
    868                         00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
    869                         0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
    870                      */
    871                     try {
    872                         switch(c) {
    873                         case 0x49:  /* LATIN CAPITAL LETTER I */
    874                             out.append(iDot);
    875                             return 2;
    876                         case 0x4a:  /* LATIN CAPITAL LETTER J */
    877                             out.append(jDot);
    878                             return 2;
    879                         case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
    880                             out.append(iOgonekDot);
    881                             return 2;
    882                         case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
    883                             out.append(iDotGrave);
    884                             return 3;
    885                         case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
    886                             out.append(iDotAcute);
    887                             return 3;
    888                         case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
    889                             out.append(iDotTilde);
    890                             return 3;
    891                         default:
    892                             return 0; /* will not occur */
    893                         }
    894                     } catch (IOException e) {
    895                         throw new ICUUncheckedIOException(e);
    896                     }
    897                 /* # Turkish and Azeri */
    898                 } else if(caseLocale==LOC_TURKISH && c==0x130) {
    899                     /*
    900                         # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
    901                         # The following rules handle those cases.
    902 
    903                         0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
    904                         0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
    905                      */
    906                     return 0x69;
    907                 } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
    908                     /*
    909                         # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
    910                         # This matches the behavior of the canonically equivalent I-dot_above
    911 
    912                         0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
    913                         0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
    914                      */
    915                     return 0; /* remove the dot (continue without output) */
    916                 } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
    917                     /*
    918                         # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
    919 
    920                         0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
    921                         0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
    922                      */
    923                     return 0x131;
    924                 } else if(c==0x130) {
    925                     /*
    926                         # Preserve canonical equivalence for I with dot. Turkic is handled below.
    927 
    928                         0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
    929                      */
    930                     try {
    931                         out.append(iDot);
    932                         return 2;
    933                     } catch (IOException e) {
    934                         throw new ICUUncheckedIOException(e);
    935                     }
    936                 } else if(  c==0x3a3 &&
    937                             !isFollowedByCasedLetter(iter, 1) &&
    938                             isFollowedByCasedLetter(iter, -1) /* -1=preceded */
    939                 ) {
    940                     /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
    941                     /*
    942                         # Special case for final form of sigma
    943 
    944                         03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
    945                      */
    946                     return 0x3c2; /* greek small final sigma */
    947                 } else {
    948                     /* no known conditional special case mapping, use a normal mapping */
    949                 }
    950             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
    951                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
    952                 full=(int)value&FULL_LOWER;
    953                 if(full!=0) {
    954                     /* start of full case mapping strings */
    955                     excOffset=(int)(value>>32)+1;
    956 
    957                     try {
    958                         // append the lowercase mapping
    959                         out.append(exceptions, excOffset, excOffset+full);
    960 
    961                         /* return the string length */
    962                         return full;
    963                     } catch (IOException e) {
    964                         throw new ICUUncheckedIOException(e);
    965                     }
    966                 }
    967             }
    968 
    969             if(hasSlot(excWord, EXC_LOWER)) {
    970                 result=getSlotValue(excWord, EXC_LOWER, excOffset2);
    971             }
    972         }
    973 
    974         return (result==c) ? ~result : result;
    975     }
    976 
    977     /* internal */
    978     private final int toUpperOrTitle(int c, ContextIterator iter,
    979                                      Appendable out,
    980                                      int loc,
    981                                      boolean upperNotTitle) {
    982         int result;
    983         int props;
    984 
    985         result=c;
    986         props=trie.get(c);
    987         if(!propsHasException(props)) {
    988             if(getTypeFromProps(props)==LOWER) {
    989                 result=c+getDelta(props);
    990             }
    991         } else {
    992             int excOffset=getExceptionsOffset(props), excOffset2;
    993             int excWord=exceptions.charAt(excOffset++);
    994             int full, index;
    995 
    996             excOffset2=excOffset;
    997 
    998             if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
    999                 /* use hardcoded conditions and mappings */
   1000                 if(loc==LOC_TURKISH && c==0x69) {
   1001                     /*
   1002                         # Turkish and Azeri
   1003 
   1004                         # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
   1005                         # The following rules handle those cases.
   1006 
   1007                         # When uppercasing, i turns into a dotted capital I
   1008 
   1009                         0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
   1010                         0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
   1011                     */
   1012                     return 0x130;
   1013                 } else if(loc==LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter)) {
   1014                     /*
   1015                         # Lithuanian
   1016 
   1017                         # Lithuanian retains the dot in a lowercase i when followed by accents.
   1018 
   1019                         # Remove DOT ABOVE after "i" with upper or titlecase
   1020 
   1021                         0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
   1022                      */
   1023                     return 0; /* remove the dot (continue without output) */
   1024                 } else {
   1025                     /* no known conditional special case mapping, use a normal mapping */
   1026                 }
   1027             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
   1028                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
   1029                 full=(int)value&0xffff;
   1030 
   1031                 /* start of full case mapping strings */
   1032                 excOffset=(int)(value>>32)+1;
   1033 
   1034                 /* skip the lowercase and case-folding result strings */
   1035                 excOffset+=full&FULL_LOWER;
   1036                 full>>=4;
   1037                 excOffset+=full&0xf;
   1038                 full>>=4;
   1039 
   1040                 if(upperNotTitle) {
   1041                     full&=0xf;
   1042                 } else {
   1043                     /* skip the uppercase result string */
   1044                     excOffset+=full&0xf;
   1045                     full=(full>>4)&0xf;
   1046                 }
   1047 
   1048                 if(full!=0) {
   1049                     try {
   1050                         // append the result string
   1051                         out.append(exceptions, excOffset, excOffset+full);
   1052 
   1053                         /* return the string length */
   1054                         return full;
   1055                     } catch (IOException e) {
   1056                         throw new ICUUncheckedIOException(e);
   1057                     }
   1058                 }
   1059             }
   1060 
   1061             if(!upperNotTitle && hasSlot(excWord, EXC_TITLE)) {
   1062                 index=EXC_TITLE;
   1063             } else if(hasSlot(excWord, EXC_UPPER)) {
   1064                 /* here, titlecase is same as uppercase */
   1065                 index=EXC_UPPER;
   1066             } else {
   1067                 return ~c;
   1068             }
   1069             result=getSlotValue(excWord, index, excOffset2);
   1070         }
   1071 
   1072         return (result==c) ? ~result : result;
   1073     }
   1074 
   1075     public final int toFullUpper(int c, ContextIterator iter,
   1076                                  Appendable out,
   1077                                  int caseLocale) {
   1078         return toUpperOrTitle(c, iter, out, caseLocale, true);
   1079     }
   1080 
   1081     public final int toFullTitle(int c, ContextIterator iter,
   1082                                  Appendable out,
   1083                                  int caseLocale) {
   1084         return toUpperOrTitle(c, iter, out, caseLocale, false);
   1085     }
   1086 
   1087     /* case folding ------------------------------------------------------------- */
   1088 
   1089     /*
   1090      * Case folding is similar to lowercasing.
   1091      * The result may be a simple mapping, i.e., a single code point, or
   1092      * a full mapping, i.e., a string.
   1093      * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
   1094      * then only the lowercase mapping is stored.
   1095      *
   1096      * Some special cases are hardcoded because their conditions cannot be
   1097      * parsed and processed from CaseFolding.txt.
   1098      *
   1099      * Unicode 3.2 CaseFolding.txt specifies for its status field:
   1100 
   1101     # C: common case folding, common mappings shared by both simple and full mappings.
   1102     # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
   1103     # S: simple case folding, mappings to single characters where different from F.
   1104     # T: special case for uppercase I and dotted uppercase I
   1105     #    - For non-Turkic languages, this mapping is normally not used.
   1106     #    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
   1107     #
   1108     # Usage:
   1109     #  A. To do a simple case folding, use the mappings with status C + S.
   1110     #  B. To do a full case folding, use the mappings with status C + F.
   1111     #
   1112     #    The mappings with status T can be used or omitted depending on the desired case-folding
   1113     #    behavior. (The default option is to exclude them.)
   1114 
   1115      * Unicode 3.2 has 'T' mappings as follows:
   1116 
   1117     0049; T; 0131; # LATIN CAPITAL LETTER I
   1118     0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
   1119 
   1120      * while the default mappings for these code points are:
   1121 
   1122     0049; C; 0069; # LATIN CAPITAL LETTER I
   1123     0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
   1124 
   1125      * U+0130 has no simple case folding (simple-case-folds to itself).
   1126      */
   1127 
   1128     /**
   1129      * Bit mask for getting just the options from a string compare options word
   1130      * that are relevant for case folding (of a single string or code point).
   1131      * @hide draft / provisional / internal are hidden on Android
   1132      */
   1133     private static final int FOLD_CASE_OPTIONS_MASK = 0xff;
   1134 
   1135     /* return the simple case folding mapping for c */
   1136     public final int fold(int c, int options) {
   1137         int props=trie.get(c);
   1138         if(!propsHasException(props)) {
   1139             if(getTypeFromProps(props)>=UPPER) {
   1140                 c+=getDelta(props);
   1141             }
   1142         } else {
   1143             int excOffset=getExceptionsOffset(props);
   1144             int excWord=exceptions.charAt(excOffset++);
   1145             int index;
   1146             if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
   1147                 /* special case folding mappings, hardcoded */
   1148                 if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
   1149                     /* default mappings */
   1150                     if(c==0x49) {
   1151                         /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
   1152                         return 0x69;
   1153                     } else if(c==0x130) {
   1154                         /* no simple case folding for U+0130 */
   1155                         return c;
   1156                     }
   1157                 } else {
   1158                     /* Turkic mappings */
   1159                     if(c==0x49) {
   1160                         /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
   1161                         return 0x131;
   1162                     } else if(c==0x130) {
   1163                         /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
   1164                         return 0x69;
   1165                     }
   1166                 }
   1167             }
   1168             if(hasSlot(excWord, EXC_FOLD)) {
   1169                 index=EXC_FOLD;
   1170             } else if(hasSlot(excWord, EXC_LOWER)) {
   1171                 index=EXC_LOWER;
   1172             } else {
   1173                 return c;
   1174             }
   1175             c=getSlotValue(excWord, index, excOffset);
   1176         }
   1177         return c;
   1178     }
   1179 
   1180     /*
   1181      * Issue for canonical caseless match (UAX #21):
   1182      * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
   1183      * canonical equivalence, unlike default-option casefolding.
   1184      * For example, I-grave and I + grave fold to strings that are not canonically
   1185      * equivalent.
   1186      * For more details, see the comment in unorm_compare() in unorm.cpp
   1187      * and the intermediate prototype changes for Jitterbug 2021.
   1188      * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
   1189      *
   1190      * This did not get fixed because it appears that it is not possible to fix
   1191      * it for uppercase and lowercase characters (I-grave vs. i-grave)
   1192      * together in a way that they still fold to common result strings.
   1193      */
   1194 
   1195     public final int toFullFolding(int c, Appendable out, int options) {
   1196         int result;
   1197         int props;
   1198 
   1199         result=c;
   1200         props=trie.get(c);
   1201         if(!propsHasException(props)) {
   1202             if(getTypeFromProps(props)>=UPPER) {
   1203                 result=c+getDelta(props);
   1204             }
   1205         } else {
   1206             int excOffset=getExceptionsOffset(props), excOffset2;
   1207             int excWord=exceptions.charAt(excOffset++);
   1208             int full, index;
   1209 
   1210             excOffset2=excOffset;
   1211 
   1212             if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
   1213                 /* use hardcoded conditions and mappings */
   1214                 if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
   1215                     /* default mappings */
   1216                     if(c==0x49) {
   1217                         /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
   1218                         return 0x69;
   1219                     } else if(c==0x130) {
   1220                         /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
   1221                         try {
   1222                             out.append(iDot);
   1223                             return 2;
   1224                         } catch (IOException e) {
   1225                             throw new ICUUncheckedIOException(e);
   1226                         }
   1227                     }
   1228                 } else {
   1229                     /* Turkic mappings */
   1230                     if(c==0x49) {
   1231                         /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
   1232                         return 0x131;
   1233                     } else if(c==0x130) {
   1234                         /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
   1235                         return 0x69;
   1236                     }
   1237                 }
   1238             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
   1239                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
   1240                 full=(int)value&0xffff;
   1241 
   1242                 /* start of full case mapping strings */
   1243                 excOffset=(int)(value>>32)+1;
   1244 
   1245                 /* skip the lowercase result string */
   1246                 excOffset+=full&FULL_LOWER;
   1247                 full=(full>>4)&0xf;
   1248 
   1249                 if(full!=0) {
   1250                     try {
   1251                         // append the result string
   1252                         out.append(exceptions, excOffset, excOffset+full);
   1253 
   1254                         /* return the string length */
   1255                         return full;
   1256                     } catch (IOException e) {
   1257                         throw new ICUUncheckedIOException(e);
   1258                     }
   1259                 }
   1260             }
   1261 
   1262             if(hasSlot(excWord, EXC_FOLD)) {
   1263                 index=EXC_FOLD;
   1264             } else if(hasSlot(excWord, EXC_LOWER)) {
   1265                 index=EXC_LOWER;
   1266             } else {
   1267                 return ~c;
   1268             }
   1269             result=getSlotValue(excWord, index, excOffset2);
   1270         }
   1271 
   1272         return (result==c) ? ~result : result;
   1273     }
   1274 
   1275     /* case mapping properties API ---------------------------------------------- */
   1276 
   1277     /*
   1278      * We need a StringBuilder for multi-code point output from the
   1279      * full case mapping functions. However, we do not actually use that output,
   1280      * we just check whether the input character was mapped to anything else.
   1281      * We use a shared StringBuilder to avoid allocating a new one in each call.
   1282      * We remove its contents each time so that it does not grow large over time.
   1283      *
   1284      * @internal
   1285      */
   1286     public static final StringBuilder dummyStringBuilder = new StringBuilder();
   1287 
   1288     public final boolean hasBinaryProperty(int c, int which) {
   1289         switch(which) {
   1290         case UProperty.LOWERCASE:
   1291             return LOWER==getType(c);
   1292         case UProperty.UPPERCASE:
   1293             return UPPER==getType(c);
   1294         case UProperty.SOFT_DOTTED:
   1295             return isSoftDotted(c);
   1296         case UProperty.CASE_SENSITIVE:
   1297             return isCaseSensitive(c);
   1298         case UProperty.CASED:
   1299             return NONE!=getType(c);
   1300         case UProperty.CASE_IGNORABLE:
   1301             return (getTypeOrIgnorable(c)>>2)!=0;
   1302         /*
   1303          * Note: The following Changes_When_Xyz are defined as testing whether
   1304          * the NFD form of the input changes when Xyz-case-mapped.
   1305          * However, this simpler implementation of these properties,
   1306          * ignoring NFD, passes the tests.
   1307          * The implementation needs to be changed if the tests start failing.
   1308          * When that happens, optimizations should be used to work with the
   1309          * per-single-code point ucase_toFullXyz() functions unless
   1310          * the NFD form has more than one code point,
   1311          * and the property starts set needs to be the union of the
   1312          * start sets for normalization and case mappings.
   1313          */
   1314         case UProperty.CHANGES_WHEN_LOWERCASED:
   1315             dummyStringBuilder.setLength(0);
   1316             return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0;
   1317         case UProperty.CHANGES_WHEN_UPPERCASED:
   1318             dummyStringBuilder.setLength(0);
   1319             return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0;
   1320         case UProperty.CHANGES_WHEN_TITLECASED:
   1321             dummyStringBuilder.setLength(0);
   1322             return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
   1323         /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
   1324         case UProperty.CHANGES_WHEN_CASEMAPPED:
   1325             dummyStringBuilder.setLength(0);
   1326             return
   1327                 toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
   1328                 toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
   1329                 toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
   1330         default:
   1331             return false;
   1332         }
   1333     }
   1334 
   1335     // data members -------------------------------------------------------- ***
   1336     private int indexes[];
   1337     private String exceptions;
   1338     private char unfold[];
   1339 
   1340     private Trie2_16 trie;
   1341 
   1342     // data format constants ----------------------------------------------- ***
   1343     private static final String DATA_NAME="ucase";
   1344     private static final String DATA_TYPE="icu";
   1345     private static final String DATA_FILE_NAME=DATA_NAME+"."+DATA_TYPE;
   1346 
   1347     /* format "cAsE" */
   1348     private static final int FMT=0x63415345;
   1349 
   1350     /* indexes into indexes[] */
   1351     //private static final int IX_INDEX_TOP=0;
   1352     //private static final int IX_LENGTH=1;
   1353     private static final int IX_TRIE_SIZE=2;
   1354     private static final int IX_EXC_LENGTH=3;
   1355     private static final int IX_UNFOLD_LENGTH=4;
   1356 
   1357     //private static final int IX_MAX_FULL_LENGTH=15;
   1358     private static final int IX_TOP=16;
   1359 
   1360     // definitions for 16-bit case properties word ------------------------- ***
   1361 
   1362     /* 2-bit constants for types of cased characters */
   1363     public static final int TYPE_MASK=3;
   1364     public static final int NONE=0;
   1365     public static final int LOWER=1;
   1366     public static final int UPPER=2;
   1367     public static final int TITLE=3;
   1368 
   1369     /** @return NONE, LOWER, UPPER, TITLE */
   1370     private static final int getTypeFromProps(int props) {
   1371         return props&TYPE_MASK;
   1372     }
   1373 
   1374     /** @return like getTypeFromProps() but also sets IGNORABLE if props indicate case-ignorable */
   1375     private static final int getTypeAndIgnorableFromProps(int props) {
   1376         return props&7;
   1377     }
   1378 
   1379     static final int IGNORABLE=4;
   1380     private static final int SENSITIVE=     8;
   1381     private static final int EXCEPTION=     0x10;
   1382 
   1383     private static final int DOT_MASK=      0x60;
   1384     //private static final int NO_DOT=        0;      /* normal characters with cc=0 */
   1385     private static final int SOFT_DOTTED=   0x20;   /* soft-dotted characters with cc=0 */
   1386     private static final int ABOVE=         0x40;   /* "above" accents with cc=230 */
   1387     private static final int OTHER_ACCENT=  0x60;   /* other accent character (0<cc!=230) */
   1388 
   1389     /* no exception: bits 15..7 are a 9-bit signed case mapping delta */
   1390     private static final int DELTA_SHIFT=   7;
   1391     //private static final int DELTA_MASK=    0xff80;
   1392     //private static final int MAX_DELTA=     0xff;
   1393     //private static final int MIN_DELTA=     (-MAX_DELTA-1);
   1394 
   1395     private static final int getDelta(int props) {
   1396         return (short)props>>DELTA_SHIFT;
   1397     }
   1398 
   1399     /* exception: bits 15..5 are an unsigned 11-bit index into the exceptions array */
   1400     private static final int EXC_SHIFT=     5;
   1401     //private static final int EXC_MASK=      0xffe0;
   1402     //private static final int MAX_EXCEPTIONS=((EXC_MASK>>EXC_SHIFT)+1);
   1403 
   1404     /* definitions for 16-bit main exceptions word ------------------------------ */
   1405 
   1406     /* first 8 bits indicate values in optional slots */
   1407     private static final int EXC_LOWER=0;
   1408     private static final int EXC_FOLD=1;
   1409     private static final int EXC_UPPER=2;
   1410     private static final int EXC_TITLE=3;
   1411     //private static final int EXC_4=4;           /* reserved */
   1412     //private static final int EXC_5=5;           /* reserved */
   1413     private static final int EXC_CLOSURE=6;
   1414     private static final int EXC_FULL_MAPPINGS=7;
   1415     //private static final int EXC_ALL_SLOTS=8;   /* one past the last slot */
   1416 
   1417     /* each slot is 2 uint16_t instead of 1 */
   1418     private static final int EXC_DOUBLE_SLOTS=          0x100;
   1419 
   1420     /* reserved: exception bits 11..9 */
   1421 
   1422     /* EXC_DOT_MASK=DOT_MASK<<EXC_DOT_SHIFT */
   1423     private static final int EXC_DOT_SHIFT=7;
   1424 
   1425     /* normally stored in the main word, but pushed out for larger exception indexes */
   1426     //private static final int EXC_DOT_MASK=              0x3000;
   1427     //private static final int EXC_NO_DOT=                0;
   1428     //private static final int EXC_SOFT_DOTTED=           0x1000;
   1429     //private static final int EXC_ABOVE=                 0x2000; /* "above" accents with cc=230 */
   1430     //private static final int EXC_OTHER_ACCENT=          0x3000; /* other character (0<cc!=230) */
   1431 
   1432     /* complex/conditional mappings */
   1433     private static final int EXC_CONDITIONAL_SPECIAL=   0x4000;
   1434     private static final int EXC_CONDITIONAL_FOLD=      0x8000;
   1435 
   1436     /* definitions for lengths word for full case mappings */
   1437     private static final int FULL_LOWER=    0xf;
   1438     //private static final int FULL_FOLDING=  0xf0;
   1439     //private static final int FULL_UPPER=    0xf00;
   1440     //private static final int FULL_TITLE=    0xf000;
   1441 
   1442     /* maximum lengths */
   1443     //private static final int FULL_MAPPINGS_MAX_LENGTH=4*0xf;
   1444     private static final int CLOSURE_MAX_LENGTH=0xf;
   1445 
   1446     /* constants for reverse case folding ("unfold") data */
   1447     private static final int UNFOLD_ROWS=0;
   1448     private static final int UNFOLD_ROW_WIDTH=1;
   1449     private static final int UNFOLD_STRING_WIDTH=2;
   1450 
   1451     /*
   1452      * public singleton instance
   1453      */
   1454     public static final UCaseProps INSTANCE;
   1455 
   1456     // This static initializer block must be placed after
   1457     // other static member initialization
   1458     static {
   1459         try {
   1460             INSTANCE = new UCaseProps();
   1461         } catch (IOException e) {
   1462             throw new ICUUncheckedIOException(e);
   1463         }
   1464     }
   1465 }
   1466