Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 1996-2014, International Business Machines Corporation and    *
      7  * others. All Rights Reserved.                                                *
      8  *******************************************************************************
      9  */
     10 package android.icu.text;
     11 import android.icu.impl.Norm2AllModes;
     12 import android.icu.impl.Normalizer2Impl;
     13 
     14 /**
     15  * This class has been deprecated since ICU 2.2.
     16  * One problem is that this class is not designed to return supplementary characters.
     17  * Use the Normalizer2 and UCharacter classes instead.
     18  * <p>
     19  * <tt>ComposedCharIter</tt> is an iterator class that returns all
     20  * of the precomposed characters defined in the Unicode standard, along
     21  * with their decomposed forms.  This is often useful when building
     22  * data tables (<i>e.g.</i> collation tables) which need to treat composed
     23  * and decomposed characters equivalently.
     24  * <p>
     25  * For example, imagine that you have built a collation table with ordering
     26  * rules for the {@link Normalizer#DECOMP canonically decomposed} forms of all
     27  * characters used in a particular language.  When you process input text using
     28  * this table, the text must first be decomposed so that it matches the form
     29  * used in the table.  This can impose a performance penalty that may be
     30  * unacceptable in some situations.
     31  * <p>
     32  * You can avoid this problem by ensuring that the collation table contains
     33  * rules for both the decomposed <i>and</i> composed versions of each character.
     34  * To do so, use a <tt>ComposedCharIter</tt> to iterate through all of the
     35  * composed characters in Unicode.  If the decomposition for that character
     36  * consists solely of characters that are listed in your ruleset, you can
     37  * add a new rule for the composed character that makes it equivalent to
     38  * its decomposition sequence.
     39  * <p>
     40  * Note that <tt>ComposedCharIter</tt> iterates over a <em>static</em> table
     41  * of the composed characters in Unicode.  If you want to iterate over the
     42  * composed characters in a particular string, use {@link Normalizer} instead.
     43  * <p>
     44  * When constructing a <tt>ComposedCharIter</tt> there is one
     45  * optional feature that you can enable or disable:
     46  * <ul>
     47  *   <li>{@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul
     48  *          characters and their corresponding Jamo decompositions.
     49  *          This option is off by default (<i>i.e.</i> Hangul processing is enabled)
     50  *          since the Unicode standard specifies that Hangul to Jamo
     51  *          is a canonical decomposition.
     52  * </ul>
     53  * <p>
     54  * <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the
     55  * <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
     56  * It will be updated as later versions of Unicode are released.
     57  * @deprecated ICU 2.2
     58  * @hide Only a subset of ICU is exposed in Android
     59  */
     60 @Deprecated
     61 ///CLOVER:OFF
     62 public final class ComposedCharIter {
     63     /**
     64      * Constant that indicates the iteration has completed.
     65      * {@link #next} returns this value when there are no more composed characters
     66      * over which to iterate.
     67      * @deprecated ICU 2.2
     68      */
     69     @Deprecated
     70     public static final  char DONE = (char) Normalizer.DONE;
     71 
     72     /**
     73      * Construct a new <tt>ComposedCharIter</tt>.  The iterator will return
     74      * all Unicode characters with canonical decompositions, including Korean
     75      * Hangul characters.
     76      * @deprecated ICU 2.2
     77      */
     78     @Deprecated
     79     public ComposedCharIter() {
     80         this(false, 0);
     81     }
     82 
     83     /**
     84      * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
     85      * <p>
     86      * @param compat    <tt>false</tt> for canonical decompositions only;
     87      *                  <tt>true</tt> for both canonical and compatibility
     88      *                  decompositions.
     89      *
     90      * @param options   Optional decomposition features. None are supported, so this is ignored.
     91      * @deprecated ICU 2.2
     92      */
     93     @Deprecated
     94     public ComposedCharIter(boolean compat, int options) {
     95         if(compat) {
     96             n2impl = Norm2AllModes.getNFKCInstance().impl;
     97         } else {
     98             n2impl = Norm2AllModes.getNFCInstance().impl;
     99         }
    100     }
    101 
    102     /**
    103      * Determines whether there any precomposed Unicode characters not yet returned
    104      * by {@link #next}.
    105      * @deprecated ICU 2.2
    106      */
    107     @Deprecated
    108     public boolean hasNext() {
    109         if (nextChar == Normalizer.DONE)  {
    110             findNextChar();
    111         }
    112         return nextChar != Normalizer.DONE;
    113     }
    114 
    115     /**
    116      * Returns the next precomposed Unicode character.
    117      * Repeated calls to <tt>next</tt> return all of the precomposed characters defined
    118      * by Unicode, in ascending order.  After all precomposed characters have
    119      * been returned, {@link #hasNext} will return <tt>false</tt> and further calls
    120      * to <tt>next</tt> will return {@link #DONE}.
    121      * @deprecated ICU 2.2
    122      */
    123     @Deprecated
    124     public char next() {
    125         if (nextChar == Normalizer.DONE)  {
    126             findNextChar();
    127         }
    128         curChar = nextChar;
    129         nextChar = Normalizer.DONE;
    130         return (char) curChar;
    131     }
    132 
    133     /**
    134      * Returns the Unicode decomposition of the current character.
    135      * This method returns the decomposition of the precomposed character most
    136      * recently returned by {@link #next}.  The resulting decomposition is
    137      * affected by the settings of the options passed to the constructor.
    138      * @deprecated ICU 2.2
    139      */
    140     @Deprecated
    141     public String decomposition() {
    142         // the decomposition buffer contains the decomposition of
    143         // current char so just return it
    144         if(decompBuf != null) {
    145             return decompBuf;
    146         } else {
    147             return "";
    148         }
    149     }
    150 
    151     private void findNextChar() {
    152         int c=curChar+1;
    153         decompBuf = null;
    154         for(;;) {
    155             if(c < 0xFFFF) {
    156                 decompBuf = n2impl.getDecomposition(c);
    157                 if(decompBuf != null) {
    158                     // the curChar can be decomposed... so it is a composed char
    159                     // cache the result
    160                     break;
    161                 }
    162                 c++;
    163             } else {
    164                 c=Normalizer.DONE;
    165                 break;
    166             }
    167         }
    168         nextChar=c;
    169     }
    170 
    171     private final Normalizer2Impl n2impl;
    172     private String decompBuf;
    173     private int curChar = 0;
    174     private int nextChar = Normalizer.DONE;
    175 }
    176