Home | History | Annotate | Download | only in lang
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  *
      7  *   Copyright (C) 1999-2014, International Business Machines
      8  *   Corporation and others.  All Rights Reserved.
      9  *
     10  *******************************************************************************
     11  */
     12 
     13 package android.icu.lang;
     14 
     15 import android.icu.text.UTF16;
     16 
     17 /**
     18  * <code>UScriptRun</code> is used to find runs of characters in
     19  * the same script, as defined in the <code>UScript</code> class.
     20  * It implements a simple iterator over an array of characters.
     21  * The iterator will assign <code>COMMON</code> and <code>INHERITED</code>
     22  * characters to the same script as the preceeding characters. If the
     23  * COMMON and INHERITED characters are first, they will be assigned to
     24  * the same script as the following characters.
     25  *
     26  * The iterator will try to match paired punctuation. If it sees an
     27  * opening punctuation character, it will remember the script that
     28  * was assigned to that character, and assign the same script to the
     29  * matching closing punctuation.
     30  *
     31  * No attempt is made to combine related scripts into a single run. In
     32  * particular, Hiragana, Katakana, and Han characters will appear in separate
     33  * runs.
     34 
     35  * Here is an example of how to iterate over script runs:
     36  * <pre>
     37  * void printScriptRuns(char[] text)
     38  * {
     39  *     UScriptRun scriptRun = new UScriptRun(text);
     40  *
     41  *     while (scriptRun.next()) {
     42  *         int start  = scriptRun.getScriptStart();
     43  *         int limit  = scriptRun.getScriptLimit();
     44  *         int script = scriptRun.getScriptCode();
     45  *
     46  *         System.out.println("Script \"" + UScript.getName(script) + "\" from " +
     47  *                            start + " to " + limit + ".");
     48  *     }
     49  *  }
     50  * </pre>
     51  *
     52  * @deprecated This API is ICU internal only.
     53  * @hide Only a subset of ICU is exposed in Android
     54  * @hide draft / provisional / internal are hidden on Android
     55  */
     56 @Deprecated
     57 public final class UScriptRun
     58 {
     59     /**
     60      * Construct an empty <code>UScriptRun</code> object. The <code>next()</code>
     61      * method will return <code>false</code> the first time it is called.
     62      *
     63      * @deprecated This API is ICU internal only.
     64      * @hide draft / provisional / internal are hidden on Android
     65      */
     66     @Deprecated
     67     public UScriptRun()
     68     {
     69         char[] nullChars = null;
     70 
     71         reset(nullChars, 0, 0);
     72     }
     73 
     74     /**
     75      * Construct a <code>UScriptRun</code> object which iterates over the
     76      * characters in the given string.
     77      *
     78      * @param text the string of characters over which to iterate.
     79      *
     80      * @deprecated This API is ICU internal only.
     81      * @hide draft / provisional / internal are hidden on Android
     82      */
     83     @Deprecated
     84     public UScriptRun(String text)
     85     {
     86         reset (text);
     87     }
     88 
     89     /**
     90      * Construct a <code>UScriptRun</code> object which iterates over a subrange
     91      * of the characetrs in the given string.
     92      *
     93      * @param text the string of characters over which to iterate.
     94      * @param start the index of the first character over which to iterate
     95      * @param count the number of characters over which to iterate
     96      *
     97      * @deprecated This API is ICU internal only.
     98      * @hide draft / provisional / internal are hidden on Android
     99      */
    100     @Deprecated
    101     public UScriptRun(String text, int start, int count)
    102     {
    103         reset(text, start, count);
    104     }
    105 
    106     /**
    107      * Construct a <code>UScriptRun</code> object which iterates over the given
    108      * characetrs.
    109      *
    110      * @param chars the array of characters over which to iterate.
    111      *
    112      * @deprecated This API is ICU internal only.
    113      * @hide draft / provisional / internal are hidden on Android
    114      */
    115     @Deprecated
    116     public UScriptRun(char[] chars)
    117     {
    118         reset(chars);
    119     }
    120 
    121     /**
    122      * Construct a <code>UScriptRun</code> object which iterates over a subrange
    123      * of the given characetrs.
    124      *
    125      * @param chars the array of characters over which to iterate.
    126      * @param start the index of the first character over which to iterate
    127      * @param count the number of characters over which to iterate
    128      *
    129      * @deprecated This API is ICU internal only.
    130      * @hide draft / provisional / internal are hidden on Android
    131      */
    132     @Deprecated
    133     public UScriptRun(char[] chars, int start, int count)
    134     {
    135         reset(chars, start, count);
    136     }
    137 
    138 
    139     /**
    140      * Reset the iterator to the start of the text.
    141      *
    142      * @deprecated This API is ICU internal only.
    143      * @hide draft / provisional / internal are hidden on Android
    144      */
    145     @Deprecated
    146     public final void reset()
    147     {
    148         // empty any old parenStack contents.
    149         // NOTE: this is not the most efficient way
    150         // to do this, but it's the easiest to write...
    151         while (stackIsNotEmpty()) {
    152             pop();
    153         }
    154 
    155         scriptStart = textStart;
    156         scriptLimit = textStart;
    157         scriptCode  = UScript.INVALID_CODE;
    158         parenSP     = -1;
    159         pushCount   =  0;
    160         fixupCount  =  0;
    161 
    162         textIndex = textStart;
    163     }
    164 
    165     /**
    166      * Reset the iterator to iterate over the given range of the text. Throws
    167      * IllegalArgumentException if the range is outside of the bounds of the
    168      * character array.
    169      *
    170      * @param start the index of the new first character over which to iterate
    171      * @param count the new number of characters over which to iterate.
    172      * @exception IllegalArgumentException If invalid arguments are passed.
    173      *
    174      * @deprecated This API is ICU internal only.
    175      * @hide draft / provisional / internal are hidden on Android
    176      */
    177     @Deprecated
    178     public final void reset(int start, int count)
    179     throws IllegalArgumentException
    180     {
    181         int len = 0;
    182 
    183         if (text != null) {
    184             len = text.length;
    185         }
    186 
    187         if (start < 0 || count < 0 || start > len - count) {
    188             throw new IllegalArgumentException();
    189         }
    190 
    191         textStart = start;
    192         textLimit = start + count;
    193 
    194         reset();
    195     }
    196 
    197     /**
    198      * Reset the iterator to iterate over <code>count</code> characters
    199      * in <code>chars</code> starting at <code>start</code>. This allows
    200      * clients to reuse an iterator.
    201      *
    202      * @param chars the new array of characters over which to iterate.
    203      * @param start the index of the first character over which to iterate.
    204      * @param count the number of characters over which to iterate.
    205      *
    206      * @deprecated This API is ICU internal only.
    207      * @hide draft / provisional / internal are hidden on Android
    208      */
    209     @Deprecated
    210     public final void reset(char[] chars, int start, int count)
    211     {
    212         if (chars == null) {
    213             chars = emptyCharArray;
    214         }
    215 
    216         text = chars;
    217 
    218         reset(start, count);
    219     }
    220 
    221     /**
    222      * Reset the iterator to iterate over the characters
    223      * in <code>chars</code>. This allows clients to reuse an iterator.
    224      *
    225      * @param chars the new array of characters over which to iterate.
    226      *
    227      * @deprecated This API is ICU internal only.
    228      * @hide draft / provisional / internal are hidden on Android
    229      */
    230     @Deprecated
    231     public final void reset(char[] chars)
    232     {
    233         int length = 0;
    234 
    235         if (chars != null) {
    236             length = chars.length;
    237         }
    238 
    239         reset(chars, 0, length);
    240     }
    241 
    242     /**
    243      * Reset the iterator to iterate over <code>count</code> characters
    244      * in <code>text</code> starting at <code>start</code>. This allows
    245      * clients to reuse an iterator.
    246      *
    247      * @param str the new string of characters over which to iterate.
    248      * @param start the index of the first character over which to iterate.
    249      * @param count the nuber of characters over which to iterate.
    250      *
    251      * @deprecated This API is ICU internal only.
    252      * @hide draft / provisional / internal are hidden on Android
    253      */
    254     @Deprecated
    255     public final void reset(String str, int start, int count)
    256     {
    257         char[] chars = null;
    258 
    259         if (str != null) {
    260             chars = str.toCharArray();
    261         }
    262 
    263         reset(chars, start, count);
    264     }
    265 
    266     /**
    267      * Reset the iterator to iterate over the characters
    268      * in <code>text</code>. This allows clients to reuse an iterator.
    269      *
    270      * @param str the new string of characters over which to iterate.
    271      *
    272      * @deprecated This API is ICU internal only.
    273      * @hide draft / provisional / internal are hidden on Android
    274      */
    275     @Deprecated
    276     public final void reset(String str)
    277     {
    278         int length   = 0;
    279 
    280         if (str != null) {
    281             length = str.length();
    282         }
    283 
    284         reset(str, 0, length);
    285     }
    286 
    287 
    288 
    289     /**
    290      * Get the starting index of the current script run.
    291      *
    292      * @return the index of the first character in the current script run.
    293      *
    294      * @deprecated This API is ICU internal only.
    295      * @hide draft / provisional / internal are hidden on Android
    296      */
    297     @Deprecated
    298     public final int getScriptStart()
    299     {
    300         return scriptStart;
    301     }
    302 
    303     /**
    304      * Get the index of the first character after the current script run.
    305      *
    306      * @return the index of the first character after the current script run.
    307      *
    308      * @deprecated This API is ICU internal only.
    309      * @hide draft / provisional / internal are hidden on Android
    310      */
    311     @Deprecated
    312     public final int getScriptLimit()
    313     {
    314         return scriptLimit;
    315     }
    316 
    317     /**
    318      * Get the script code for the script of the current script run.
    319      *
    320      * @return the script code for the script of the current script run.
    321      * @see android.icu.lang.UScript
    322      *
    323      * @deprecated This API is ICU internal only.
    324      * @hide draft / provisional / internal are hidden on Android
    325      */
    326     @Deprecated
    327     public final int getScriptCode()
    328     {
    329         return scriptCode;
    330     }
    331 
    332     /**
    333      * Find the next script run. Returns <code>false</code> if there
    334      * isn't another run, returns <code>true</code> if there is.
    335      *
    336      * @return <code>false</code> if there isn't another run, <code>true</code> if there is.
    337      *
    338      * @deprecated This API is ICU internal only.
    339      * @hide draft / provisional / internal are hidden on Android
    340      */
    341     @Deprecated
    342     public final boolean next()
    343     {
    344         // if we've fallen off the end of the text, we're done
    345         if (scriptLimit >= textLimit) {
    346             return false;
    347         }
    348 
    349         scriptCode  = UScript.COMMON;
    350         scriptStart = scriptLimit;
    351 
    352         syncFixup();
    353 
    354         while (textIndex < textLimit) {
    355             int ch = UTF16.charAt(text, textStart, textLimit, textIndex - textStart);
    356             int codePointCount = UTF16.getCharCount(ch);
    357             int sc = UScript.getScript(ch);
    358             int pairIndex = getPairIndex(ch);
    359 
    360             textIndex += codePointCount;
    361 
    362             // Paired character handling:
    363             //
    364             // if it's an open character, push it onto the stack.
    365             // if it's a close character, find the matching open on the
    366             // stack, and use that script code. Any non-matching open
    367             // characters above it on the stack will be poped.
    368             if (pairIndex >= 0) {
    369                 if ((pairIndex & 1) == 0) {
    370                     push(pairIndex, scriptCode);
    371                 } else {
    372                     int pi = pairIndex & ~1;
    373 
    374                     while (stackIsNotEmpty() && top().pairIndex != pi) {
    375                         pop();
    376                     }
    377 
    378                     if (stackIsNotEmpty()) {
    379                         sc = top().scriptCode;
    380                     }
    381                 }
    382             }
    383 
    384             if (sameScript(scriptCode, sc)) {
    385                 if (scriptCode <= UScript.INHERITED && sc > UScript.INHERITED) {
    386                     scriptCode = sc;
    387 
    388                     fixup(scriptCode);
    389                 }
    390 
    391                 // if this character is a close paired character,
    392                 // pop the matching open character from the stack
    393                 if (pairIndex >= 0 && (pairIndex & 1) != 0) {
    394                     pop();
    395                 }
    396             } else {
    397                 // We've just seen the first character of
    398                 // the next run. Back over it so we'll see
    399                 // it again the next time.
    400                 textIndex -= codePointCount;
    401                 break;
    402             }
    403         }
    404 
    405         scriptLimit = textIndex;
    406         return true;
    407     }
    408 
    409     /**
    410      * Compare two script codes to see if they are in the same script. If one script is
    411      * a strong script, and the other is INHERITED or COMMON, it will compare equal.
    412      *
    413      * @param scriptOne one of the script codes.
    414      * @param scriptTwo the other script code.
    415      * @return <code>true</code> if the two scripts are the same.
    416      * @see android.icu.lang.UScript
    417      */
    418     private static boolean sameScript(int scriptOne, int scriptTwo)
    419     {
    420         return scriptOne <= UScript.INHERITED || scriptTwo <= UScript.INHERITED || scriptOne == scriptTwo;
    421     }
    422 
    423     /*
    424      * An internal class which holds entries on the paren stack.
    425      */
    426     private static final class ParenStackEntry
    427     {
    428         int pairIndex;
    429         int scriptCode;
    430 
    431         public ParenStackEntry(int thePairIndex, int theScriptCode)
    432         {
    433             pairIndex  = thePairIndex;
    434             scriptCode = theScriptCode;
    435         }
    436     }
    437 
    438     private static final int mod(int sp)
    439     {
    440         return sp % PAREN_STACK_DEPTH;
    441     }
    442 
    443     private static final int inc(int sp, int count)
    444     {
    445         return mod(sp + count);
    446     }
    447 
    448     private static final int inc(int sp)
    449     {
    450         return inc(sp, 1);
    451     }
    452 
    453     private static final int dec(int sp, int count)
    454     {
    455         return mod(sp + PAREN_STACK_DEPTH - count);
    456     }
    457 
    458     private static final int dec(int sp)
    459     {
    460         return dec(sp, 1);
    461     }
    462 
    463     private static final int limitInc(int count)
    464     {
    465         if (count < PAREN_STACK_DEPTH) {
    466             count += 1;
    467         }
    468 
    469         return count;
    470     }
    471 
    472     private final boolean stackIsEmpty()
    473     {
    474         return pushCount <= 0;
    475     }
    476 
    477     private final boolean stackIsNotEmpty()
    478     {
    479         return ! stackIsEmpty();
    480     }
    481 
    482     private final void push(int pairIndex, int scrptCode)
    483     {
    484         pushCount  = limitInc(pushCount);
    485         fixupCount = limitInc(fixupCount);
    486 
    487         parenSP = inc(parenSP);
    488         parenStack[parenSP] = new ParenStackEntry(pairIndex, scrptCode);
    489     }
    490 
    491     private final void pop()
    492     {
    493 
    494         if (stackIsEmpty()) {
    495             return;
    496         }
    497 
    498         parenStack[parenSP] = null;
    499 
    500         if (fixupCount > 0) {
    501             fixupCount -= 1;
    502         }
    503 
    504         pushCount -= 1;
    505         parenSP = dec(parenSP);
    506 
    507         // If the stack is now empty, reset the stack
    508         // pointers to their initial values.
    509         if (stackIsEmpty()) {
    510             parenSP = -1;
    511         }
    512     }
    513 
    514     private final ParenStackEntry top()
    515     {
    516         return parenStack[parenSP];
    517     }
    518 
    519     private final void syncFixup()
    520     {
    521         fixupCount = 0;
    522     }
    523 
    524     private final void fixup(int scrptCode)
    525     {
    526         int fixupSP = dec(parenSP, fixupCount);
    527 
    528         while (fixupCount-- > 0) {
    529             fixupSP = inc(fixupSP);
    530             parenStack[fixupSP].scriptCode = scrptCode;
    531         }
    532     }
    533 
    534     private char[] emptyCharArray = {};
    535 
    536     private char[] text;
    537 
    538     private int textIndex;
    539     private int  textStart;
    540     private int  textLimit;
    541 
    542     private int  scriptStart;
    543     private int  scriptLimit;
    544     private int  scriptCode;
    545 
    546     private static int PAREN_STACK_DEPTH = 32;
    547     private static ParenStackEntry parenStack[] = new ParenStackEntry[PAREN_STACK_DEPTH];
    548     private int parenSP = -1;
    549     private int pushCount = 0;
    550     private int fixupCount = 0;
    551 
    552     /**
    553      * Find the highest bit that's set in a word. Uses a binary search through
    554      * the bits.
    555      *
    556      * @param n the word in which to find the highest bit that's set.
    557      * @return the bit number (counting from the low order bit) of the highest bit.
    558      */
    559     private static final byte highBit(int n)
    560     {
    561         if (n <= 0) {
    562             return -32;
    563         }
    564 
    565         byte bit = 0;
    566 
    567         if (n >= 1 << 16) {
    568             n >>= 16;
    569             bit += 16;
    570         }
    571 
    572         if (n >= 1 << 8) {
    573             n >>= 8;
    574             bit += 8;
    575         }
    576 
    577         if (n >= 1 << 4) {
    578             n >>= 4;
    579             bit += 4;
    580         }
    581 
    582         if (n >= 1 << 2) {
    583             n >>= 2;
    584             bit += 2;
    585         }
    586 
    587         if (n >= 1 << 1) {
    588             n >>= 1;
    589             bit += 1;
    590         }
    591 
    592         return bit;
    593     }
    594 
    595     /**
    596      * Search the pairedChars array for the given character.
    597      *
    598      * @param ch the character for which to search.
    599      * @return the index of the character in the table, or -1 if it's not there.
    600      */
    601     private static int getPairIndex(int ch)
    602     {
    603         int probe = pairedCharPower;
    604         int index = 0;
    605 
    606         if (ch >= pairedChars[pairedCharExtra]) {
    607             index = pairedCharExtra;
    608         }
    609 
    610         while (probe > (1 << 0)) {
    611             probe >>= 1;
    612 
    613             if (ch >= pairedChars[index + probe]) {
    614                 index += probe;
    615             }
    616         }
    617 
    618         if (pairedChars[index] != ch) {
    619             index = -1;
    620         }
    621 
    622         return index;
    623     }
    624 
    625     private static int pairedChars[] = {
    626         0x0028, 0x0029, // ascii paired punctuation
    627         0x003c, 0x003e,
    628         0x005b, 0x005d,
    629         0x007b, 0x007d,
    630         0x00ab, 0x00bb, // guillemets
    631         0x2018, 0x2019, // general punctuation
    632         0x201c, 0x201d,
    633         0x2039, 0x203a,
    634         0x3008, 0x3009, // chinese paired punctuation
    635         0x300a, 0x300b,
    636         0x300c, 0x300d,
    637         0x300e, 0x300f,
    638         0x3010, 0x3011,
    639         0x3014, 0x3015,
    640         0x3016, 0x3017,
    641         0x3018, 0x3019,
    642         0x301a, 0x301b
    643     };
    644 
    645     private static int pairedCharPower = 1 << highBit(pairedChars.length);
    646     private static int pairedCharExtra = pairedChars.length - pairedCharPower;
    647 }
    648 
    649