Home | History | Annotate | Download | only in impl
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 **********************************************************************
      6 * Copyright (c) 2003-2011, International Business Machines
      7 * Corporation and others.  All Rights Reserved.
      8 **********************************************************************
      9 * Author: Alan Liu
     10 * Created: September 23 2003
     11 * Since: ICU 2.8
     12 **********************************************************************
     13 */
     14 package android.icu.impl;
     15 
     16 import java.text.ParsePosition;
     17 
     18 import android.icu.text.SymbolTable;
     19 import android.icu.text.UTF16;
     20 
     21 /**
     22  * An iterator that returns 32-bit code points.  This class is deliberately
     23  * <em>not</em> related to any of the JDK or ICU4J character iterator classes
     24  * in order to minimize complexity.
     25  * @author Alan Liu
     26  * @hide Only a subset of ICU is exposed in Android
     27  */
     28 public class RuleCharacterIterator {
     29 
     30     // TODO: Ideas for later.  (Do not implement if not needed, lest the
     31     // code coverage numbers go down due to unused methods.)
     32     // 1. Add a copy constructor, equals() method, clone() method.
     33     // 2. Rather than return DONE, throw an exception if the end
     34     // is reached -- this is an alternate usage model, probably not useful.
     35     // 3. Return isEscaped from next().  If this happens,
     36     // don't keep an isEscaped member variable.
     37 
     38     /**
     39      * Text being iterated.
     40      */
     41     private String text;
     42 
     43     /**
     44      * Position of iterator.
     45      */
     46     private ParsePosition pos;
     47 
     48     /**
     49      * Symbol table used to parse and dereference variables.  May be null.
     50      */
     51     private SymbolTable sym;
     52 
     53     /**
     54      * Current variable expansion, or null if none.
     55      */
     56     private char[] buf;
     57 
     58     /**
     59      * Position within buf[].  Meaningless if buf == null.
     60      */
     61     private int bufPos;
     62 
     63     /**
     64      * Flag indicating whether the last character was parsed from an escape.
     65      */
     66     private boolean isEscaped;
     67 
     68     /**
     69      * Value returned when there are no more characters to iterate.
     70      */
     71     public static final int DONE = -1;
     72 
     73     /**
     74      * Bitmask option to enable parsing of variable names.  If (options &
     75      * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
     76      * its value.  Variables are parsed using the SymbolTable API.
     77      */
     78     public static final int PARSE_VARIABLES = 1;
     79 
     80     /**
     81      * Bitmask option to enable parsing of escape sequences.  If (options &
     82      * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
     83      * to its value.  Escapes are parsed using Utility.unescapeAt().
     84      */
     85     public static final int PARSE_ESCAPES   = 2;
     86 
     87     /**
     88      * Bitmask option to enable skipping of whitespace.  If (options &
     89      * SKIP_WHITESPACE) != 0, then Unicode Pattern_White_Space characters will be silently
     90      * skipped, as if they were not present in the input.
     91      */
     92     public static final int SKIP_WHITESPACE = 4;
     93 
     94     /**
     95      * Constructs an iterator over the given text, starting at the given
     96      * position.
     97      * @param text the text to be iterated
     98      * @param sym the symbol table, or null if there is none.  If sym is null,
     99      * then variables will not be deferenced, even if the PARSE_VARIABLES
    100      * option is set.
    101      * @param pos upon input, the index of the next character to return.  If a
    102      * variable has been dereferenced, then pos will <em>not</em> increment as
    103      * characters of the variable value are iterated.
    104      */
    105     public RuleCharacterIterator(String text, SymbolTable sym,
    106                                  ParsePosition pos) {
    107         if (text == null || pos.getIndex() > text.length()) {
    108             throw new IllegalArgumentException();
    109         }
    110         this.text = text;
    111         this.sym = sym;
    112         this.pos = pos;
    113         buf = null;
    114     }
    115 
    116     /**
    117      * Returns true if this iterator has no more characters to return.
    118      */
    119     public boolean atEnd() {
    120         return buf == null && pos.getIndex() == text.length();
    121     }
    122 
    123     /**
    124      * Returns the next character using the given options, or DONE if there
    125      * are no more characters, and advance the position to the next
    126      * character.
    127      * @param options one or more of the following options, bitwise-OR-ed
    128      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
    129      * @return the current 32-bit code point, or DONE
    130      */
    131     public int next(int options) {
    132         int c = DONE;
    133         isEscaped = false;
    134 
    135         for (;;) {
    136             c = _current();
    137             _advance(UTF16.getCharCount(c));
    138 
    139             if (c == SymbolTable.SYMBOL_REF && buf == null &&
    140                 (options & PARSE_VARIABLES) != 0 && sym != null) {
    141                 String name = sym.parseReference(text, pos, text.length());
    142                 // If name == null there was an isolated SYMBOL_REF;
    143                 // return it.  Caller must be prepared for this.
    144                 if (name == null) {
    145                     break;
    146                 }
    147                 bufPos = 0;
    148                 buf = sym.lookup(name);
    149                 if (buf == null) {
    150                     throw new IllegalArgumentException(
    151                                 "Undefined variable: " + name);
    152                 }
    153                 // Handle empty variable value
    154                 if (buf.length == 0) {
    155                     buf = null;
    156                 }
    157                 continue;
    158             }
    159 
    160             if ((options & SKIP_WHITESPACE) != 0 &&
    161                 PatternProps.isWhiteSpace(c)) {
    162                 continue;
    163             }
    164 
    165             if (c == '\\' && (options & PARSE_ESCAPES) != 0) {
    166                 int offset[] = new int[] { 0 };
    167                 c = Utility.unescapeAt(lookahead(), offset);
    168                 jumpahead(offset[0]);
    169                 isEscaped = true;
    170                 if (c < 0) {
    171                     throw new IllegalArgumentException("Invalid escape");
    172                 }
    173             }
    174 
    175             break;
    176         }
    177 
    178         return c;
    179     }
    180 
    181     /**
    182      * Returns true if the last character returned by next() was
    183      * escaped.  This will only be the case if the option passed in to
    184      * next() included PARSE_ESCAPED and the next character was an
    185      * escape sequence.
    186      */
    187     public boolean isEscaped() {
    188         return isEscaped;
    189     }
    190 
    191     /**
    192      * Returns true if this iterator is currently within a variable expansion.
    193      */
    194     public boolean inVariable() {
    195         return buf != null;
    196     }
    197 
    198     /**
    199      * Returns an object which, when later passed to setPos(), will
    200      * restore this iterator's position.  Usage idiom:
    201      *
    202      * RuleCharacterIterator iterator = ...;
    203      * Object pos = iterator.getPos(null); // allocate position object
    204      * for (;;) {
    205      *   pos = iterator.getPos(pos); // reuse position object
    206      *   int c = iterator.next(...);
    207      *   ...
    208      * }
    209      * iterator.setPos(pos);
    210      *
    211      * @param p a position object previously returned by getPos(),
    212      * or null.  If not null, it will be updated and returned.  If
    213      * null, a new position object will be allocated and returned.
    214      * @return a position object which may be passed to setPos(),
    215      * either `p,' or if `p' == null, a newly-allocated object
    216      */
    217     public Object getPos(Object p) {
    218         if (p == null) {
    219             return new Object[] {buf, new int[] {pos.getIndex(), bufPos}};
    220         }
    221         Object[] a = (Object[]) p;
    222         a[0] = buf;
    223         int[] v = (int[]) a[1];
    224         v[0] = pos.getIndex();
    225         v[1] = bufPos;
    226         return p;
    227     }
    228 
    229     /**
    230      * Restores this iterator to the position it had when getPos()
    231      * returned the given object.
    232      * @param p a position object previously returned by getPos()
    233      */
    234     public void setPos(Object p) {
    235         Object[] a = (Object[]) p;
    236         buf = (char[]) a[0];
    237         int[] v = (int[]) a[1];
    238         pos.setIndex(v[0]);
    239         bufPos = v[1];
    240     }
    241 
    242     /**
    243      * Skips ahead past any ignored characters, as indicated by the given
    244      * options.  This is useful in conjunction with the lookahead() method.
    245      *
    246      * Currently, this only has an effect for SKIP_WHITESPACE.
    247      * @param options one or more of the following options, bitwise-OR-ed
    248      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
    249      */
    250     public void skipIgnored(int options) {
    251         if ((options & SKIP_WHITESPACE) != 0) {
    252             for (;;) {
    253                 int a = _current();
    254                 if (!PatternProps.isWhiteSpace(a)) break;
    255                 _advance(UTF16.getCharCount(a));
    256             }
    257         }
    258     }
    259 
    260     /**
    261      * Returns a string containing the remainder of the characters to be
    262      * returned by this iterator, without any option processing.  If the
    263      * iterator is currently within a variable expansion, this will only
    264      * extend to the end of the variable expansion.  This method is provided
    265      * so that iterators may interoperate with string-based APIs.  The typical
    266      * sequence of calls is to call skipIgnored(), then call lookahead(), then
    267      * parse the string returned by lookahead(), then call jumpahead() to
    268      * resynchronize the iterator.
    269      * @return a string containing the characters to be returned by future
    270      * calls to next()
    271      */
    272     public String lookahead() {
    273         if (buf != null) {
    274             return new String(buf, bufPos, buf.length - bufPos);
    275         } else {
    276             return text.substring(pos.getIndex());
    277         }
    278     }
    279 
    280     /**
    281      * Advances the position by the given number of 16-bit code units.
    282      * This is useful in conjunction with the lookahead() method.
    283      * @param count the number of 16-bit code units to jump over
    284      */
    285     public void jumpahead(int count) {
    286         if (count < 0) {
    287             throw new IllegalArgumentException();
    288         }
    289         if (buf != null) {
    290             bufPos += count;
    291             if (bufPos > buf.length) {
    292                 throw new IllegalArgumentException();
    293             }
    294             if (bufPos == buf.length) {
    295                 buf = null;
    296             }
    297         } else {
    298             int i = pos.getIndex() + count;
    299             pos.setIndex(i);
    300             if (i > text.length()) {
    301                 throw new IllegalArgumentException();
    302             }
    303         }
    304     }
    305 
    306     /**
    307      * Returns a string representation of this object, consisting of the
    308      * characters being iterated, with a '|' marking the current position.
    309      * Position within an expanded variable is <em>not</em> indicated.
    310      * @return a string representation of this object
    311      */
    312     @Override
    313     public String toString() {
    314         int b = pos.getIndex();
    315         return text.substring(0, b) + '|' + text.substring(b);
    316     }
    317 
    318     /**
    319      * Returns the current 32-bit code point without parsing escapes, parsing
    320      * variables, or skipping whitespace.
    321      * @return the current 32-bit code point
    322      */
    323     private int _current() {
    324         if (buf != null) {
    325             return UTF16.charAt(buf, 0, buf.length, bufPos);
    326         } else {
    327             int i = pos.getIndex();
    328             return (i < text.length()) ? UTF16.charAt(text, i) : DONE;
    329         }
    330     }
    331 
    332     /**
    333      * Advances the position by the given amount.
    334      * @param count the number of 16-bit code units to advance past
    335      */
    336     private void _advance(int count) {
    337         if (buf != null) {
    338             bufPos += count;
    339             if (bufPos == buf.length) {
    340                 buf = null;
    341             }
    342         } else {
    343             pos.setIndex(pos.getIndex() + count);
    344             if (pos.getIndex() > text.length()) {
    345                 pos.setIndex(text.length());
    346             }
    347         }
    348     }
    349 }