Home | History | Annotate | Download | only in data
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 **********************************************************************
      6 * Copyright (c) 2004-2015, International Business Machines
      7 * Corporation and others.  All Rights Reserved.
      8 **********************************************************************
      9 * Author: Alan Liu
     10 * Created: March 16 2004
     11 * Since: ICU 3.0
     12 **********************************************************************
     13 */
     14 package android.icu.impl.data;
     15 
     16 import java.io.IOException;
     17 
     18 import android.icu.impl.PatternProps;
     19 import android.icu.impl.Utility;
     20 import android.icu.text.UTF16;
     21 
     22 /**
     23  * An iterator class that returns successive string tokens from some
     24  * source.  String tokens are, in general, separated by Pattern_White_Space
     25  * in the source test.  Furthermore, they may be delimited by
     26  * either single or double quotes (opening and closing quotes must
     27  * match).  Escapes are processed using standard ICU unescaping.
     28  *
     29  * <p>2015-sep-03 TODO: Only used in android.icu.dev.test.format, move there.
     30  * @hide Only a subset of ICU is exposed in Android
     31  */
     32 public class TokenIterator {
     33 
     34     private ResourceReader reader;
     35     private String line;
     36     private StringBuffer buf;
     37     private boolean done;
     38     private int pos;
     39     private int lastpos;
     40 
     41     /**
     42      * Construct an iterator over the tokens returned by the given
     43      * ResourceReader, ignoring blank lines and comment lines (first
     44      * non-blank character is '#').  Note that trailing comments on a
     45      * line, beginning with the first unquoted '#', are recognized.
     46      */
     47     public TokenIterator(ResourceReader r) {
     48         reader = r;
     49         line = null;
     50         done = false;
     51         buf = new StringBuffer();
     52         pos = lastpos = -1;
     53     }
     54 
     55     /**
     56      * Return the next token from this iterator, or null if the last
     57      * token has been returned.
     58      */
     59     public String next() throws IOException {
     60         if (done) {
     61             return null;
     62         }
     63         for (;;) {
     64             if (line == null) {
     65                 line = reader.readLineSkippingComments();
     66                 if (line == null) {
     67                     done = true;
     68                     return null;
     69                 }
     70                 pos = 0;
     71             }
     72             buf.setLength(0);
     73             lastpos = pos;
     74             pos = nextToken(pos);
     75             if (pos < 0) {
     76                 line = null;
     77                 continue;
     78             }
     79             return buf.toString();
     80         }
     81     }
     82 
     83     /**
     84      * Return the one-based line number of the line of the last token returned by
     85      * next(). Should only be called
     86      * after a call to next(); otherwise the return
     87      * value is undefined.
     88      */
     89     public int getLineNumber() {
     90         return reader.getLineNumber();
     91     }
     92 
     93     /**
     94      * Return a string description of the position of the last line
     95      * returned by readLine() or readLineSkippingComments().
     96      */
     97     public String describePosition() {
     98         return reader.describePosition() + ':' + (lastpos+1);
     99     }
    100 
    101     /**
    102      * Read the next token from 'this.line' and append it to
    103      * 'this.buf'.  Tokens are separated by Pattern_White_Space.  Tokens
    104      * may also be delimited by double or single quotes.  The closing
    105      * quote must match the opening quote.  If a '#' is encountered,
    106      * the rest of the line is ignored, unless it is backslash-escaped
    107      * or within quotes.
    108      * @param position the offset into the string
    109      * @return offset to the next character to read from line, or if
    110      * the end of the line is reached without scanning a valid token,
    111      * -1
    112      */
    113     private int nextToken(int position) {
    114         position = PatternProps.skipWhiteSpace(line, position);
    115         if (position == line.length()) {
    116             return -1;
    117         }
    118         int startpos = position;
    119         char c = line.charAt(position++);
    120         char quote = 0;
    121         switch (c) {
    122         case '"':
    123         case '\'':
    124             quote = c;
    125             break;
    126         case '#':
    127             return -1;
    128         default:
    129             buf.append(c);
    130             break;
    131         }
    132         int[] posref = null;
    133         while (position < line.length()) {
    134             c = line.charAt(position); // 16-bit ok
    135             if (c == '\\') {
    136                 if (posref == null) {
    137                     posref = new int[1];
    138                 }
    139                 posref[0] = position+1;
    140                 int c32 = Utility.unescapeAt(line, posref);
    141                 if (c32 < 0) {
    142                     throw new RuntimeException("Invalid escape at " +
    143                                                reader.describePosition() + ':' +
    144                                                position);
    145                 }
    146                 UTF16.append(buf, c32);
    147                 position = posref[0];
    148             } else if ((quote != 0 && c == quote) ||
    149                        (quote == 0 && PatternProps.isWhiteSpace(c))) {
    150                 return ++position;
    151             } else if (quote == 0 && c == '#') {
    152                 return position; // do NOT increment
    153             } else {
    154                 buf.append(c);
    155                 ++position;
    156             }
    157         }
    158         if (quote != 0) {
    159             throw new RuntimeException("Unterminated quote at " +
    160                                        reader.describePosition() + ':' +
    161                                        startpos);
    162         }
    163         return position;
    164     }
    165 }
    166