Home | History | Annotate | Download | only in intltest
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 * Copyright (c) 2004-2011, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 * Author: Alan Liu
      9 * Created: March 22 2004
     10 * Since: ICU 3.0
     11 **********************************************************************
     12 */
     13 #include "tokiter.h"
     14 #include "textfile.h"
     15 #include "patternprops.h"
     16 #include "util.h"
     17 #include "uprops.h"
     18 
     19 TokenIterator::TokenIterator(TextFile* r) {
     20     reader = r;
     21     done = haveLine = FALSE;
     22     pos = lastpos = -1;
     23 }
     24 
     25 TokenIterator::~TokenIterator() {
     26 }
     27 
     28 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
     29     if (done || U_FAILURE(ec)) {
     30         return FALSE;
     31     }
     32     token.truncate(0);
     33     for (;;) {
     34         if (!haveLine) {
     35             if (!reader->readLineSkippingComments(line, ec)) {
     36                 done = TRUE;
     37                 return FALSE;
     38             }
     39             haveLine = TRUE;
     40             pos = 0;
     41         }
     42         lastpos = pos;
     43         if (!nextToken(token, ec)) {
     44             haveLine = FALSE;
     45             if (U_FAILURE(ec)) return FALSE;
     46             continue;
     47         }
     48         return TRUE;
     49     }
     50 }
     51 
     52 int32_t TokenIterator::getLineNumber() const {
     53     return reader->getLineNumber();
     54 }
     55 
     56 /**
     57  * Read the next token from 'this->line' and append it to 'token'.
     58  * Tokens are separated by Pattern_White_Space.  Tokens may also be
     59  * delimited by double or single quotes.  The closing quote must match
     60  * the opening quote.  If a '#' is encountered, the rest of the line
     61  * is ignored, unless it is backslash-escaped or within quotes.
     62  * @param token the token is appended to this StringBuffer
     63  * @param ec input-output error code
     64  * @return TRUE if a valid token is found, or FALSE if the end
     65  * of the line is reached or an error occurs
     66  */
     67 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
     68     ICU_Utility::skipWhitespace(line, pos, TRUE);
     69     if (pos == line.length()) {
     70         return FALSE;
     71     }
     72     UChar c = line.charAt(pos++);
     73     UChar quote = 0;
     74     switch (c) {
     75     case 34/*'"'*/:
     76     case 39/*'\\'*/:
     77         quote = c;
     78         break;
     79     case 35/*'#'*/:
     80         return FALSE;
     81     default:
     82         token.append(c);
     83         break;
     84     }
     85     while (pos < line.length()) {
     86         c = line.charAt(pos); // 16-bit ok
     87         if (c == 92/*'\\'*/) {
     88             UChar32 c32 = line.unescapeAt(pos);
     89             if (c32 < 0) {
     90                 ec = U_MALFORMED_UNICODE_ESCAPE;
     91                 return FALSE;
     92             }
     93             token.append(c32);
     94         } else if ((quote != 0 && c == quote) ||
     95                    (quote == 0 && PatternProps::isWhiteSpace(c))) {
     96             ++pos;
     97             return TRUE;
     98         } else if (quote == 0 && c == '#') {
     99             return TRUE; // do NOT increment
    100         } else {
    101             token.append(c);
    102             ++pos;
    103         }
    104     }
    105     if (quote != 0) {
    106         ec = U_UNTERMINATED_QUOTE;
    107         return FALSE;
    108     }
    109     return TRUE;
    110 }
    111