Home | History | Annotate | Download | only in intltest
      1 /*
      2 **********************************************************************
      3 * Copyright (c) 2004-2011, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 * Author: Alan Liu
      7 * Created: March 22 2004
      8 * Since: ICU 3.0
      9 **********************************************************************
     10 */
     11 #include "tokiter.h"
     12 #include "textfile.h"
     13 #include "patternprops.h"
     14 #include "util.h"
     15 #include "uprops.h"
     16 
     17 TokenIterator::TokenIterator(TextFile* r) {
     18     reader = r;
     19     done = haveLine = FALSE;
     20     pos = lastpos = -1;
     21 }
     22 
     23 TokenIterator::~TokenIterator() {
     24 }
     25 
     26 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
     27     if (done || U_FAILURE(ec)) {
     28         return FALSE;
     29     }
     30     token.truncate(0);
     31     for (;;) {
     32         if (!haveLine) {
     33             if (!reader->readLineSkippingComments(line, ec)) {
     34                 done = TRUE;
     35                 return FALSE;
     36             }
     37             haveLine = TRUE;
     38             pos = 0;
     39         }
     40         lastpos = pos;
     41         if (!nextToken(token, ec)) {
     42             haveLine = FALSE;
     43             if (U_FAILURE(ec)) return FALSE;
     44             continue;
     45         }
     46         return TRUE;
     47     }
     48 }
     49 
     50 int32_t TokenIterator::getLineNumber() const {
     51     return reader->getLineNumber();
     52 }
     53 
     54 /**
     55  * Read the next token from 'this->line' and append it to 'token'.
     56  * Tokens are separated by Pattern_White_Space.  Tokens may also be
     57  * delimited by double or single quotes.  The closing quote must match
     58  * the opening quote.  If a '#' is encountered, the rest of the line
     59  * is ignored, unless it is backslash-escaped or within quotes.
     60  * @param token the token is appended to this StringBuffer
     61  * @param ec input-output error code
     62  * @return TRUE if a valid token is found, or FALSE if the end
     63  * of the line is reached or an error occurs
     64  */
     65 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
     66     ICU_Utility::skipWhitespace(line, pos, TRUE);
     67     if (pos == line.length()) {
     68         return FALSE;
     69     }
     70     UChar c = line.charAt(pos++);
     71     UChar quote = 0;
     72     switch (c) {
     73     case 34/*'"'*/:
     74     case 39/*'\\'*/:
     75         quote = c;
     76         break;
     77     case 35/*'#'*/:
     78         return FALSE;
     79     default:
     80         token.append(c);
     81         break;
     82     }
     83     while (pos < line.length()) {
     84         c = line.charAt(pos); // 16-bit ok
     85         if (c == 92/*'\\'*/) {
     86             UChar32 c32 = line.unescapeAt(pos);
     87             if (c32 < 0) {
     88                 ec = U_MALFORMED_UNICODE_ESCAPE;
     89                 return FALSE;
     90             }
     91             token.append(c32);
     92         } else if ((quote != 0 && c == quote) ||
     93                    (quote == 0 && PatternProps::isWhiteSpace(c))) {
     94             ++pos;
     95             return TRUE;
     96         } else if (quote == 0 && c == '#') {
     97             return TRUE; // do NOT increment
     98         } else {
     99             token.append(c);
    100             ++pos;
    101         }
    102     }
    103     if (quote != 0) {
    104         ec = U_UNTERMINATED_QUOTE;
    105         return FALSE;
    106     }
    107     return TRUE;
    108 }
    109