Home | History | Annotate | Download | only in intltest
      1 /*
      2 **********************************************************************
      3 * Copyright (c) 2004, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 * Author: Alan Liu
      7 * Created: March 22 2004
      8 * Since: ICU 3.0
      9 **********************************************************************
     10 */
     11 #include "tokiter.h"
     12 #include "textfile.h"
     13 #include "util.h"
     14 #include "uprops.h"
     15 
     16 TokenIterator::TokenIterator(TextFile* r) {
     17     reader = r;
     18     done = haveLine = FALSE;
     19     pos = lastpos = -1;
     20 }
     21 
     22 TokenIterator::~TokenIterator() {
     23 }
     24 
     25 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
     26     if (done || U_FAILURE(ec)) {
     27         return FALSE;
     28     }
     29     token.truncate(0);
     30     for (;;) {
     31         if (!haveLine) {
     32             if (!reader->readLineSkippingComments(line, ec)) {
     33                 done = TRUE;
     34                 return FALSE;
     35             }
     36             haveLine = TRUE;
     37             pos = 0;
     38         }
     39         lastpos = pos;
     40         if (!nextToken(token, ec)) {
     41             haveLine = FALSE;
     42             if (U_FAILURE(ec)) return FALSE;
     43             continue;
     44         }
     45         return TRUE;
     46     }
     47 }
     48 
     49 int32_t TokenIterator::getLineNumber() const {
     50     return reader->getLineNumber();
     51 }
     52 
     53 /**
     54  * Read the next token from 'this->line' and append it to 'token'.
     55  * Tokens are separated by rule white space.  Tokens may also be
     56  * delimited by double or single quotes.  The closing quote must match
     57  * the opening quote.  If a '#' is encountered, the rest of the line
     58  * is ignored, unless it is backslash-escaped or within quotes.
     59  * @param token the token is appended to this StringBuffer
     60  * @param ec input-output error code
     61  * @return TRUE if a valid token is found, or FALSE if the end
     62  * of the line is reached or an error occurs
     63  */
     64 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
     65     ICU_Utility::skipWhitespace(line, pos, TRUE);
     66     if (pos == line.length()) {
     67         return FALSE;
     68     }
     69     UChar c = line.charAt(pos++);
     70     UChar quote = 0;
     71     switch (c) {
     72     case 34/*'"'*/:
     73     case 39/*'\\'*/:
     74         quote = c;
     75         break;
     76     case 35/*'#'*/:
     77         return FALSE;
     78     default:
     79         token.append(c);
     80         break;
     81     }
     82     while (pos < line.length()) {
     83         c = line.charAt(pos); // 16-bit ok
     84         if (c == 92/*'\\'*/) {
     85             UChar32 c32 = line.unescapeAt(pos);
     86             if (c32 < 0) {
     87                 ec = U_MALFORMED_UNICODE_ESCAPE;
     88                 return FALSE;
     89             }
     90             token.append(c32);
     91         } else if ((quote != 0 && c == quote) ||
     92                    (quote == 0 && uprv_isRuleWhiteSpace(c))) {
     93             ++pos;
     94             return TRUE;
     95         } else if (quote == 0 && c == '#') {
     96             return TRUE; // do NOT increment
     97         } else {
     98             token.append(c);
     99             ++pos;
    100         }
    101     }
    102     if (quote != 0) {
    103         ec = U_UNTERMINATED_QUOTE;
    104         return FALSE;
    105     }
    106     return TRUE;
    107 }
    108