1 /* 2 ********************************************************************** 3 * Copyright (c) 2004, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Alan Liu 7 * Created: March 22 2004 8 * Since: ICU 3.0 9 ********************************************************************** 10 */ 11 #include "tokiter.h" 12 #include "textfile.h" 13 #include "util.h" 14 #include "uprops.h" 15 16 TokenIterator::TokenIterator(TextFile* r) { 17 reader = r; 18 done = haveLine = FALSE; 19 pos = lastpos = -1; 20 } 21 22 TokenIterator::~TokenIterator() { 23 } 24 25 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) { 26 if (done || U_FAILURE(ec)) { 27 return FALSE; 28 } 29 token.truncate(0); 30 for (;;) { 31 if (!haveLine) { 32 if (!reader->readLineSkippingComments(line, ec)) { 33 done = TRUE; 34 return FALSE; 35 } 36 haveLine = TRUE; 37 pos = 0; 38 } 39 lastpos = pos; 40 if (!nextToken(token, ec)) { 41 haveLine = FALSE; 42 if (U_FAILURE(ec)) return FALSE; 43 continue; 44 } 45 return TRUE; 46 } 47 } 48 49 int32_t TokenIterator::getLineNumber() const { 50 return reader->getLineNumber(); 51 } 52 53 /** 54 * Read the next token from 'this->line' and append it to 'token'. 55 * Tokens are separated by rule white space. Tokens may also be 56 * delimited by double or single quotes. The closing quote must match 57 * the opening quote. If a '#' is encountered, the rest of the line 58 * is ignored, unless it is backslash-escaped or within quotes. 59 * @param token the token is appended to this StringBuffer 60 * @param ec input-output error code 61 * @return TRUE if a valid token is found, or FALSE if the end 62 * of the line is reached or an error occurs 63 */ 64 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) { 65 ICU_Utility::skipWhitespace(line, pos, TRUE); 66 if (pos == line.length()) { 67 return FALSE; 68 } 69 UChar c = line.charAt(pos++); 70 UChar quote = 0; 71 switch (c) { 72 case 34/*'"'*/: 73 case 39/*'\\'*/: 74 quote = c; 75 break; 76 case 35/*'#'*/: 77 return FALSE; 78 default: 79 token.append(c); 80 break; 81 } 82 while (pos < line.length()) { 83 c = line.charAt(pos); // 16-bit ok 84 if (c == 92/*'\\'*/) { 85 UChar32 c32 = line.unescapeAt(pos); 86 if (c32 < 0) { 87 ec = U_MALFORMED_UNICODE_ESCAPE; 88 return FALSE; 89 } 90 token.append(c32); 91 } else if ((quote != 0 && c == quote) || 92 (quote == 0 && uprv_isRuleWhiteSpace(c))) { 93 ++pos; 94 return TRUE; 95 } else if (quote == 0 && c == '#') { 96 return TRUE; // do NOT increment 97 } else { 98 token.append(c); 99 ++pos; 100 } 101 } 102 if (quote != 0) { 103 ec = U_UNTERMINATED_QUOTE; 104 return FALSE; 105 } 106 return TRUE; 107 } 108