1 /* 2 ********************************************************************** 3 * Copyright (c) 2004-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Alan Liu 7 * Created: March 22 2004 8 * Since: ICU 3.0 9 ********************************************************************** 10 */ 11 #include "tokiter.h" 12 #include "textfile.h" 13 #include "patternprops.h" 14 #include "util.h" 15 #include "uprops.h" 16 17 TokenIterator::TokenIterator(TextFile* r) { 18 reader = r; 19 done = haveLine = FALSE; 20 pos = lastpos = -1; 21 } 22 23 TokenIterator::~TokenIterator() { 24 } 25 26 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) { 27 if (done || U_FAILURE(ec)) { 28 return FALSE; 29 } 30 token.truncate(0); 31 for (;;) { 32 if (!haveLine) { 33 if (!reader->readLineSkippingComments(line, ec)) { 34 done = TRUE; 35 return FALSE; 36 } 37 haveLine = TRUE; 38 pos = 0; 39 } 40 lastpos = pos; 41 if (!nextToken(token, ec)) { 42 haveLine = FALSE; 43 if (U_FAILURE(ec)) return FALSE; 44 continue; 45 } 46 return TRUE; 47 } 48 } 49 50 int32_t TokenIterator::getLineNumber() const { 51 return reader->getLineNumber(); 52 } 53 54 /** 55 * Read the next token from 'this->line' and append it to 'token'. 56 * Tokens are separated by Pattern_White_Space. Tokens may also be 57 * delimited by double or single quotes. The closing quote must match 58 * the opening quote. If a '#' is encountered, the rest of the line 59 * is ignored, unless it is backslash-escaped or within quotes. 60 * @param token the token is appended to this StringBuffer 61 * @param ec input-output error code 62 * @return TRUE if a valid token is found, or FALSE if the end 63 * of the line is reached or an error occurs 64 */ 65 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) { 66 ICU_Utility::skipWhitespace(line, pos, TRUE); 67 if (pos == line.length()) { 68 return FALSE; 69 } 70 UChar c = line.charAt(pos++); 71 UChar quote = 0; 72 switch (c) { 73 case 34/*'"'*/: 74 case 39/*'\\'*/: 75 quote = c; 76 break; 77 case 35/*'#'*/: 78 return FALSE; 79 default: 80 token.append(c); 81 break; 82 } 83 while (pos < line.length()) { 84 c = line.charAt(pos); // 16-bit ok 85 if (c == 92/*'\\'*/) { 86 UChar32 c32 = line.unescapeAt(pos); 87 if (c32 < 0) { 88 ec = U_MALFORMED_UNICODE_ESCAPE; 89 return FALSE; 90 } 91 token.append(c32); 92 } else if ((quote != 0 && c == quote) || 93 (quote == 0 && PatternProps::isWhiteSpace(c))) { 94 ++pos; 95 return TRUE; 96 } else if (quote == 0 && c == '#') { 97 return TRUE; // do NOT increment 98 } else { 99 token.append(c); 100 ++pos; 101 } 102 } 103 if (quote != 0) { 104 ec = U_UNTERMINATED_QUOTE; 105 return FALSE; 106 } 107 return TRUE; 108 } 109