1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2004-2011, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Author: Alan Liu 9 * Created: March 22 2004 10 * Since: ICU 3.0 11 ********************************************************************** 12 */ 13 #include "tokiter.h" 14 #include "textfile.h" 15 #include "patternprops.h" 16 #include "util.h" 17 #include "uprops.h" 18 19 TokenIterator::TokenIterator(TextFile* r) { 20 reader = r; 21 done = haveLine = FALSE; 22 pos = lastpos = -1; 23 } 24 25 TokenIterator::~TokenIterator() { 26 } 27 28 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) { 29 if (done || U_FAILURE(ec)) { 30 return FALSE; 31 } 32 token.truncate(0); 33 for (;;) { 34 if (!haveLine) { 35 if (!reader->readLineSkippingComments(line, ec)) { 36 done = TRUE; 37 return FALSE; 38 } 39 haveLine = TRUE; 40 pos = 0; 41 } 42 lastpos = pos; 43 if (!nextToken(token, ec)) { 44 haveLine = FALSE; 45 if (U_FAILURE(ec)) return FALSE; 46 continue; 47 } 48 return TRUE; 49 } 50 } 51 52 int32_t TokenIterator::getLineNumber() const { 53 return reader->getLineNumber(); 54 } 55 56 /** 57 * Read the next token from 'this->line' and append it to 'token'. 58 * Tokens are separated by Pattern_White_Space. Tokens may also be 59 * delimited by double or single quotes. The closing quote must match 60 * the opening quote. If a '#' is encountered, the rest of the line 61 * is ignored, unless it is backslash-escaped or within quotes. 62 * @param token the token is appended to this StringBuffer 63 * @param ec input-output error code 64 * @return TRUE if a valid token is found, or FALSE if the end 65 * of the line is reached or an error occurs 66 */ 67 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) { 68 ICU_Utility::skipWhitespace(line, pos, TRUE); 69 if (pos == line.length()) { 70 return FALSE; 71 } 72 UChar c = line.charAt(pos++); 73 UChar quote = 0; 74 switch (c) { 75 case 34/*'"'*/: 76 case 39/*'\\'*/: 77 quote = c; 78 break; 79 case 35/*'#'*/: 80 return FALSE; 81 default: 82 token.append(c); 83 break; 84 } 85 while (pos < line.length()) { 86 c = line.charAt(pos); // 16-bit ok 87 if (c == 92/*'\\'*/) { 88 UChar32 c32 = line.unescapeAt(pos); 89 if (c32 < 0) { 90 ec = U_MALFORMED_UNICODE_ESCAPE; 91 return FALSE; 92 } 93 token.append(c32); 94 } else if ((quote != 0 && c == quote) || 95 (quote == 0 && PatternProps::isWhiteSpace(c))) { 96 ++pos; 97 return TRUE; 98 } else if (quote == 0 && c == '#') { 99 return TRUE; // do NOT increment 100 } else { 101 token.append(c); 102 ++pos; 103 } 104 } 105 if (quote != 0) { 106 ec = U_UNTERMINATED_QUOTE; 107 return FALSE; 108 } 109 return TRUE; 110 } 111