1 /* 2 ********************************************************************** 3 * Copyright (c) 2003-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Alan Liu 7 * Created: September 24 2003 8 * Since: ICU 2.8 9 ********************************************************************** 10 */ 11 #include "ruleiter.h" 12 #include "unicode/parsepos.h" 13 #include "unicode/unistr.h" 14 #include "unicode/symtable.h" 15 #include "patternprops.h" 16 17 /* \U87654321 or \ud800\udc00 */ 18 #define MAX_U_NOTATION_LEN 12 19 20 U_NAMESPACE_BEGIN 21 22 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, 23 ParsePosition& thePos) : 24 text(theText), 25 pos(thePos), 26 sym(theSym), 27 buf(0), 28 bufPos(0) 29 {} 30 31 UBool RuleCharacterIterator::atEnd() const { 32 return buf == 0 && pos.getIndex() == text.length(); 33 } 34 35 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { 36 if (U_FAILURE(ec)) return DONE; 37 38 UChar32 c = DONE; 39 isEscaped = FALSE; 40 41 for (;;) { 42 c = _current(); 43 _advance(UTF_CHAR_LENGTH(c)); 44 45 if (c == SymbolTable::SYMBOL_REF && buf == 0 && 46 (options & PARSE_VARIABLES) != 0 && sym != 0) { 47 UnicodeString name = sym->parseReference(text, pos, text.length()); 48 // If name is empty there was an isolated SYMBOL_REF; 49 // return it. Caller must be prepared for this. 50 if (name.length() == 0) { 51 break; 52 } 53 bufPos = 0; 54 buf = sym->lookup(name); 55 if (buf == 0) { 56 ec = U_UNDEFINED_VARIABLE; 57 return DONE; 58 } 59 // Handle empty variable value 60 if (buf->length() == 0) { 61 buf = 0; 62 } 63 continue; 64 } 65 66 if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { 67 continue; 68 } 69 70 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { 71 UnicodeString tempEscape; 72 int32_t offset = 0; 73 c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); 74 jumpahead(offset); 75 isEscaped = TRUE; 76 if (c < 0) { 77 ec = U_MALFORMED_UNICODE_ESCAPE; 78 return DONE; 79 } 80 } 81 82 break; 83 } 84 85 return c; 86 } 87 88 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { 89 p.buf = buf; 90 p.pos = pos.getIndex(); 91 p.bufPos = bufPos; 92 } 93 94 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { 95 buf = p.buf; 96 pos.setIndex(p.pos); 97 bufPos = p.bufPos; 98 } 99 100 void RuleCharacterIterator::skipIgnored(int32_t options) { 101 if ((options & SKIP_WHITESPACE) != 0) { 102 for (;;) { 103 UChar32 a = _current(); 104 if (!PatternProps::isWhiteSpace(a)) break; 105 _advance(UTF_CHAR_LENGTH(a)); 106 } 107 } 108 } 109 110 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { 111 if (maxLookAhead < 0) { 112 maxLookAhead = 0x7FFFFFFF; 113 } 114 if (buf != 0) { 115 buf->extract(bufPos, maxLookAhead, result); 116 } else { 117 text.extract(pos.getIndex(), maxLookAhead, result); 118 } 119 return result; 120 } 121 122 void RuleCharacterIterator::jumpahead(int32_t count) { 123 _advance(count); 124 } 125 126 /* 127 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { 128 int32_t b = pos.getIndex(); 129 text.extract(0, b, result); 130 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index 131 } 132 */ 133 134 UChar32 RuleCharacterIterator::_current() const { 135 if (buf != 0) { 136 return buf->char32At(bufPos); 137 } else { 138 int i = pos.getIndex(); 139 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; 140 } 141 } 142 143 void RuleCharacterIterator::_advance(int32_t count) { 144 if (buf != 0) { 145 bufPos += count; 146 if (bufPos == buf->length()) { 147 buf = 0; 148 } 149 } else { 150 pos.setIndex(pos.getIndex() + count); 151 if (pos.getIndex() > text.length()) { 152 pos.setIndex(text.length()); 153 } 154 } 155 } 156 157 U_NAMESPACE_END 158 159 //eof 160