Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 * Copyright (c) 2003-2011, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 * Author: Alan Liu
      7 * Created: September 24 2003
      8 * Since: ICU 2.8
      9 **********************************************************************
     10 */
     11 #include "ruleiter.h"
     12 #include "unicode/parsepos.h"
     13 #include "unicode/unistr.h"
     14 #include "unicode/symtable.h"
     15 #include "patternprops.h"
     16 
     17 /* \U87654321 or \ud800\udc00 */
     18 #define MAX_U_NOTATION_LEN 12
     19 
     20 U_NAMESPACE_BEGIN
     21 
     22 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
     23                       ParsePosition& thePos) :
     24     text(theText),
     25     pos(thePos),
     26     sym(theSym),
     27     buf(0),
     28     bufPos(0)
     29 {}
     30 
     31 UBool RuleCharacterIterator::atEnd() const {
     32     return buf == 0 && pos.getIndex() == text.length();
     33 }
     34 
     35 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
     36     if (U_FAILURE(ec)) return DONE;
     37 
     38     UChar32 c = DONE;
     39     isEscaped = FALSE;
     40 
     41     for (;;) {
     42         c = _current();
     43         _advance(UTF_CHAR_LENGTH(c));
     44 
     45         if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
     46             (options & PARSE_VARIABLES) != 0 && sym != 0) {
     47             UnicodeString name = sym->parseReference(text, pos, text.length());
     48             // If name is empty there was an isolated SYMBOL_REF;
     49             // return it.  Caller must be prepared for this.
     50             if (name.length() == 0) {
     51                 break;
     52             }
     53             bufPos = 0;
     54             buf = sym->lookup(name);
     55             if (buf == 0) {
     56                 ec = U_UNDEFINED_VARIABLE;
     57                 return DONE;
     58             }
     59             // Handle empty variable value
     60             if (buf->length() == 0) {
     61                 buf = 0;
     62             }
     63             continue;
     64         }
     65 
     66         if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
     67             continue;
     68         }
     69 
     70         if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
     71             UnicodeString tempEscape;
     72             int32_t offset = 0;
     73             c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
     74             jumpahead(offset);
     75             isEscaped = TRUE;
     76             if (c < 0) {
     77                 ec = U_MALFORMED_UNICODE_ESCAPE;
     78                 return DONE;
     79             }
     80         }
     81 
     82         break;
     83     }
     84 
     85     return c;
     86 }
     87 
     88 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
     89     p.buf = buf;
     90     p.pos = pos.getIndex();
     91     p.bufPos = bufPos;
     92 }
     93 
     94 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
     95     buf = p.buf;
     96     pos.setIndex(p.pos);
     97     bufPos = p.bufPos;
     98 }
     99 
    100 void RuleCharacterIterator::skipIgnored(int32_t options) {
    101     if ((options & SKIP_WHITESPACE) != 0) {
    102         for (;;) {
    103             UChar32 a = _current();
    104             if (!PatternProps::isWhiteSpace(a)) break;
    105             _advance(UTF_CHAR_LENGTH(a));
    106         }
    107     }
    108 }
    109 
    110 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
    111     if (maxLookAhead < 0) {
    112         maxLookAhead = 0x7FFFFFFF;
    113     }
    114     if (buf != 0) {
    115         buf->extract(bufPos, maxLookAhead, result);
    116     } else {
    117         text.extract(pos.getIndex(), maxLookAhead, result);
    118     }
    119     return result;
    120 }
    121 
    122 void RuleCharacterIterator::jumpahead(int32_t count) {
    123     _advance(count);
    124 }
    125 
    126 /*
    127 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
    128     int32_t b = pos.getIndex();
    129     text.extract(0, b, result);
    130     return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
    131 }
    132 */
    133 
    134 UChar32 RuleCharacterIterator::_current() const {
    135     if (buf != 0) {
    136         return buf->char32At(bufPos);
    137     } else {
    138         int i = pos.getIndex();
    139         return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
    140     }
    141 }
    142 
    143 void RuleCharacterIterator::_advance(int32_t count) {
    144     if (buf != 0) {
    145         bufPos += count;
    146         if (bufPos == buf->length()) {
    147             buf = 0;
    148         }
    149     } else {
    150         pos.setIndex(pos.getIndex() + count);
    151         if (pos.getIndex() > text.length()) {
    152             pos.setIndex(text.length());
    153         }
    154     }
    155 }
    156 
    157 U_NAMESPACE_END
    158 
    159 //eof
    160