Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 * Copyright (c) 2003-2011, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 * Author: Alan Liu
      7 * Created: September 24 2003
      8 * Since: ICU 2.8
      9 **********************************************************************
     10 */
     11 #include "ruleiter.h"
     12 #include "unicode/parsepos.h"
     13 #include "unicode/symtable.h"
     14 #include "unicode/unistr.h"
     15 #include "unicode/utf16.h"
     16 #include "patternprops.h"
     17 
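/* Longest escape notations, counted in UTF-16 code units including the
 * backslashes: \U87654321 (10) or a surrogate pair \ud800\udc00 (12). */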
     19 #define MAX_U_NOTATION_LEN 12
     20 
     21 U_NAMESPACE_BEGIN
     22 
     23 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
     24                       ParsePosition& thePos) :
     25     text(theText),
     26     pos(thePos),
     27     sym(theSym),
     28     buf(0),
     29     bufPos(0)
     30 {}
     31 
     32 UBool RuleCharacterIterator::atEnd() const {
     33     return buf == 0 && pos.getIndex() == text.length();
     34 }
     35 
     36 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
     37     if (U_FAILURE(ec)) return DONE;
     38 
     39     UChar32 c = DONE;
     40     isEscaped = FALSE;
     41 
     42     for (;;) {
     43         c = _current();
     44         _advance(U16_LENGTH(c));
     45 
     46         if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
     47             (options & PARSE_VARIABLES) != 0 && sym != 0) {
     48             UnicodeString name = sym->parseReference(text, pos, text.length());
     49             // If name is empty there was an isolated SYMBOL_REF;
     50             // return it.  Caller must be prepared for this.
     51             if (name.length() == 0) {
     52                 break;
     53             }
     54             bufPos = 0;
     55             buf = sym->lookup(name);
     56             if (buf == 0) {
     57                 ec = U_UNDEFINED_VARIABLE;
     58                 return DONE;
     59             }
     60             // Handle empty variable value
     61             if (buf->length() == 0) {
     62                 buf = 0;
     63             }
     64             continue;
     65         }
     66 
     67         if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
     68             continue;
     69         }
     70 
     71         if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
     72             UnicodeString tempEscape;
     73             int32_t offset = 0;
     74             c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
     75             jumpahead(offset);
     76             isEscaped = TRUE;
     77             if (c < 0) {
     78                 ec = U_MALFORMED_UNICODE_ESCAPE;
     79                 return DONE;
     80             }
     81         }
     82 
     83         break;
     84     }
     85 
     86     return c;
     87 }
     88 
     89 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
     90     p.buf = buf;
     91     p.pos = pos.getIndex();
     92     p.bufPos = bufPos;
     93 }
     94 
     95 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
     96     buf = p.buf;
     97     pos.setIndex(p.pos);
     98     bufPos = p.bufPos;
     99 }
    100 
    101 void RuleCharacterIterator::skipIgnored(int32_t options) {
    102     if ((options & SKIP_WHITESPACE) != 0) {
    103         for (;;) {
    104             UChar32 a = _current();
    105             if (!PatternProps::isWhiteSpace(a)) break;
    106             _advance(U16_LENGTH(a));
    107         }
    108     }
    109 }
    110 
    111 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
    112     if (maxLookAhead < 0) {
    113         maxLookAhead = 0x7FFFFFFF;
    114     }
    115     if (buf != 0) {
    116         buf->extract(bufPos, maxLookAhead, result);
    117     } else {
    118         text.extract(pos.getIndex(), maxLookAhead, result);
    119     }
    120     return result;
    121 }
    122 
    123 void RuleCharacterIterator::jumpahead(int32_t count) {
    124     _advance(count);
    125 }
    126 
    127 /*
    128 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
    129     int32_t b = pos.getIndex();
    130     text.extract(0, b, result);
    131     return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
    132 }
    133 */
    134 
    135 UChar32 RuleCharacterIterator::_current() const {
    136     if (buf != 0) {
    137         return buf->char32At(bufPos);
    138     } else {
    139         int i = pos.getIndex();
    140         return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
    141     }
    142 }
    143 
    144 void RuleCharacterIterator::_advance(int32_t count) {
    145     if (buf != 0) {
    146         bufPos += count;
    147         if (bufPos == buf->length()) {
    148             buf = 0;
    149         }
    150     } else {
    151         pos.setIndex(pos.getIndex() + count);
    152         if (pos.getIndex() > text.length()) {
    153             pos.setIndex(text.length());
    154         }
    155     }
    156 }
    157 
    158 U_NAMESPACE_END
    159 
    160 //eof
    161