Home | History | Annotate | Download | only in common
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 * Copyright (c) 2003-2011, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 * Author: Alan Liu
      9 * Created: September 24 2003
     10 * Since: ICU 2.8
     11 **********************************************************************
     12 */
     13 #include "ruleiter.h"
     14 #include "unicode/parsepos.h"
     15 #include "unicode/symtable.h"
     16 #include "unicode/unistr.h"
     17 #include "unicode/utf16.h"
     18 #include "patternprops.h"
     19 
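/*
 * Maximum number of UChars that next() looks ahead at when decoding a
 * backslash escape. The longest forms are \U87654321 (10 UChars) and a
 * surrogate pair written as \ud800\udc00 (12 UChars).
 */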
     21 #define MAX_U_NOTATION_LEN 12
     22 
     23 U_NAMESPACE_BEGIN
     24 
     25 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
     26                       ParsePosition& thePos) :
     27     text(theText),
     28     pos(thePos),
     29     sym(theSym),
     30     buf(0),
     31     bufPos(0)
     32 {}
     33 
     34 UBool RuleCharacterIterator::atEnd() const {
     35     return buf == 0 && pos.getIndex() == text.length();
     36 }
     37 
     38 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
     39     if (U_FAILURE(ec)) return DONE;
     40 
     41     UChar32 c = DONE;
     42     isEscaped = FALSE;
     43 
     44     for (;;) {
     45         c = _current();
     46         _advance(U16_LENGTH(c));
     47 
     48         if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
     49             (options & PARSE_VARIABLES) != 0 && sym != 0) {
     50             UnicodeString name = sym->parseReference(text, pos, text.length());
     51             // If name is empty there was an isolated SYMBOL_REF;
     52             // return it.  Caller must be prepared for this.
     53             if (name.length() == 0) {
     54                 break;
     55             }
     56             bufPos = 0;
     57             buf = sym->lookup(name);
     58             if (buf == 0) {
     59                 ec = U_UNDEFINED_VARIABLE;
     60                 return DONE;
     61             }
     62             // Handle empty variable value
     63             if (buf->length() == 0) {
     64                 buf = 0;
     65             }
     66             continue;
     67         }
     68 
     69         if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
     70             continue;
     71         }
     72 
     73         if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
     74             UnicodeString tempEscape;
     75             int32_t offset = 0;
     76             c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
     77             jumpahead(offset);
     78             isEscaped = TRUE;
     79             if (c < 0) {
     80                 ec = U_MALFORMED_UNICODE_ESCAPE;
     81                 return DONE;
     82             }
     83         }
     84 
     85         break;
     86     }
     87 
     88     return c;
     89 }
     90 
     91 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
     92     p.buf = buf;
     93     p.pos = pos.getIndex();
     94     p.bufPos = bufPos;
     95 }
     96 
     97 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
     98     buf = p.buf;
     99     pos.setIndex(p.pos);
    100     bufPos = p.bufPos;
    101 }
    102 
    103 void RuleCharacterIterator::skipIgnored(int32_t options) {
    104     if ((options & SKIP_WHITESPACE) != 0) {
    105         for (;;) {
    106             UChar32 a = _current();
    107             if (!PatternProps::isWhiteSpace(a)) break;
    108             _advance(U16_LENGTH(a));
    109         }
    110     }
    111 }
    112 
    113 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
    114     if (maxLookAhead < 0) {
    115         maxLookAhead = 0x7FFFFFFF;
    116     }
    117     if (buf != 0) {
    118         buf->extract(bufPos, maxLookAhead, result);
    119     } else {
    120         text.extract(pos.getIndex(), maxLookAhead, result);
    121     }
    122     return result;
    123 }
    124 
    125 void RuleCharacterIterator::jumpahead(int32_t count) {
    126     _advance(count);
    127 }
    128 
    129 /*
    130 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
    131     int32_t b = pos.getIndex();
    132     text.extract(0, b, result);
    133     return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
    134 }
    135 */
    136 
    137 UChar32 RuleCharacterIterator::_current() const {
    138     if (buf != 0) {
    139         return buf->char32At(bufPos);
    140     } else {
    141         int i = pos.getIndex();
    142         return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
    143     }
    144 }
    145 
    146 void RuleCharacterIterator::_advance(int32_t count) {
    147     if (buf != 0) {
    148         bufPos += count;
    149         if (bufPos == buf->length()) {
    150             buf = 0;
    151         }
    152     } else {
    153         pos.setIndex(pos.getIndex() + count);
    154         if (pos.getIndex() > text.length()) {
    155             pos.setIndex(text.length());
    156         }
    157     }
    158 }
    159 
    160 U_NAMESPACE_END
    161 
    162 //eof
    163