Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 *   Copyright (c) 2001-2011, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   Date        Name        Description
      7 *   11/19/2001  aliu        Creation.
      8 **********************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_TRANSLITERATION
     14 
     15 #include "unicode/utf16.h"
     16 #include "esctrn.h"
     17 #include "util.h"
     18 
     19 U_NAMESPACE_BEGIN
     20 
     21 static const UChar UNIPRE[] = {85,43,0}; // "U+"
     22 static const UChar BS_u[] = {92,117,0}; // "\\u"
     23 static const UChar BS_U[] = {92,85,0}; // "\\U"
     24 static const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
     25 static const UChar XML10PRE[] = {38,35,0}; // "&#"
     26 static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
     27 static const UChar SEMI[] = {59,0}; // ";"
     28 static const UChar RBRACE[] = {125,0}; // "}"
     29 
// Per its name, this macro emits the RTTI implementation for
// EscapeTransliterator.  The macro itself is defined outside this file;
// presumably it supplies the class-ID accessors -- confirm against its header.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
     31 
     32 /**
     33  * Factory methods
     34  */
     35 static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
     36     // Unicode: "U+10FFFF" hex, min=4, max=6
     37     return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL);
     38 }
     39 static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
     40     // Java: "\\uFFFF" hex, min=4, max=4
     41     return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL);
     42 }
     43 static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
     44     // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
     45     return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE,
     46              new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL));
     47 }
     48 static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
     49     // XML: "" hex, min=1, max=6
     50     return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL);
     51 }
     52 static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
     53     // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
     54     return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL);
     55 }
     56 static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
     57     // Perl: "\\x{263A}" hex, min=1, max=6
     58     return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL);
     59 }
     60 
     61 /**
     62  * Registers standard variants with the system.  Called by
     63  * Transliterator during initialization.
     64  */
     65 void EscapeTransliterator::registerIDs() {
     66     Token t = integerToken(0);
     67 
     68     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
     69 
     70     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
     71 
     72     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
     73 
     74     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
     75 
     76     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
     77 
     78     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
     79 
     80     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
     81 }
     82 
     83 /**
     84  * Constructs an escape transliterator with the given ID and
     85  * parameters.  See the class member documentation for details.
     86  */
     87 EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
     88                          const UnicodeString& _prefix, const UnicodeString& _suffix,
     89                          int32_t _radix, int32_t _minDigits,
     90                          UBool _grokSupplementals,
     91                          EscapeTransliterator* adoptedSupplementalHandler) :
     92     Transliterator(newID, NULL)
     93 {
     94     this->prefix = _prefix;
     95     this->suffix = _suffix;
     96     this->radix = _radix;
     97     this->minDigits = _minDigits;
     98     this->grokSupplementals = _grokSupplementals;
     99     this->supplementalHandler = adoptedSupplementalHandler;
    100 }
    101 
    102 /**
    103  * Copy constructor.
    104  */
    105 EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
    106     Transliterator(o),
    107     prefix(o.prefix),
    108     suffix(o.suffix),
    109     radix(o.radix),
    110     minDigits(o.minDigits),
    111     grokSupplementals(o.grokSupplementals) {
    112     supplementalHandler = (o.supplementalHandler != 0) ?
    113         new EscapeTransliterator(*o.supplementalHandler) : NULL;
    114 }
    115 
/**
 * Destructor.  Releases the supplemental handler owned by this object.
 */
EscapeTransliterator::~EscapeTransliterator() {
    // May be NULL (no handler); deleting a null pointer is a no-op.
    delete supplementalHandler;
}
    119 
    120 /**
    121  * Transliterator API.
    122  */
    123 Transliterator* EscapeTransliterator::clone() const {
    124     return new EscapeTransliterator(*this);
    125 }
    126 
    127 /**
    128  * Implements {@link Transliterator#handleTransliterate}.
    129  */
    130 void EscapeTransliterator::handleTransliterate(Replaceable& text,
    131                                                UTransPosition& pos,
    132                                                UBool /*isIncremental*/) const
    133 {
    134     /* TODO: Verify that isIncremental can be ignored */
    135     int32_t start = pos.start;
    136     int32_t limit = pos.limit;
    137 
    138     UnicodeString buf(prefix);
    139     int32_t prefixLen = prefix.length();
    140     UBool redoPrefix = FALSE;
    141 
    142     while (start < limit) {
    143         int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
    144         int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
    145 
    146         if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
    147             buf.truncate(0);
    148             buf.append(supplementalHandler->prefix);
    149             ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
    150                                   supplementalHandler->minDigits);
    151             buf.append(supplementalHandler->suffix);
    152             redoPrefix = TRUE;
    153         } else {
    154             if (redoPrefix) {
    155                 buf.truncate(0);
    156                 buf.append(prefix);
    157                 redoPrefix = FALSE;
    158             } else {
    159                 buf.truncate(prefixLen);
    160             }
    161             ICU_Utility::appendNumber(buf, c, radix, minDigits);
    162             buf.append(suffix);
    163         }
    164 
    165         text.handleReplaceBetween(start, start + charLen, buf);
    166         start += buf.length();
    167         limit += buf.length() - charLen;
    168     }
    169 
    170     pos.contextLimit += limit - pos.limit;
    171     pos.limit = limit;
    172     pos.start = start;
    173 }
    174 
    175 U_NAMESPACE_END
    176 
    177 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    178 
    179 //eof
    180