1 /* 2 ********************************************************************** 3 * Copyright (c) 2001-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Date Name Description 7 * 11/19/2001 aliu Creation. 8 ********************************************************************** 9 */ 10 11 #include "unicode/utypes.h" 12 13 #if !UCONFIG_NO_TRANSLITERATION 14 15 #include "unicode/utf16.h" 16 #include "esctrn.h" 17 #include "util.h" 18 19 U_NAMESPACE_BEGIN 20 21 static const UChar UNIPRE[] = {85,43,0}; // "U+" 22 static const UChar BS_u[] = {92,117,0}; // "\\u" 23 static const UChar BS_U[] = {92,85,0}; // "\\U" 24 static const UChar XMLPRE[] = {38,35,120,0}; // "&#x" 25 static const UChar XML10PRE[] = {38,35,0}; // "&#" 26 static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{" 27 static const UChar SEMI[] = {59,0}; // ";" 28 static const UChar RBRACE[] = {125,0}; // "}" 29 30 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) 31 32 /** 33 * Factory methods 34 */ 35 static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { 36 // Unicode: "U+10FFFF" hex, min=4, max=6 37 return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL); 38 } 39 static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { 40 // Java: "\\uFFFF" hex, min=4, max=4 41 return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL); 42 } 43 static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { 44 // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 45 return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE, 46 new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL)); 47 } 48 static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { 49 // XML: "" hex, min=1, max=6 50 return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL); 51 } 52 static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { 53 // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") 54 return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL); 55 } 56 static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { 57 // Perl: "\\x{263A}" hex, min=1, max=6 58 return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL); 59 } 60 61 /** 62 * Registers standard variants with the system. Called by 63 * Transliterator during initialization. 64 */ 65 void EscapeTransliterator::registerIDs() { 66 Token t = integerToken(0); 67 68 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t); 69 70 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t); 71 72 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t); 73 74 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t); 75 76 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t); 77 78 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t); 79 80 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t); 81 } 82 83 /** 84 * Constructs an escape transliterator with the given ID and 85 * parameters. See the class member documentation for details. 86 */ 87 EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, 88 const UnicodeString& _prefix, const UnicodeString& _suffix, 89 int32_t _radix, int32_t _minDigits, 90 UBool _grokSupplementals, 91 EscapeTransliterator* adoptedSupplementalHandler) : 92 Transliterator(newID, NULL) 93 { 94 this->prefix = _prefix; 95 this->suffix = _suffix; 96 this->radix = _radix; 97 this->minDigits = _minDigits; 98 this->grokSupplementals = _grokSupplementals; 99 this->supplementalHandler = adoptedSupplementalHandler; 100 } 101 102 /** 103 * Copy constructor. 104 */ 105 EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : 106 Transliterator(o), 107 prefix(o.prefix), 108 suffix(o.suffix), 109 radix(o.radix), 110 minDigits(o.minDigits), 111 grokSupplementals(o.grokSupplementals) { 112 supplementalHandler = (o.supplementalHandler != 0) ? 113 new EscapeTransliterator(*o.supplementalHandler) : NULL; 114 } 115 116 EscapeTransliterator::~EscapeTransliterator() { 117 delete supplementalHandler; 118 } 119 120 /** 121 * Transliterator API. 122 */ 123 Transliterator* EscapeTransliterator::clone() const { 124 return new EscapeTransliterator(*this); 125 } 126 127 /** 128 * Implements {@link Transliterator#handleTransliterate}. 129 */ 130 void EscapeTransliterator::handleTransliterate(Replaceable& text, 131 UTransPosition& pos, 132 UBool /*isIncremental*/) const 133 { 134 /* TODO: Verify that isIncremental can be ignored */ 135 int32_t start = pos.start; 136 int32_t limit = pos.limit; 137 138 UnicodeString buf(prefix); 139 int32_t prefixLen = prefix.length(); 140 UBool redoPrefix = FALSE; 141 142 while (start < limit) { 143 int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); 144 int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1; 145 146 if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) { 147 buf.truncate(0); 148 buf.append(supplementalHandler->prefix); 149 ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, 150 supplementalHandler->minDigits); 151 buf.append(supplementalHandler->suffix); 152 redoPrefix = TRUE; 153 } else { 154 if (redoPrefix) { 155 buf.truncate(0); 156 buf.append(prefix); 157 redoPrefix = FALSE; 158 } else { 159 buf.truncate(prefixLen); 160 } 161 ICU_Utility::appendNumber(buf, c, radix, minDigits); 162 buf.append(suffix); 163 } 164 165 text.handleReplaceBetween(start, start + charLen, buf); 166 start += buf.length(); 167 limit += buf.length() - charLen; 168 } 169 170 pos.contextLimit += limit - pos.limit; 171 pos.limit = limit; 172 pos.start = start; 173 } 174 175 U_NAMESPACE_END 176 177 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 178 179 //eof 180