1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2001-2011, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Date Name Description 9 * 11/19/2001 aliu Creation. 10 ********************************************************************** 11 */ 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_TRANSLITERATION 16 17 #include "unicode/utf16.h" 18 #include "esctrn.h" 19 #include "util.h" 20 21 U_NAMESPACE_BEGIN 22 23 static const UChar UNIPRE[] = {85,43,0}; // "U+" 24 static const UChar BS_u[] = {92,117,0}; // "\\u" 25 static const UChar BS_U[] = {92,85,0}; // "\\U" 26 static const UChar XMLPRE[] = {38,35,120,0}; // "&#x" 27 static const UChar XML10PRE[] = {38,35,0}; // "&#" 28 static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{" 29 static const UChar SEMI[] = {59,0}; // ";" 30 static const UChar RBRACE[] = {125,0}; // "}" 31 32 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) 33 34 /** 35 * Factory methods 36 */ 37 static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { 38 // Unicode: "U+10FFFF" hex, min=4, max=6 39 return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL); 40 } 41 static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { 42 // Java: "\\uFFFF" hex, min=4, max=4 43 return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL); 44 } 45 static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { 46 // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 47 return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE, 48 new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL)); 49 } 50 static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { 51 // XML: "" hex, min=1, max=6 52 return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL); 53 } 54 static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { 55 // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") 56 return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL); 57 } 58 static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { 59 // Perl: "\\x{263A}" hex, min=1, max=6 60 return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL); 61 } 62 63 /** 64 * Registers standard variants with the system. Called by 65 * Transliterator during initialization. 66 */ 67 void EscapeTransliterator::registerIDs() { 68 Token t = integerToken(0); 69 70 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t); 71 72 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t); 73 74 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t); 75 76 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t); 77 78 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t); 79 80 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t); 81 82 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t); 83 } 84 85 /** 86 * Constructs an escape transliterator with the given ID and 87 * parameters. See the class member documentation for details. 88 */ 89 EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, 90 const UnicodeString& _prefix, const UnicodeString& _suffix, 91 int32_t _radix, int32_t _minDigits, 92 UBool _grokSupplementals, 93 EscapeTransliterator* adoptedSupplementalHandler) : 94 Transliterator(newID, NULL) 95 { 96 this->prefix = _prefix; 97 this->suffix = _suffix; 98 this->radix = _radix; 99 this->minDigits = _minDigits; 100 this->grokSupplementals = _grokSupplementals; 101 this->supplementalHandler = adoptedSupplementalHandler; 102 } 103 104 /** 105 * Copy constructor. 106 */ 107 EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : 108 Transliterator(o), 109 prefix(o.prefix), 110 suffix(o.suffix), 111 radix(o.radix), 112 minDigits(o.minDigits), 113 grokSupplementals(o.grokSupplementals) { 114 supplementalHandler = (o.supplementalHandler != 0) ? 115 new EscapeTransliterator(*o.supplementalHandler) : NULL; 116 } 117 118 EscapeTransliterator::~EscapeTransliterator() { 119 delete supplementalHandler; 120 } 121 122 /** 123 * Transliterator API. 124 */ 125 Transliterator* EscapeTransliterator::clone() const { 126 return new EscapeTransliterator(*this); 127 } 128 129 /** 130 * Implements {@link Transliterator#handleTransliterate}. 131 */ 132 void EscapeTransliterator::handleTransliterate(Replaceable& text, 133 UTransPosition& pos, 134 UBool /*isIncremental*/) const 135 { 136 /* TODO: Verify that isIncremental can be ignored */ 137 int32_t start = pos.start; 138 int32_t limit = pos.limit; 139 140 UnicodeString buf(prefix); 141 int32_t prefixLen = prefix.length(); 142 UBool redoPrefix = FALSE; 143 144 while (start < limit) { 145 int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); 146 int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1; 147 148 if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) { 149 buf.truncate(0); 150 buf.append(supplementalHandler->prefix); 151 ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, 152 supplementalHandler->minDigits); 153 buf.append(supplementalHandler->suffix); 154 redoPrefix = TRUE; 155 } else { 156 if (redoPrefix) { 157 buf.truncate(0); 158 buf.append(prefix); 159 redoPrefix = FALSE; 160 } else { 161 buf.truncate(prefixLen); 162 } 163 ICU_Utility::appendNumber(buf, c, radix, minDigits); 164 buf.append(suffix); 165 } 166 167 text.handleReplaceBetween(start, start + charLen, buf); 168 start += buf.length(); 169 limit += buf.length() - charLen; 170 } 171 172 pos.contextLimit += limit - pos.limit; 173 pos.limit = limit; 174 pos.start = start; 175 } 176 177 U_NAMESPACE_END 178 179 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 180 181 //eof 182