1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ********************************************************************** 6 * Copyright (c) 2001-2011, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ********************************************************************** 9 * Date Name Description 10 * 11/19/2001 aliu Creation. 11 ********************************************************************** 12 */ 13 package android.icu.text; 14 import android.icu.impl.Utility; 15 import android.icu.lang.UCharacter; 16 17 /** 18 * A transliterator that converts Unicode escape forms to the 19 * characters they represent. Escape forms have a prefix, a suffix, a 20 * radix, and minimum and maximum digit counts. 21 * 22 * <p>This class is package private. It registers several standard 23 * variants with the system which are then accessed via their IDs. 24 * 25 * @author Alan Liu 26 */ 27 class UnescapeTransliterator extends Transliterator { 28 29 /** 30 * The encoded pattern specification. The pattern consists of 31 * zero or more forms. Each form consists of a prefix, suffix, 32 * radix, minimum digit count, and maximum digit count. These 33 * values are stored as a five character header. That is, their 34 * numeric values are cast to 16-bit characters and stored in the 35 * string. Following these five characters, the prefix 36 * characters, then suffix characters are stored. Each form thus 37 * takes n+5 characters, where n is the total length of the prefix 38 * and suffix. The end is marked by a header of length one 39 * consisting of the character END. 40 */ 41 private char spec[]; 42 43 /** 44 * Special character marking the end of the spec[] array. 45 */ 46 private static final char END = 0xFFFF; 47 48 /** 49 * Registers standard variants with the system. Called by 50 * Transliterator during initialization. 51 */ 52 static void register() { 53 // Unicode: "U+10FFFF" hex, min=4, max=6 54 Transliterator.registerFactory("Hex-Any/Unicode", new Transliterator.Factory() { 55 @Override 56 public Transliterator getInstance(String ID) { 57 return new UnescapeTransliterator("Hex-Any/Unicode", new char[] { 58 2, 0, 16, 4, 6, 'U', '+', 59 END 60 }); 61 } 62 }); 63 64 // Java: "\\uFFFF" hex, min=4, max=4 65 Transliterator.registerFactory("Hex-Any/Java", new Transliterator.Factory() { 66 @Override 67 public Transliterator getInstance(String ID) { 68 return new UnescapeTransliterator("Hex-Any/Java", new char[] { 69 2, 0, 16, 4, 4, '\\', 'u', 70 END 71 }); 72 } 73 }); 74 75 // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 76 Transliterator.registerFactory("Hex-Any/C", new Transliterator.Factory() { 77 @Override 78 public Transliterator getInstance(String ID) { 79 return new UnescapeTransliterator("Hex-Any/C", new char[] { 80 2, 0, 16, 4, 4, '\\', 'u', 81 2, 0, 16, 8, 8, '\\', 'U', 82 END 83 }); 84 } 85 }); 86 87 // XML: "" hex, min=1, max=6 88 Transliterator.registerFactory("Hex-Any/XML", new Transliterator.Factory() { 89 @Override 90 public Transliterator getInstance(String ID) { 91 return new UnescapeTransliterator("Hex-Any/XML", new char[] { 92 3, 1, 16, 1, 6, '&', '#', 'x', ';', 93 END 94 }); 95 } 96 }); 97 98 // XML10: "&1114111;" dec, min=1, max=7 (not really "Hex-Any") 99 Transliterator.registerFactory("Hex-Any/XML10", new Transliterator.Factory() { 100 @Override 101 public Transliterator getInstance(String ID) { 102 return new UnescapeTransliterator("Hex-Any/XML10", new char[] { 103 2, 1, 10, 1, 7, '&', '#', ';', 104 END 105 }); 106 } 107 }); 108 109 // Perl: "\\x{263A}" hex, min=1, max=6 110 Transliterator.registerFactory("Hex-Any/Perl", new Transliterator.Factory() { 111 @Override 112 public Transliterator getInstance(String ID) { 113 return new UnescapeTransliterator("Hex-Any/Perl", new char[] { 114 3, 1, 16, 1, 6, '\\', 'x', '{', '}', 115 END 116 }); 117 } 118 }); 119 120 // All: Java, C, Perl, XML, XML10, Unicode 121 Transliterator.registerFactory("Hex-Any", new Transliterator.Factory() { 122 @Override 123 public Transliterator getInstance(String ID) { 124 return new UnescapeTransliterator("Hex-Any", new char[] { 125 2, 0, 16, 4, 6, 'U', '+', // Unicode 126 2, 0, 16, 4, 4, '\\', 'u', // Java 127 2, 0, 16, 8, 8, '\\', 'U', // C (surrogates) 128 3, 1, 16, 1, 6, '&', '#', 'x', ';', // XML 129 2, 1, 10, 1, 7, '&', '#', ';', // XML10 130 3, 1, 16, 1, 6, '\\', 'x', '{', '}', // Perl 131 END 132 }); 133 } 134 }); 135 } 136 137 /** 138 * Package private constructor. Takes the encoded spec array. 139 */ 140 UnescapeTransliterator(String ID, char spec[]) { 141 super(ID, null); 142 this.spec = spec; 143 } 144 145 /** 146 * Implements {@link Transliterator#handleTransliterate}. 147 */ 148 @Override 149 protected void handleTransliterate(Replaceable text, 150 Position pos, boolean isIncremental) { 151 int start = pos.start; 152 int limit = pos.limit; 153 int i, ipat; 154 155 loop: 156 while (start < limit) { 157 // Loop over the forms in spec[]. Exit this loop when we 158 // match one of the specs. Exit the outer loop if a 159 // partial match is detected and isIncremental is true. 160 for (ipat = 0; spec[ipat] != END;) { 161 162 // Read the header 163 int prefixLen = spec[ipat++]; 164 int suffixLen = spec[ipat++]; 165 int radix = spec[ipat++]; 166 int minDigits = spec[ipat++]; 167 int maxDigits = spec[ipat++]; 168 169 // s is a copy of start that is advanced over the 170 // characters as we parse them. 171 int s = start; 172 boolean match = true; 173 174 for (i=0; i<prefixLen; ++i) { 175 if (s >= limit) { 176 if (i > 0) { 177 // We've already matched a character. This is 178 // a partial match, so we return if in 179 // incremental mode. In non-incremental mode, 180 // go to the next spec. 181 if (isIncremental) { 182 break loop; 183 } 184 match = false; 185 break; 186 } 187 } 188 char c = text.charAt(s++); 189 if (c != spec[ipat + i]) { 190 match = false; 191 break; 192 } 193 } 194 195 if (match) { 196 int u = 0; 197 int digitCount = 0; 198 for (;;) { 199 if (s >= limit) { 200 // Check for partial match in incremental mode. 201 if (s > start && isIncremental) { 202 break loop; 203 } 204 break; 205 } 206 int ch = text.char32At(s); 207 int digit = UCharacter.digit(ch, radix); 208 if (digit < 0) { 209 break; 210 } 211 s += UTF16.getCharCount(ch); 212 u = (u * radix) + digit; 213 if (++digitCount == maxDigits) { 214 break; 215 } 216 } 217 218 match = (digitCount >= minDigits); 219 220 if (match) { 221 for (i=0; i<suffixLen; ++i) { 222 if (s >= limit) { 223 // Check for partial match in incremental mode. 224 if (s > start && isIncremental) { 225 break loop; 226 } 227 match = false; 228 break; 229 } 230 char c = text.charAt(s++); 231 if (c != spec[ipat + prefixLen + i]) { 232 match = false; 233 break; 234 } 235 } 236 237 if (match) { 238 // At this point, we have a match 239 String str = UTF16.valueOf(u); 240 text.replace(start, s, str); 241 limit -= s - start - str.length(); 242 // The following break statement leaves the 243 // loop that is traversing the forms in 244 // spec[]. We then parse the next input 245 // character. 246 break; 247 } 248 } 249 } 250 251 ipat += prefixLen + suffixLen; 252 } 253 254 if (start < limit) { 255 start += UTF16.getCharCount(text.char32At(start)); 256 } 257 } 258 259 pos.contextLimit += limit - pos.limit; 260 pos.limit = limit; 261 pos.start = start; 262 } 263 264 /* (non-Javadoc) 265 * @see android.icu.text.Transliterator#addSourceTargetSet(android.icu.text.UnicodeSet, android.icu.text.UnicodeSet, android.icu.text.UnicodeSet) 266 */ 267 @Override 268 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 269 // Each form consists of a prefix, suffix, 270 // * radix, minimum digit count, and maximum digit count. These 271 // * values are stored as a five character header. ... 272 UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); 273 UnicodeSet items = new UnicodeSet(); 274 StringBuilder buffer = new StringBuilder(); 275 for (int i = 0; spec[i] != END;) { 276 // first 5 items are header 277 int end = i + spec[i] + spec[i+1] + 5; 278 int radix = spec[i+2]; 279 for (int j = 0; j < radix; ++j) { 280 Utility.appendNumber(buffer, j, radix, 0); 281 } 282 // then add the characters 283 for (int j = i + 5; j < end; ++j) { 284 items.add(spec[j]); 285 } 286 // and go to next block 287 i = end; 288 } 289 items.addAll(buffer.toString()); 290 items.retainAll(myFilter); 291 292 if (items.size() > 0) { 293 sourceSet.addAll(items); 294 targetSet.addAll(0,0x10FFFF); // assume we can produce any character 295 } 296 } 297 } 298