1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2013-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 * ContractionsAndExpansions.java, ported from collationsets.h/.cpp 10 * 11 * C++ version created on: 2013feb09 12 * created by: Markus W. Scherer 13 */ 14 15 package android.icu.impl.coll; 16 17 import java.util.Iterator; 18 19 import android.icu.impl.Trie2; 20 import android.icu.text.UnicodeSet; 21 import android.icu.util.CharsTrie; 22 import android.icu.util.CharsTrie.Entry; 23 24 /** 25 * @hide Only a subset of ICU is exposed in Android 26 */ 27 public final class ContractionsAndExpansions { 28 // C++: The following fields are @internal, only public for access by callback. 29 private CollationData data; 30 private UnicodeSet contractions; 31 private UnicodeSet expansions; 32 private CESink sink; 33 private boolean addPrefixes; 34 private int checkTailored = 0; // -1: collected tailored +1: exclude tailored 35 private UnicodeSet tailored = new UnicodeSet(); 36 private UnicodeSet ranges; 37 private StringBuilder unreversedPrefix = new StringBuilder(); 38 private String suffix; 39 private long[] ces = new long[Collation.MAX_EXPANSION_LENGTH]; 40 41 public static interface CESink { 42 void handleCE(long ce); 43 void handleExpansion(long ces[], int start, int length); 44 } 45 46 public ContractionsAndExpansions(UnicodeSet con, UnicodeSet exp, CESink s, boolean prefixes) { 47 contractions = con; 48 expansions = exp; 49 sink = s; 50 addPrefixes = prefixes; 51 } 52 53 public void forData(CollationData d) { 54 // Add all from the data, can be tailoring or base. 55 if (d.base != null) { 56 checkTailored = -1; 57 } 58 data = d; 59 Iterator<Trie2.Range> trieIterator = data.trie.iterator(); 60 Trie2.Range range; 61 while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) { 62 enumCnERange(range.startCodePoint, range.endCodePoint, range.value, this); 63 } 64 if (d.base == null) { 65 return; 66 } 67 // Add all from the base data but only for un-tailored code points. 68 tailored.freeze(); 69 checkTailored = 1; 70 data = d.base; 71 trieIterator = data.trie.iterator(); 72 while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) { 73 enumCnERange(range.startCodePoint, range.endCodePoint, range.value, this); 74 } 75 } 76 77 private void enumCnERange(int start, int end, int ce32, ContractionsAndExpansions cne) { 78 if (cne.checkTailored == 0) { 79 // There is no tailoring. 80 // No need to collect nor check the tailored set. 81 } else if (cne.checkTailored < 0) { 82 // Collect the set of code points with mappings in the tailoring data. 83 if (ce32 == Collation.FALLBACK_CE32) { 84 return; // fallback to base, not tailored 85 } else { 86 cne.tailored.add(start, end); 87 } 88 // checkTailored > 0: Exclude tailored ranges from the base data enumeration. 89 } else if (start == end) { 90 if (cne.tailored.contains(start)) { 91 return; 92 } 93 } else if (cne.tailored.containsSome(start, end)) { 94 if (cne.ranges == null) { 95 cne.ranges = new UnicodeSet(); 96 } 97 cne.ranges.set(start, end).removeAll(cne.tailored); 98 int count = cne.ranges.getRangeCount(); 99 for (int i = 0; i < count; ++i) { 100 cne.handleCE32(cne.ranges.getRangeStart(i), cne.ranges.getRangeEnd(i), ce32); 101 } 102 } 103 cne.handleCE32(start, end, ce32); 104 } 105 106 public void forCodePoint(CollationData d, int c) { 107 int ce32 = d.getCE32(c); 108 if (ce32 == Collation.FALLBACK_CE32) { 109 d = d.base; 110 ce32 = d.getCE32(c); 111 } 112 data = d; 113 handleCE32(c, c, ce32); 114 } 115 116 private void handleCE32(int start, int end, int ce32) { 117 for (;;) { 118 if ((ce32 & 0xff) < Collation.SPECIAL_CE32_LOW_BYTE) { 119 // !isSpecialCE32() 120 if (sink != null) { 121 sink.handleCE(Collation.ceFromSimpleCE32(ce32)); 122 } 123 return; 124 } 125 switch (Collation.tagFromCE32(ce32)) { 126 case Collation.FALLBACK_TAG: 127 return; 128 case Collation.RESERVED_TAG_3: 129 case Collation.BUILDER_DATA_TAG: 130 case Collation.LEAD_SURROGATE_TAG: 131 // Java porting note: U_INTERNAL_PROGRAM_ERROR is set to errorCode in ICU4C. 132 throw new AssertionError( 133 String.format("Unexpected CE32 tag type %d for ce32=0x%08x", 134 Collation.tagFromCE32(ce32), ce32)); 135 case Collation.LONG_PRIMARY_TAG: 136 if (sink != null) { 137 sink.handleCE(Collation.ceFromLongPrimaryCE32(ce32)); 138 } 139 return; 140 case Collation.LONG_SECONDARY_TAG: 141 if (sink != null) { 142 sink.handleCE(Collation.ceFromLongSecondaryCE32(ce32)); 143 } 144 return; 145 case Collation.LATIN_EXPANSION_TAG: 146 if (sink != null) { 147 ces[0] = Collation.latinCE0FromCE32(ce32); 148 ces[1] = Collation.latinCE1FromCE32(ce32); 149 sink.handleExpansion(ces, 0, 2); 150 } 151 // Optimization: If we have a prefix, 152 // then the relevant strings have been added already. 153 if (unreversedPrefix.length() == 0) { 154 addExpansions(start, end); 155 } 156 return; 157 case Collation.EXPANSION32_TAG: 158 if (sink != null) { 159 int idx = Collation.indexFromCE32(ce32); 160 int length = Collation.lengthFromCE32(ce32); 161 for (int i = 0; i < length; ++i) { 162 ces[i] = Collation.ceFromCE32(data.ce32s[idx + i]); 163 } 164 sink.handleExpansion(ces, 0, length); 165 } 166 // Optimization: If we have a prefix, 167 // then the relevant strings have been added already. 168 if (unreversedPrefix.length() == 0) { 169 addExpansions(start, end); 170 } 171 return; 172 case Collation.EXPANSION_TAG: 173 if (sink != null) { 174 int idx = Collation.indexFromCE32(ce32); 175 int length = Collation.lengthFromCE32(ce32); 176 sink.handleExpansion(data.ces, idx, length); 177 } 178 // Optimization: If we have a prefix, 179 // then the relevant strings have been added already. 180 if (unreversedPrefix.length() == 0) { 181 addExpansions(start, end); 182 } 183 return; 184 case Collation.PREFIX_TAG: 185 handlePrefixes(start, end, ce32); 186 return; 187 case Collation.CONTRACTION_TAG: 188 handleContractions(start, end, ce32); 189 return; 190 case Collation.DIGIT_TAG: 191 // Fetch the non-numeric-collation CE32 and continue. 192 ce32 = data.ce32s[Collation.indexFromCE32(ce32)]; 193 break; 194 case Collation.U0000_TAG: 195 assert (start == 0 && end == 0); 196 // Fetch the normal ce32 for U+0000 and continue. 197 ce32 = data.ce32s[0]; 198 break; 199 case Collation.HANGUL_TAG: 200 if (sink != null) { 201 // TODO: This should be optimized, 202 // especially if [start..end] is the complete Hangul range. (assert that) 203 UTF16CollationIterator iter = new UTF16CollationIterator(data); 204 StringBuilder hangul = new StringBuilder(1); 205 for (int c = start; c <= end; ++c) { 206 hangul.setLength(0); 207 hangul.appendCodePoint(c); 208 iter.setText(false, hangul, 0); 209 int length = iter.fetchCEs(); 210 // Ignore the terminating non-CE. 211 assert (length >= 2 && iter.getCE(length - 1) == Collation.NO_CE); 212 sink.handleExpansion(iter.getCEs(), 0, length - 1); 213 } 214 } 215 // Optimization: If we have a prefix, 216 // then the relevant strings have been added already. 217 if (unreversedPrefix.length() == 0) { 218 addExpansions(start, end); 219 } 220 return; 221 case Collation.OFFSET_TAG: 222 // Currently no need to send offset CEs to the sink. 223 return; 224 case Collation.IMPLICIT_TAG: 225 // Currently no need to send implicit CEs to the sink. 226 return; 227 } 228 } 229 } 230 231 private void handlePrefixes(int start, int end, int ce32) { 232 int index = Collation.indexFromCE32(ce32); 233 ce32 = data.getCE32FromContexts(index); // Default if no prefix match. 234 handleCE32(start, end, ce32); 235 if (!addPrefixes) { 236 return; 237 } 238 CharsTrie.Iterator prefixes = new CharsTrie(data.contexts, index + 2).iterator(); 239 while (prefixes.hasNext()) { 240 Entry e = prefixes.next(); 241 setPrefix(e.chars); 242 // Prefix/pre-context mappings are special kinds of contractions 243 // that always yield expansions. 244 addStrings(start, end, contractions); 245 addStrings(start, end, expansions); 246 handleCE32(start, end, e.value); 247 } 248 resetPrefix(); 249 } 250 251 void handleContractions(int start, int end, int ce32) { 252 int index = Collation.indexFromCE32(ce32); 253 if ((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) { 254 // No match on the single code point. 255 // We are underneath a prefix, and the default mapping is just 256 // a fallback to the mappings for a shorter prefix. 257 assert (unreversedPrefix.length() != 0); 258 } else { 259 ce32 = data.getCE32FromContexts(index); // Default if no suffix match. 260 assert (!Collation.isContractionCE32(ce32)); 261 handleCE32(start, end, ce32); 262 } 263 CharsTrie.Iterator suffixes = new CharsTrie(data.contexts, index + 2).iterator(); 264 while (suffixes.hasNext()) { 265 Entry e = suffixes.next(); 266 suffix = e.chars.toString(); 267 addStrings(start, end, contractions); 268 if (unreversedPrefix.length() != 0) { 269 addStrings(start, end, expansions); 270 } 271 handleCE32(start, end, e.value); 272 } 273 suffix = null; 274 } 275 276 void addExpansions(int start, int end) { 277 if (unreversedPrefix.length() == 0 && suffix == null) { 278 if (expansions != null) { 279 expansions.add(start, end); 280 } 281 } else { 282 addStrings(start, end, expansions); 283 } 284 } 285 286 void addStrings(int start, int end, UnicodeSet set) { 287 if (set == null) { 288 return; 289 } 290 StringBuilder s = new StringBuilder(unreversedPrefix); 291 do { 292 s.appendCodePoint(start); 293 if (suffix != null) { 294 s.append(suffix); 295 } 296 set.add(s); 297 s.setLength(unreversedPrefix.length()); 298 } while (++start <= end); 299 } 300 301 // Prefixes are reversed in the data structure. 302 private void setPrefix(CharSequence pfx) { 303 unreversedPrefix.setLength(0); 304 unreversedPrefix.append(pfx).reverse(); 305 } 306 307 private void resetPrefix() { 308 unreversedPrefix.setLength(0); 309 } 310 }