1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 1996-2010, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 package android.icu.dev.test.normalizer; 11 12 import java.util.Collection; 13 import java.util.Iterator; 14 import java.util.Set; 15 import java.util.SortedSet; 16 import java.util.TreeSet; 17 18 import org.junit.Test; 19 import org.junit.runner.RunWith; 20 import org.junit.runners.JUnit4; 21 22 import android.icu.dev.test.TestFmwk; 23 import android.icu.impl.Utility; 24 import android.icu.lang.UCharacter; 25 import android.icu.text.CanonicalIterator; 26 import android.icu.text.Normalizer; 27 import android.icu.text.UTF16; 28 import android.icu.testsharding.MainTestShard; 29 30 31 // TODO: fit into test framework 32 33 @MainTestShard 34 @RunWith(JUnit4.class) 35 public class TestCanonicalIterator extends TestFmwk { 36 37 static final boolean SHOW_NAMES = false; 38 39 static final String testArray[][] = { 40 {"\u00C5d\u0307\u0327", "A\u030Ad\u0307\u0327, A\u030Ad\u0327\u0307, A\u030A\u1E0B\u0327, " 41 + "A\u030A\u1E11\u0307, \u00C5d\u0307\u0327, \u00C5d\u0327\u0307, " 42 + "\u00C5\u1E0B\u0327, \u00C5\u1E11\u0307, \u212Bd\u0307\u0327, " 43 + "\u212Bd\u0327\u0307, \u212B\u1E0B\u0327, \u212B\u1E11\u0307"}, 44 {"\u010d\u017E", "c\u030Cz\u030C, c\u030C\u017E, \u010Dz\u030C, \u010D\u017E"}, 45 {"x\u0307\u0327", "x\u0307\u0327, x\u0327\u0307, \u1E8B\u0327"}, 46 }; 47 48 @Test 49 public void TestExhaustive() { 50 int counter = 0; 51 CanonicalIterator it = new CanonicalIterator(""); 52 /* 53 CanonicalIterator slowIt = new CanonicalIterator(""); 54 slowIt.SKIP_ZEROS = false; 55 */ 56 //Transliterator name = Transliterator.getInstance("[^\\u0020-\\u007F] name"); 57 //Set itSet = new TreeSet(); 58 //Set slowItSet = new TreeSet(); 59 60 61 for (int i = 0; i < 0x10FFFF; ++i) { 62 63 // skip characters we know don't have decomps 64 int type = UCharacter.getType(i); 65 if (type == Character.UNASSIGNED || type == Character.PRIVATE_USE 66 || type == Character.SURROGATE) continue; 67 68 if ((++counter % 5000) == 0) logln("Testing " + Utility.hex(i,0)); 69 70 String s = UTF16.valueOf(i); 71 characterTest(s, i, it); 72 73 characterTest(s + "\u0345", i, it); 74 } 75 } 76 77 public int TestSpeed() { 78 // skip unless verbose 79 if (!isVerbose()) return 0; 80 81 String s = "\uAC01\u0345"; 82 83 CanonicalIterator it = new CanonicalIterator(s); 84 double start, end; 85 int x = 0; // just to keep code from optimizing away. 86 int iterations = 10000; 87 double slowDelta = 0; 88 89 /* 90 CanonicalIterator slowIt = new CanonicalIterator(s); 91 slowIt.SKIP_ZEROS = false; 92 93 start = System.currentTimeMillis(); 94 for (int i = 0; i < iterations; ++i) { 95 slowIt.setSource(s); 96 while (true) { 97 String item = slowIt.next(); 98 if (item == null) break; 99 x += item.length(); 100 } 101 } 102 end = System.currentTimeMillis(); 103 double slowDelta = (end-start) / iterations; 104 logln("Slow iteration: " + slowDelta); 105 */ 106 107 start = System.currentTimeMillis(); 108 for (int i = 0; i < iterations; ++i) { 109 it.setSource(s); 110 while (true) { 111 String item = it.next(); 112 if (item == null) break; 113 x += item.length(); 114 } 115 } 116 end = System.currentTimeMillis(); 117 double fastDelta = (end-start) / iterations; 118 logln("Fast iteration: " + fastDelta + (slowDelta != 0 ? ", " + (fastDelta/slowDelta) : "")); 119 120 121 return x; 122 } 123 124 @Test 125 public void TestBasic() { 126 // This is not interesting anymore as the data is already built 127 // beforehand 128 129 // check build 130 // UnicodeSet ss = CanonicalIterator.getSafeStart(); 131 // logln("Safe Start: " + ss.toPattern(true)); 132 // ss = CanonicalIterator.getStarts('a'); 133 // expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'), 134 // new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB" 135 // + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]") 136 // ); 137 138 // check permute 139 // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted! 140 141 Set results = new TreeSet(); 142 CanonicalIterator.permute("ABC", false, results); 143 expectEqual("Simple permutation ", "", collectionToString(results), "ABC, ACB, BAC, BCA, CAB, CBA"); 144 145 // try samples 146 SortedSet set = new TreeSet(); 147 for (int i = 0; i < testArray.length; ++i) { 148 //logln("Results for: " + name.transliterate(testArray[i])); 149 CanonicalIterator it = new CanonicalIterator(testArray[i][0]); 150 // int counter = 0; 151 set.clear(); 152 String first = null; 153 while (true) { 154 String result = it.next(); 155 if(first==null){ 156 first = result; 157 } 158 if (result == null) break; 159 set.add(result); // sort them 160 //logln(++counter + ": " + hex.transliterate(result)); 161 //logln(" = " + name.transliterate(result)); 162 } 163 expectEqual(i + ": ", testArray[i][0], collectionToString(set), testArray[i][1]); 164 it.reset(); 165 if(!it.next().equals(first)){ 166 errln("CanonicalIterator.reset() failed"); 167 } 168 if(!it.getSource().equals(Normalizer.normalize(testArray[i][0],Normalizer.NFD))){ 169 errln("CanonicalIterator.getSource() does not return NFD of input source"); 170 } 171 } 172 } 173 174 private void expectEqual(String message, String item, Object a, Object b) { 175 if (!a.equals(b)) { 176 errln("FAIL: " + message + getReadable(item)); 177 errln("\t" + getReadable(a)); 178 errln("\t" + getReadable(b)); 179 } else { 180 logln("Checked: " + message + getReadable(item)); 181 logln("\t" + getReadable(a)); 182 logln("\t" + getReadable(b)); 183 } 184 } 185 186 //Transliterator name = null; 187 //Transliterator hex = null; 188 189 public String getReadable(Object obj) { 190 if (obj == null) return "null"; 191 String s = obj.toString(); 192 if (s.length() == 0) return ""; 193 // set up for readable display 194 //if (name == null) name = Transliterator.getInstance("[^\\ -\\u007F] name"); 195 //if (hex == null) hex = Transliterator.getInstance("[^\\ -\\u007F] hex"); 196 return "[" + (SHOW_NAMES ? hex(s) + "; " : "") + hex(s) + "]"; 197 } 198 199 private void characterTest(String s, int ch, CanonicalIterator it) 200 { 201 int mixedCounter = 0; 202 int lastMixedCounter = -1; 203 boolean gotDecomp = false; 204 boolean gotComp = false; 205 boolean gotSource = false; 206 String decomp = Normalizer.decompose(s, false); 207 String comp = Normalizer.compose(s, false); 208 209 // skip characters that don't have either decomp. 210 // need quick test for this! 211 if (s.equals(decomp) && s.equals(comp)) return; 212 213 it.setSource(s); 214 215 while (true) { 216 String item = it.next(); 217 if (item == null) break; 218 if (item.equals(s)) gotSource = true; 219 if (item.equals(decomp)) gotDecomp = true; 220 if (item.equals(comp)) gotComp = true; 221 if ((mixedCounter & 0x7F) == 0 && (ch < 0xAD00 || ch > 0xAC00 + 11172)) { 222 if (lastMixedCounter != mixedCounter) { 223 logln(""); 224 lastMixedCounter = mixedCounter; 225 } 226 logln("\t" + mixedCounter + "\t" + hex(item) 227 + (item.equals(s) ? "\t(*original*)" : "") 228 + (item.equals(decomp) ? "\t(*decomp*)" : "") 229 + (item.equals(comp) ? "\t(*comp*)" : "") 230 ); 231 } 232 233 } 234 235 // check that zeros optimization doesn't mess up. 236 /* 237 if (true) { 238 it.reset(); 239 itSet.clear(); 240 while (true) { 241 String item = it.next(); 242 if (item == null) break; 243 itSet.add(item); 244 } 245 slowIt.setSource(s); 246 slowItSet.clear(); 247 while (true) { 248 String item = slowIt.next(); 249 if (item == null) break; 250 slowItSet.add(item); 251 } 252 if (!itSet.equals(slowItSet)) { 253 errln("Zero optimization failure with " + getReadable(s)); 254 } 255 } 256 */ 257 258 mixedCounter++; 259 if (!gotSource || !gotDecomp || !gotComp) { 260 errln("FAIL CanonicalIterator: " + s + " decomp: " +decomp+" comp: "+comp); 261 it.reset(); 262 for(String item=it.next();item!=null;item=it.next()){ 263 err(item + " "); 264 } 265 errln(""); 266 } 267 } 268 269 static String collectionToString(Collection col) { 270 StringBuffer result = new StringBuffer(); 271 Iterator it = col.iterator(); 272 while (it.hasNext()) { 273 if (result.length() != 0) result.append(", "); 274 result.append(it.next().toString()); 275 } 276 return result.toString(); 277 } 278 }