1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2010, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.dev.test.normalizer; 10 11 import java.util.Collection; 12 import java.util.Iterator; 13 import java.util.Set; 14 import java.util.SortedSet; 15 import java.util.TreeSet; 16 17 import org.junit.Test; 18 import org.junit.runner.RunWith; 19 import org.junit.runners.JUnit4; 20 21 import com.ibm.icu.dev.test.TestFmwk; 22 import com.ibm.icu.impl.Utility; 23 import com.ibm.icu.lang.UCharacter; 24 import com.ibm.icu.text.CanonicalIterator; 25 import com.ibm.icu.text.Normalizer; 26 import com.ibm.icu.text.UTF16; 27 28 29 // TODO: fit into test framework 30 31 @RunWith(JUnit4.class) 32 public class TestCanonicalIterator extends TestFmwk { 33 34 static final boolean SHOW_NAMES = false; 35 36 static final String testArray[][] = { 37 {"\u00C5d\u0307\u0327", "A\u030Ad\u0307\u0327, A\u030Ad\u0327\u0307, A\u030A\u1E0B\u0327, " 38 + "A\u030A\u1E11\u0307, \u00C5d\u0307\u0327, \u00C5d\u0327\u0307, " 39 + "\u00C5\u1E0B\u0327, \u00C5\u1E11\u0307, \u212Bd\u0307\u0327, " 40 + "\u212Bd\u0327\u0307, \u212B\u1E0B\u0327, \u212B\u1E11\u0307"}, 41 {"\u010d\u017E", "c\u030Cz\u030C, c\u030C\u017E, \u010Dz\u030C, \u010D\u017E"}, 42 {"x\u0307\u0327", "x\u0307\u0327, x\u0327\u0307, \u1E8B\u0327"}, 43 }; 44 45 @Test 46 public void TestExhaustive() { 47 int counter = 0; 48 CanonicalIterator it = new CanonicalIterator(""); 49 /* 50 CanonicalIterator slowIt = new CanonicalIterator(""); 51 slowIt.SKIP_ZEROS = false; 52 */ 53 //Transliterator name = Transliterator.getInstance("[^\\u0020-\\u007F] name"); 54 //Set itSet = new TreeSet(); 55 //Set slowItSet = new TreeSet(); 56 57 58 for (int i = 0; i < 0x10FFFF; ++i) { 59 60 // skip characters we know don't have decomps 61 int type = UCharacter.getType(i); 62 if (type == Character.UNASSIGNED || type == Character.PRIVATE_USE 63 || type == Character.SURROGATE) continue; 64 65 if ((++counter % 5000) == 0) logln("Testing " + Utility.hex(i,0)); 66 67 String s = UTF16.valueOf(i); 68 characterTest(s, i, it); 69 70 characterTest(s + "\u0345", i, it); 71 } 72 } 73 74 public int TestSpeed() { 75 // skip unless verbose 76 if (!isVerbose()) return 0; 77 78 String s = "\uAC01\u0345"; 79 80 CanonicalIterator it = new CanonicalIterator(s); 81 double start, end; 82 int x = 0; // just to keep code from optimizing away. 83 int iterations = 10000; 84 double slowDelta = 0; 85 86 /* 87 CanonicalIterator slowIt = new CanonicalIterator(s); 88 slowIt.SKIP_ZEROS = false; 89 90 start = System.currentTimeMillis(); 91 for (int i = 0; i < iterations; ++i) { 92 slowIt.setSource(s); 93 while (true) { 94 String item = slowIt.next(); 95 if (item == null) break; 96 x += item.length(); 97 } 98 } 99 end = System.currentTimeMillis(); 100 double slowDelta = (end-start) / iterations; 101 logln("Slow iteration: " + slowDelta); 102 */ 103 104 start = System.currentTimeMillis(); 105 for (int i = 0; i < iterations; ++i) { 106 it.setSource(s); 107 while (true) { 108 String item = it.next(); 109 if (item == null) break; 110 x += item.length(); 111 } 112 } 113 end = System.currentTimeMillis(); 114 double fastDelta = (end-start) / iterations; 115 logln("Fast iteration: " + fastDelta + (slowDelta != 0 ? ", " + (fastDelta/slowDelta) : "")); 116 117 118 return x; 119 } 120 121 @Test 122 public void TestBasic() { 123 // This is not interesting anymore as the data is already built 124 // beforehand 125 126 // check build 127 // UnicodeSet ss = CanonicalIterator.getSafeStart(); 128 // logln("Safe Start: " + ss.toPattern(true)); 129 // ss = CanonicalIterator.getStarts('a'); 130 // expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'), 131 // new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB" 132 // + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]") 133 // ); 134 135 // check permute 136 // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted! 137 138 Set results = new TreeSet(); 139 CanonicalIterator.permute("ABC", false, results); 140 expectEqual("Simple permutation ", "", collectionToString(results), "ABC, ACB, BAC, BCA, CAB, CBA"); 141 142 // try samples 143 SortedSet set = new TreeSet(); 144 for (int i = 0; i < testArray.length; ++i) { 145 //logln("Results for: " + name.transliterate(testArray[i])); 146 CanonicalIterator it = new CanonicalIterator(testArray[i][0]); 147 // int counter = 0; 148 set.clear(); 149 String first = null; 150 while (true) { 151 String result = it.next(); 152 if(first==null){ 153 first = result; 154 } 155 if (result == null) break; 156 set.add(result); // sort them 157 //logln(++counter + ": " + hex.transliterate(result)); 158 //logln(" = " + name.transliterate(result)); 159 } 160 expectEqual(i + ": ", testArray[i][0], collectionToString(set), testArray[i][1]); 161 it.reset(); 162 if(!it.next().equals(first)){ 163 errln("CanonicalIterator.reset() failed"); 164 } 165 if(!it.getSource().equals(Normalizer.normalize(testArray[i][0],Normalizer.NFD))){ 166 errln("CanonicalIterator.getSource() does not return NFD of input source"); 167 } 168 } 169 } 170 171 private void expectEqual(String message, String item, Object a, Object b) { 172 if (!a.equals(b)) { 173 errln("FAIL: " + message + getReadable(item)); 174 errln("\t" + getReadable(a)); 175 errln("\t" + getReadable(b)); 176 } else { 177 logln("Checked: " + message + getReadable(item)); 178 logln("\t" + getReadable(a)); 179 logln("\t" + getReadable(b)); 180 } 181 } 182 183 //Transliterator name = null; 184 //Transliterator hex = null; 185 186 public String getReadable(Object obj) { 187 if (obj == null) return "null"; 188 String s = obj.toString(); 189 if (s.length() == 0) return ""; 190 // set up for readable display 191 //if (name == null) name = Transliterator.getInstance("[^\\ -\\u007F] name"); 192 //if (hex == null) hex = Transliterator.getInstance("[^\\ -\\u007F] hex"); 193 return "[" + (SHOW_NAMES ? hex(s) + "; " : "") + hex(s) + "]"; 194 } 195 196 private void characterTest(String s, int ch, CanonicalIterator it) 197 { 198 int mixedCounter = 0; 199 int lastMixedCounter = -1; 200 boolean gotDecomp = false; 201 boolean gotComp = false; 202 boolean gotSource = false; 203 String decomp = Normalizer.decompose(s, false); 204 String comp = Normalizer.compose(s, false); 205 206 // skip characters that don't have either decomp. 207 // need quick test for this! 208 if (s.equals(decomp) && s.equals(comp)) return; 209 210 it.setSource(s); 211 212 while (true) { 213 String item = it.next(); 214 if (item == null) break; 215 if (item.equals(s)) gotSource = true; 216 if (item.equals(decomp)) gotDecomp = true; 217 if (item.equals(comp)) gotComp = true; 218 if ((mixedCounter & 0x7F) == 0 && (ch < 0xAD00 || ch > 0xAC00 + 11172)) { 219 if (lastMixedCounter != mixedCounter) { 220 logln(""); 221 lastMixedCounter = mixedCounter; 222 } 223 logln("\t" + mixedCounter + "\t" + hex(item) 224 + (item.equals(s) ? "\t(*original*)" : "") 225 + (item.equals(decomp) ? "\t(*decomp*)" : "") 226 + (item.equals(comp) ? "\t(*comp*)" : "") 227 ); 228 } 229 230 } 231 232 // check that zeros optimization doesn't mess up. 233 /* 234 if (true) { 235 it.reset(); 236 itSet.clear(); 237 while (true) { 238 String item = it.next(); 239 if (item == null) break; 240 itSet.add(item); 241 } 242 slowIt.setSource(s); 243 slowItSet.clear(); 244 while (true) { 245 String item = slowIt.next(); 246 if (item == null) break; 247 slowItSet.add(item); 248 } 249 if (!itSet.equals(slowItSet)) { 250 errln("Zero optimization failure with " + getReadable(s)); 251 } 252 } 253 */ 254 255 mixedCounter++; 256 if (!gotSource || !gotDecomp || !gotComp) { 257 errln("FAIL CanonicalIterator: " + s + " decomp: " +decomp+" comp: "+comp); 258 it.reset(); 259 for(String item=it.next();item!=null;item=it.next()){ 260 err(item + " "); 261 } 262 errln(""); 263 } 264 } 265 266 static String collectionToString(Collection col) { 267 StringBuffer result = new StringBuffer(); 268 Iterator it = col.iterator(); 269 while (it.hasNext()) { 270 if (result.length() != 0) result.append(", "); 271 result.append(it.next().toString()); 272 } 273 return result.toString(); 274 } 275 }