1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 1996-2015, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package android.icu.dev.test.normalizer; 12 13 import java.io.BufferedReader; 14 import java.io.IOException; 15 16 import org.junit.Ignore; 17 import org.junit.Test; 18 import org.junit.runner.RunWith; 19 import org.junit.runners.JUnit4; 20 21 import android.icu.dev.test.TestFmwk; 22 import android.icu.dev.test.TestUtil; 23 import android.icu.text.UTF16; 24 import android.icu.text.UnicodeSet; 25 import android.icu.testsharding.MainTestShard; 26 27 28 @MainTestShard 29 @RunWith(JUnit4.class) 30 public class UnicodeNormalizerConformanceTest extends TestFmwk { 31 32 UnicodeNormalizer normalizer_C, normalizer_D, normalizer_KC, normalizer_KD; 33 34 public UnicodeNormalizerConformanceTest() { 35 // Doesn't matter what the string and mode are; we'll change 36 // them later as needed. 37 normalizer_C = new UnicodeNormalizer(UnicodeNormalizer.C, true); 38 normalizer_D = new UnicodeNormalizer(UnicodeNormalizer.D, false); 39 normalizer_KC = new UnicodeNormalizer(UnicodeNormalizer.KC, false); 40 normalizer_KD = new UnicodeNormalizer(UnicodeNormalizer.KD, false); 41 42 } 43 // more interesting conformance test cases, not in the unicode.org NormalizationTest.txt 44 static String[] moreCases ={ 45 // Markus 2001aug30 46 "0061 0332 0308;00E4 0332;0061 0332 0308;00E4 0332;0061 0332 0308; # Markus 0", 47 48 // Markus 2001oct26 - test edge case for iteration: U+0f73.cc==0 but decomposition.lead.cc==129 49 "0061 0301 0F73;00E1 0F71 0F72;0061 0F71 0F72 0301;00E1 0F71 0F72;0061 0F71 0F72 0301; # Markus 1" 50 }; 51 52 /** 53 * Test the conformance of NewNormalizer to 54 * http://www.unicode.org/unicode/reports/tr15/conformance/Draft-TestSuite.txt. 55 * This file must be located at the path specified as TEST_SUITE_FILE. 56 */ 57 @Test 58 public void TestConformance() throws Exception{ 59 String line = null; 60 String[] fields = new String[5]; 61 StringBuffer buf = new StringBuffer(); 62 int passCount = 0; 63 int failCount = 0; 64 UnicodeSet other = new UnicodeSet(0, 0x10ffff); 65 int c=0; 66 BufferedReader input = null; 67 try { 68 input = TestUtil.getDataReader("unicode/NormalizationTest.txt"); 69 for (int count = 0;;++count) { 70 line = input.readLine(); 71 if (line == null) { 72 //read the extra test cases 73 if(count > moreCases.length) { 74 count = 0; 75 } else if(count == moreCases.length) { 76 // all done 77 break; 78 } 79 line = moreCases[count++]; 80 } 81 if (line.length() == 0) continue; 82 83 // Expect 5 columns of this format: 84 // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # <comments> 85 86 // Skip comments 87 if (line.charAt(0) == '#' || line.charAt(0)=='@') continue; 88 89 // Parse out the fields 90 hexsplit(line, ';', fields, buf); 91 92 // Remove a single code point from the "other" UnicodeSet 93 if(fields[0].length()==UTF16.moveCodePointOffset(fields[0],0, 1)) { 94 c=UTF16.charAt(fields[0],0); 95 if(0xac20<=c && c<=0xd73f) { 96 // not an exhaustive test run: skip most Hangul syllables 97 if(c==0xac20) { 98 other.remove(0xac20, 0xd73f); 99 } 100 continue; 101 } 102 other.remove(c); 103 } 104 if (checkConformance(fields, line)) { 105 ++passCount; 106 } else { 107 ++failCount; 108 } 109 if ((count % 1000) == 999) { 110 logln("Line " + (count+1)); 111 } 112 } 113 } catch (IOException ex) { 114 ex.printStackTrace(); 115 throw new IllegalArgumentException("Couldn't read file " 116 + ex.getClass().getName() + " " + ex.getMessage() 117 + " line = " + line 118 ); 119 } finally { 120 if (input != null) { 121 try { 122 input.close(); 123 } catch (Exception ignored) { 124 } 125 } 126 } 127 128 if (failCount != 0) { 129 errln("Total: " + failCount + " lines failed, " + 130 passCount + " lines passed"); 131 } else { 132 logln("Total: " + passCount + " lines passed"); 133 } 134 } 135 136 /** 137 * Verify the conformance of the given line of the Unicode 138 * normalization (UTR 15) test suite file. For each line, 139 * there are five columns, corresponding to field[0]..field[4]. 140 * 141 * The following invariants must be true for all conformant implementations 142 * c2 == NFC(c1) == NFC(c2) == NFC(c3) 143 * c3 == NFD(c1) == NFD(c2) == NFD(c3) 144 * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) 145 * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) 146 * 147 * @param field the 5 columns 148 * @param line the source line from the test suite file 149 * @return true if the test passes 150 */ 151 private boolean checkConformance(String[] field, String line) throws Exception{ 152 boolean pass = true; 153 // StringBuffer buf = new StringBuffer(); // scratch 154 String out; 155 int i=0; 156 for (i=0; i<5; ++i) { 157 if (i<3) { 158 out = normalizer_C.normalize(field[i]); 159 pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c" + (i+1)); 160 161 out = normalizer_D.normalize(field[i]); 162 pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c" + (i+1)); 163 164 } 165 out = normalizer_KC.normalize(field[i]); 166 pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c" + (i+1)); 167 168 out = normalizer_KD.normalize(field[i]); 169 pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c" + (i+1)); 170 171 } 172 173 if (!pass) { 174 errln("FAIL: " + line); 175 } 176 177 return pass; 178 } 179 180 /** 181 * @param op name of normalization form, e.g., "KC" 182 * @param s string being normalized 183 * @param got value received 184 * @param exp expected value 185 * @param msg description of this test 186 * @returns true if got == exp 187 */ 188 private boolean assertEqual(String op, String s, String got, 189 String exp, String msg) { 190 if (exp.equals(got)) { 191 return true; 192 } 193 errln((" " + msg + ") " + op + "(" + s + ")=" + hex(got) + 194 ", exp. " + hex(exp))); 195 return false; 196 } 197 198 /** 199 * Split a string into pieces based on the given delimiter 200 * character. Then, parse the resultant fields from hex into 201 * characters. That is, "0040 0400;0C00;0899" -> new String[] { 202 * "\u0040\u0400", "\u0C00", "\u0899" }. The output is assumed to 203 * be of the proper length already, and exactly output.length 204 * fields are parsed. If there are too few an exception is 205 * thrown. If there are too many the extras are ignored. 206 * 207 * @param buf scratch buffer 208 */ 209 private static void hexsplit(String s, char delimiter, 210 String[] output, StringBuffer buf) { 211 int i; 212 int pos = 0; 213 for (i=0; i<output.length; ++i) { 214 int delim = s.indexOf(delimiter, pos); 215 if (delim < 0) { 216 throw new IllegalArgumentException("Missing field in " + s); 217 } 218 // Our field is from pos..delim-1. 219 buf.setLength(0); 220 221 String toHex = s.substring(pos,delim); 222 pos = delim; 223 int index = 0; 224 int len = toHex.length(); 225 while(index< len){ 226 if(toHex.charAt(index)==' '){ 227 index++; 228 }else{ 229 int spacePos = toHex.indexOf(' ', index); 230 if(spacePos==-1){ 231 appendInt(buf,toHex.substring(index,len),s); 232 spacePos = len; 233 }else{ 234 appendInt(buf,toHex.substring(index, spacePos),s); 235 } 236 index = spacePos+1; 237 } 238 } 239 240 if (buf.length() < 1) { 241 throw new IllegalArgumentException("Empty field " + i + " in " + s); 242 } 243 output[i] = buf.toString(); 244 ++pos; // Skip over delim 245 } 246 } 247 public static void appendInt(StringBuffer buf, String strToHex, String s){ 248 int hex = Integer.parseInt(strToHex,16); 249 if (hex < 0 ) { 250 throw new IllegalArgumentException("Out of range hex " + 251 hex + " in " + s); 252 }else if (hex > 0xFFFF){ 253 buf.append((char)((hex>>10)+0xd7c0)); 254 buf.append((char)((hex&0x3ff)|0xdc00)); 255 }else{ 256 buf.append((char) hex); 257 } 258 } 259 260 // Specific tests for debugging. These are generally failures 261 // taken from the conformance file, but culled out to make 262 // debugging easier. These can be eliminated without affecting 263 // coverage. 264 @Ignore 265 @Test 266 public void _hideTestCase6() throws Exception{ 267 _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;"); 268 } 269 270 private void _testOneLine(String line) throws Exception{ 271 String[] fields = new String[5]; 272 StringBuffer buf = new StringBuffer(); 273 // Parse out the fields 274 hexsplit(line, ';', fields, buf); 275 checkConformance(fields, line); 276 } 277 278 279 } 280