/* GENERATED SOURCE. DO NOT MODIFY. */
// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

package android.icu.dev.test.normalizer;

import java.io.BufferedReader;
import java.io.IOException;
import java.text.StringCharacterIterator;

import org.junit.Ignore;
import org.junit.Test;

import android.icu.dev.test.TestFmwk;
import android.icu.dev.test.TestUtil;
import android.icu.impl.Utility;
import android.icu.text.Normalizer;
import android.icu.text.UTF16;
import android.icu.text.UnicodeSet;

/**
 * Runs the five-column normalization conformance data (NormalizationTest.txt
 * format) through the one-shot and the iterative {@link Normalizer} APIs.
 */
public class ConformanceTest extends TestFmwk {

    /** Shared iterative normalizer; text, mode and options are reset before each use. */
    Normalizer normalizer;

    public ConformanceTest() {
        // Doesn't matter what the string and mode are; we'll change
        // them later as needed.
        normalizer = new Normalizer("", Normalizer.NFC, 0);
    }

    // more interesting conformance test cases, not in the unicode.org NormalizationTest.txt
    static String[] moreCases = {
        // Markus 2001aug30
        "0061 0332 0308;00E4 0332;0061 0332 0308;00E4 0332;0061 0332 0308; # Markus 0",

        // Markus 2001oct26 - test edge case for iteration: U+0f73.cc==0 but decomposition.lead.cc==129
        "0061 0301 0F73;00E1 0F71 0F72;0061 0F71 0F72 0301;00E1 0F71 0F72;0061 0F71 0F72 0301; # Markus 1"
    };

    /**
     * Tests the conformance of Normalizer against the data file
     * "unicode/NormalizationTest.txt" (format as published at
     * http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt),
     * followed by the extra cases in {@link #moreCases}.
     */
    @Test
    public void TestConformance() throws Exception {
        runConformance("unicode/NormalizationTest.txt", 0);
    }

    /**
     * Same as {@link #TestConformance()} but with the Unicode 3.2 data file and
     * the {@code Normalizer.UNICODE_3_2} option.
     */
    @Test
    public void TestConformance_3_2() throws Exception {
        runConformance("unicode/NormalizationTest-3.2.0.txt", Normalizer.UNICODE_3_2);
    }

    /**
     * Reads every data line of the given test file, then every entry of
     * {@link #moreCases}, and checks each one with
     * {@code checkConformance}. Empty lines and lines starting with
     * '#' or '@' are skipped.
     *
     * @param fileName name of the data file, resolved by {@code TestUtil.getDataReader}
     * @param options  Normalizer option bits applied to the checks
     * @throws IllegalArgumentException if the file cannot be read (the
     *         IOException is attached as the cause) or a line is malformed
     */
    public void runConformance(String fileName, int options) throws Exception {
        String line = null;
        String[] fields = new String[5];
        StringBuffer buf = new StringBuffer();
        int passCount = 0;
        int failCount = 0;
        // NOTE(review): code points are removed from this set below, but the set
        // is never read afterwards in this method.
        UnicodeSet other = new UnicodeSet(0, 0x10ffff);
        BufferedReader input = null;
        try {
            input = TestUtil.getDataReader(fileName);
            boolean fileDone = false;
            // Separate index for moreCases: sharing the loop counter would advance
            // it twice per extra case and skip every second entry.
            int extraIndex = 0;
            for (int count = 0;; ++count) {
                if (!fileDone) {
                    line = input.readLine();
                    fileDone = (line == null);
                }
                if (fileDone) {
                    // read the extra test cases
                    if (extraIndex >= moreCases.length) {
                        // all done
                        break;
                    }
                    line = moreCases[extraIndex++];
                }
                if (line.length() == 0) {
                    continue;
                }

                // Expect 5 columns of this format:
                // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # <comments>

                // Skip comments
                char first = line.charAt(0);
                if (first == '#' || first == '@') {
                    continue;
                }

                // Parse out the fields
                hexsplit(line, ';', fields, buf);

                // Remove a single code point from the "other" UnicodeSet
                if (fields[0].length() == UTF16.moveCodePointOffset(fields[0], 0, 1)) {
                    int c = UTF16.charAt(fields[0], 0);
                    if (0xac20 <= c && c <= 0xd73f) {
                        // not an exhaustive test run: skip most Hangul syllables
                        if (c == 0xac20) {
                            other.remove(0xac20, 0xd73f);
                        }
                        continue;
                    }
                    other.remove(c);
                }
                if (checkConformance(fields, line, options)) {
                    ++passCount;
                } else {
                    ++failCount;
                }
                if ((count % 1000) == 999) {
                    logln("Line " + (count + 1));
                }
            }
        } catch (IOException ex) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new IllegalArgumentException("Couldn't read file "
                    + ex.getClass().getName() + " " + ex.getMessage()
                    + " line = " + line, ex);
        } finally {
            if (input != null) {
                try {
                    input.close();
                } catch (IOException ignored) {
                    // Best effort: a failure to close must not mask the test outcome.
                }
            }
        }

        if (failCount != 0) {
            errln("Total: " + failCount + " lines failed, "
                    + passCount + " lines passed");
        } else {
            logln("Total: " + passCount + " lines passed");
        }
    }

    /**
     * Verify the conformance of the given line of the Unicode
     * normalization (UTR 15) test suite file. For each line,
     * there are five columns, corresponding to field[0]..field[4].
     *
     * The following invariants must be true for all conformant implementations
     *  c2 == NFC(c1) == NFC(c2) == NFC(c3)
     *  c3 == NFD(c1) == NFD(c2) == NFD(c3)
     *  c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
     *  c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
     *
     * In addition the quick-check, isNormalized, FCD and compare APIs are
     * exercised on the same columns.
     *
     * @param field the 5 columns
     * @param line the source line from the test suite file
     * @param options Normalizer option bits
     * @return true if the test passes
     */
    private boolean checkConformance(String[] field, String line, int options) throws Exception {
        boolean pass = true;
        StringBuffer buf = new StringBuffer(); // scratch

        for (int i = 0; i < 5; ++i) {
            String column = "(c" + (i + 1);
            if (i < 3) {
                // The canonical forms are only defined over c1..c3.
                pass &= checkMode("C", Normalizer.NFC, field[i], field[1], "c2!=C" + column, buf, options);
                pass &= checkMode("D", Normalizer.NFD, field[i], field[2], "c3!=D" + column, buf, options);
            }
            pass &= checkMode("KC", Normalizer.NFKC, field[i], field[3], "c4!=KC" + column, buf, options);
            pass &= checkMode("KD", Normalizer.NFKD, field[i], field[4], "c5!=KD" + column, buf, options);
        }

        // The cross checks do not depend on the loop index, so run each once.
        cross(field[2] /*NFD String*/, field[1] /*NFC String*/, Normalizer.NFC);
        cross(field[1] /*NFC String*/, field[2] /*NFD String*/, Normalizer.NFD);
        cross(field[4] /*NFKD String*/, field[3] /*NFKC String*/, Normalizer.NFKC);
        cross(field[3] /*NFKC String*/, field[4] /*NFKD String*/, Normalizer.NFKD);

        compare(field[1], field[2]);
        compare(field[0], field[1]);
        compare(field[0], field[2]);

        // test quick checks
        if (Normalizer.NO == Normalizer.quickCheck(field[1], Normalizer.NFC, options)) {
            errln("Normalizer error: quickCheck(NFC(s), Normalizer.NFC) is Normalizer.NO");
            pass = false;
        }
        if (Normalizer.NO == Normalizer.quickCheck(field[2], Normalizer.NFD, options)) {
            errln("Normalizer error: quickCheck(NFD(s), Normalizer.NFD) is Normalizer.NO");
            pass = false;
        }
        if (Normalizer.NO == Normalizer.quickCheck(field[3], Normalizer.NFKC, options)) {
            errln("Normalizer error: quickCheck(NFKC(s), Normalizer.NFKC) is Normalizer.NO");
            pass = false;
        }
        if (Normalizer.NO == Normalizer.quickCheck(field[4], Normalizer.NFKD, options)) {
            errln("Normalizer error: quickCheck(NFKD(s), Normalizer.NFKD) is Normalizer.NO");
            pass = false;
        }

        if (!Normalizer.isNormalized(field[1], Normalizer.NFC, options)) {
            errln("Normalizer error: isNormalized(NFC(s), Normalizer.NFC) is false");
            pass = false;
        }
        if (!field[0].equals(field[1]) && Normalizer.isNormalized(field[0], Normalizer.NFC, options)) {
            errln("Normalizer error: isNormalized(s, Normalizer.NFC) is TRUE");
            pass = false;
        }
        if (!Normalizer.isNormalized(field[3], Normalizer.NFKC, options)) {
            errln("Normalizer error: isNormalized(NFKC(s), Normalizer.NFKC) is false");
            pass = false;
        }
        if (!field[0].equals(field[3]) && Normalizer.isNormalized(field[0], Normalizer.NFKC, options)) {
            errln("Normalizer error: isNormalized(s, Normalizer.NFKC) is TRUE");
            pass = false;
        }
        // test api that takes a char[]
        if (!Normalizer.isNormalized(field[1].toCharArray(), 0, field[1].length(), Normalizer.NFC, options)) {
            errln("Normalizer error: isNormalized(NFC(s), Normalizer.NFC) is false");
            pass = false;
        }
        // test api that takes a codepoint
        if (!Normalizer.isNormalized(UTF16.charAt(field[1], 0), Normalizer.NFC, options)) {
            errln("Normalizer error: isNormalized(NFC(s), Normalizer.NFC) is false");
            pass = false;
        }

        // test FCD quick check and "makeFCD"
        String fcd = Normalizer.normalize(field[0], Normalizer.FCD);
        if (Normalizer.NO == Normalizer.quickCheck(fcd, Normalizer.FCD, options)) {
            errln("Normalizer error: quickCheck(FCD(s), Normalizer.FCD) is Normalizer.NO");
            pass = false;
        }
        // check FCD return length
        {
            char[] fcd2 = new char[fcd.length() * 2];
            char[] src = field[0].toCharArray();
            int fcdLen = Normalizer.normalize(src, 0, src.length, fcd2, fcd.length(), fcd2.length,
                    Normalizer.FCD, 0);
            if (fcdLen != fcd.length()) {
                errln("makeFCD did not return the correct length");
                pass = false;
            }
        }
        if (Normalizer.NO == Normalizer.quickCheck(field[2], Normalizer.FCD, options)) {
            errln("Normalizer error: quickCheck(NFD(s), Normalizer.FCD) is Normalizer.NO");
            pass = false;
        }
        if (Normalizer.NO == Normalizer.quickCheck(field[4], Normalizer.FCD, options)) {
            errln("Normalizer error: quickCheck(NFKD(s), Normalizer.FCD) is Normalizer.NO");
            pass = false;
        }

        // Exercise FCD iteration in both directions; the output is not compared,
        // the calls only have to complete without throwing.
        iterativeNorm(new StringCharacterIterator(field[0]), Normalizer.FCD, buf, +1, options);
        iterativeNorm(new StringCharacterIterator(field[0]), Normalizer.FCD, buf, -1, options);

        iterativeNorm(new StringCharacterIterator(field[2]), Normalizer.FCD, buf, +1, options);
        iterativeNorm(new StringCharacterIterator(field[2]), Normalizer.FCD, buf, -1, options);

        iterativeNorm(new StringCharacterIterator(field[4]), Normalizer.FCD, buf, +1, options);
        iterativeNorm(new StringCharacterIterator(field[4]), Normalizer.FCD, buf, -1, options);

        String nfdOfFcd = Normalizer.normalize(fcd, Normalizer.NFD);
        if (!nfdOfFcd.equals(field[2])) {
            errln("Normalizer error: NFD(FCD(s))!=NFD(s)");
            pass = false;
        }

        if (!field[0].equals(field[2])) {
            // two strings that are canonically equivalent must test
            // equal under a canonical caseless match
            // see UAX #21 Case Mappings and Jitterbug 2021 and
            // Unicode Technical Committee meeting consensus 92-C31
            int rc = Normalizer.compare(field[0], field[2],
                    (options << Normalizer.COMPARE_NORM_OPTIONS_SHIFT) | Normalizer.COMPARE_IGNORE_CASE);
            if (rc != 0) {
                errln("Normalizer.compare(original, NFD, case-insensitive) returned " + rc
                        + " instead of 0 for equal");
                pass = false;
            }
        }

        // Report the offending line only after every check had a chance to fail.
        if (!pass) {
            errln("FAIL: " + line);
        }
        return pass;
    }

    /**
     * Normalizes {@code src} to {@code mode} through the one-shot API and
     * through the iterative API (String and StringCharacterIterator input,
     * forward and backward), comparing each result with {@code expected}.
     *
     * @param op       short name of the normalization form used in failure messages, e.g. "KC"
     * @param mode     normalization mode under test
     * @param src      string being normalized
     * @param expected expected result
     * @param msg      description of the invariant being checked
     * @param buf      scratch buffer
     * @param options  Normalizer option bits
     * @return true if all five results equal {@code expected}
     */
    private boolean checkMode(String op, Normalizer.Mode mode, String src, String expected,
                              String msg, StringBuffer buf, int options) throws Exception {
        boolean pass = true;
        String forward = op + "(+1)";
        String backward = op + "(-1)";

        String out = Normalizer.normalize(src, mode, options);
        pass &= assertEqual(op, src, out, expected, msg);

        out = iterativeNorm(src, mode, buf, +1, options);
        pass &= assertEqual(forward, src, out, expected, msg);

        out = iterativeNorm(src, mode, buf, -1, options);
        pass &= assertEqual(backward, src, out, expected, msg);

        out = iterativeNorm(new StringCharacterIterator(src), mode, buf, +1, options);
        pass &= assertEqual(forward, src, out, expected, msg);

        out = iterativeNorm(new StringCharacterIterator(src), mode, buf, -1, options);
        pass &= assertEqual(backward, src, out, expected, msg);

        return pass;
    }

    /**
     * Checks the int/int, int/String and char[]/char[] overloads of
     * {@code Normalizer.compare} with {@code COMPARE_IGNORE_CASE}, picking the
     * overload by the UTF-16 lengths of the two strings. Any non-zero result
     * is reported through {@code errln}.
     */
    // two strings that are canonically equivalent must test
    // equal under a canonical caseless match
    // see UAX #21 Case Mappings and Jitterbug 2021 and
    // Unicode Technical Committee meeting consensus 92-C31
    private void compare(String s1, String s2) {
        if (s1.length() == 1 && s2.length() == 1) {
            if (Normalizer.compare(UTF16.charAt(s1, 0), UTF16.charAt(s2, 0), Normalizer.COMPARE_IGNORE_CASE) != 0) {
                errln("Normalizer.compare(int,int) failed for s1: "
                        + Utility.hex(s1) + " s2: " + Utility.hex(s2));
            }
        }
        if (s1.length() == 1 && s2.length() > 1) {
            if (Normalizer.compare(UTF16.charAt(s1, 0), s2, Normalizer.COMPARE_IGNORE_CASE) != 0) {
                errln("Normalizer.compare(int,String) failed for s1: "
                        + Utility.hex(s1) + " s2: " + Utility.hex(s2));
            }
        }
        if (s1.length() > 1 && s2.length() > 1) {
            // TODO: Re-enable this tests after UTC fixes UAX 21
            // NOTE(review): the check below is active despite the TODO above.
            if (Normalizer.compare(s1.toCharArray(), s2.toCharArray(), Normalizer.COMPARE_IGNORE_CASE) != 0) {
                errln("Normalizer.compare(char[],char[]) failed for s1: "
                        + Utility.hex(s1) + " s2: " + Utility.hex(s2));
            }
        }
    }

    /**
     * Reports an error unless normalizing {@code s1} to {@code mode}
     * (default options) yields exactly {@code s2}.
     */
    private void cross(String s1, String s2, Normalizer.Mode mode) {
        String result = Normalizer.normalize(s1, mode);
        if (!result.equals(s2)) {
            errln("cross test failed s1: " + Utility.hex(s1) + " s2: "
                    + Utility.hex(s2));
        }
    }

    /**
     * Do a normalization using the iterative API in the given direction.
     * @param str the text to normalize
     * @param mode normalization mode
     * @param buf scratch buffer
     * @param dir either +1 or -1
     * @param options Normalizer option bits
     * @return the normalized text
     */
    private String iterativeNorm(String str, Normalizer.Mode mode,
                                 StringBuffer buf, int dir, int options) throws Exception {
        normalizer.setText(str);
        return collect(mode, buf, dir, options);
    }

    /**
     * Do a normalization using the iterative API in the given direction.
     * @param str a Java StringCharacterIterator
     * @param mode normalization mode
     * @param buf scratch buffer
     * @param dir either +1 or -1
     * @param options Normalizer option bits
     * @return the normalized text
     */
    private String iterativeNorm(StringCharacterIterator str, Normalizer.Mode mode,
                                 StringBuffer buf, int dir, int options) throws Exception {
        normalizer.setText(str);
        return collect(mode, buf, dir, options);
    }

    /**
     * Configures the shared normalizer (whose text must already be set) and
     * walks it from the first code point forward when {@code dir > 0}, or from
     * the last code point backward otherwise, accumulating the output in
     * {@code buf}.
     */
    private String collect(Normalizer.Mode mode, StringBuffer buf, int dir, int options) {
        normalizer.setMode(mode);
        buf.setLength(0);
        normalizer.setOption(-1, false);      // reset all options
        normalizer.setOption(options, true);  // set desired options

        if (dir > 0) {
            for (int ch = normalizer.first(); ch != Normalizer.DONE; ch = normalizer.next()) {
                buf.append(UTF16.valueOf(ch));
            }
        } else {
            // Walking backward yields code points in reverse, so prepend each one.
            for (int ch = normalizer.last(); ch != Normalizer.DONE; ch = normalizer.previous()) {
                buf.insert(0, UTF16.valueOf(ch));
            }
        }
        return buf.toString();
    }

    /**
     * @param op name of normalization form, e.g., "KC"
     * @param s string being normalized
     * @param got value received
     * @param exp expected value
     * @param msg description of this test
     * @return true if got equals exp; otherwise an error is reported and false is returned
     */
    private boolean assertEqual(String op, String s, String got,
                                String exp, String msg) {
        if (exp.equals(got)) {
            return true;
        }
        errln(("      " + msg + ") " + op + "(" + s + ")=" + hex(got)
                + ", exp. " + hex(exp)));
        return false;
    }

    /**
     * Split a string into pieces based on the given delimiter
     * character.  Then, parse the resultant fields from hex into
     * characters.  That is, "0040 0400;0C00;0899" -> new String[] {
     * "\u0040\u0400", "\u0C00", "\u0899" }.  The output is assumed to
     * be of the proper length already, and exactly output.length
     * fields are parsed.  If there are too few an exception is
     * thrown.  If there are too many the extras are ignored.
     * Every field must be terminated by the delimiter.
     *
     * @param s         the line to split
     * @param delimiter the field terminator
     * @param output    receives the parsed fields
     * @param buf       scratch buffer
     * @throws IllegalArgumentException if a field is missing, empty, or not valid hex
     */
    private static void hexsplit(String s, char delimiter,
                                 String[] output, StringBuffer buf) {
        int pos = 0;
        for (int i = 0; i < output.length; ++i) {
            int delim = s.indexOf(delimiter, pos);
            if (delim < 0) {
                throw new IllegalArgumentException("Missing field in " + s);
            }
            // Our field is from pos..delim-1.
            buf.setLength(0);

            String toHex = s.substring(pos, delim);
            int len = toHex.length();
            int index = 0;
            while (index < len) {
                if (toHex.charAt(index) == ' ') {
                    index++;
                    continue;
                }
                // A token runs up to the next space or to the end of the field.
                int spacePos = toHex.indexOf(' ', index);
                if (spacePos == -1) {
                    spacePos = len;
                }
                appendInt(buf, toHex.substring(index, spacePos), s);
                index = spacePos + 1;
            }

            if (buf.length() < 1) {
                throw new IllegalArgumentException("Empty field " + i + " in " + s);
            }
            output[i] = buf.toString();
            pos = delim + 1; // Skip over delim
        }
    }

    /**
     * Parses {@code strToHex} as a hexadecimal code point and appends its
     * UTF-16 form to {@code buf}.
     *
     * @param buf      buffer to append to
     * @param strToHex hexadecimal digits of one code point
     * @param s        the full source line, used only in the error message
     * @throws IllegalArgumentException if the value is not valid hex
     *         (NumberFormatException) or lies outside 0..0x10FFFF
     */
    public static void appendInt(StringBuffer buf, String strToHex, String s) {
        int hex = Integer.parseInt(strToHex, 16);
        if (hex < 0 || hex > 0x10FFFF) {
            throw new IllegalArgumentException("Out of range hex "
                    + hex + " in " + s);
        }
        // Emits one char for BMP values and a surrogate pair for supplementary ones.
        buf.appendCodePoint(hex);
    }

    // Specific tests for debugging.  These are generally failures
    // taken from the conformance file, but culled out to make
    // debugging easier.  These can be eliminated without affecting
    // coverage.
    @Ignore
    @Test
    public void _hideTestCase6(/*int options*/) throws Exception {
        _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;", /*options*/ 0);
    }

    /** Parses a single five-column data line and runs the conformance checks on it. */
    private void _testOneLine(String line, int options) throws Exception {
        String[] fields = new String[5];
        StringBuffer buf = new StringBuffer();
        // Parse out the fields
        hexsplit(line, ';', fields, buf);
        checkConformance(fields, line, options);
    }
}