// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/********************************************************************
 * Copyright (c) 2002-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

/**
 * UCAConformanceTest performs the conformance tests defined in the data
 * files. ICU ships with stub data files, because the full test files are
 * too long. To run the whole test, download the full test files.
 */

package com.ibm.icu.dev.test.collator;

import java.io.BufferedReader;
import java.io.IOException;

import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.test.TestUtil;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.VersionInfo;

/**
 * Reads a collation test data file line by line and checks that every data
 * line collates greater than or equal to the line before it, using both
 * {@code compare()} and (optionally) raw sort keys.
 */
@RunWith(JUnit4.class)
public class UCAConformanceTest extends TestFmwk {

    public UCAConformanceTest() {
    }

    /** Creates the root collator and the raw-string tie-breaker used for data older than UCA 6.2. */
    @Before
    public void init() throws Exception {
        UCA = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
        comparer = new UTF16.StringComparator(true, false, UTF16.StringComparator.FOLD_CASE_DEFAULT);
    }

    /** Root collator, created in {@link #init()}. */
    private RuleBasedCollator UCA;
    /** Collator built from the root collator's rules; created lazily by {@link #initRbUCA()}. */
    private RuleBasedCollator rbUCA;
    /** Tie-breaker on the raw input strings; only consulted when {@link #isAtLeastUCA62} is false. */
    private UTF16.StringComparator comparer;
    /** Whether the Unicode version reported by UCharacter is 6.2 or later. */
    private final boolean isAtLeastUCA62 = UCharacter.getUnicodeVersion().compareTo(VersionInfo.UNICODE_6_2) >= 0;

    @Test
    public void TestTableNonIgnorable() {
        setCollNonIgnorable(UCA);
        openTestFile("NON_IGNORABLE");
        conformanceTest(UCA);
    }

    @Test
    public void TestTableShifted() {
        setCollShifted(UCA);
        openTestFile("SHIFTED");
        conformanceTest(UCA);
    }

    @Test
    public void TestRulesNonIgnorable() {
        if (logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) {
            return;
        }
        initRbUCA();
        if (rbUCA == null) {
            return;
        }

        setCollNonIgnorable(rbUCA);
        openTestFile("NON_IGNORABLE");
        conformanceTest(rbUCA);
    }

    @Test
    public void TestRulesShifted() {
        logln("This test is currently disabled, as it is impossible to "
                + "wholly represent fractional UCA using tailoring rules.");
        /*
         * If this test is ever re-enabled, it would do the following:
         *
         * initRbUCA(); if(rbUCA == null) { return; }
         *
         * setCollShifted(rbUCA); openTestFile("SHIFTED"); conformanceTest(rbUCA);
         */
    }

    /** Reader for the currently open test data file, or null when no file is open. */
    BufferedReader in;

    /**
     * Opens the test data file for the given test type and stores the reader in {@link #in}.
     * The candidates are tried in this order: the "_SHORT" file, the full file, and finally
     * the "_STUB" file. An error is reported if none of them can be opened.
     *
     * @param type the test type that is part of the file name, e.g. "NON_IGNORABLE" or "SHIFTED"
     */
    private void openTestFile(String type) {
        final String baseName = "CollationTest_" + type;
        final String ext = ".txt";
        final String stubSuffix = "_STUB";

        for (String suffix : new String[] { "_SHORT", "", stubSuffix }) {
            try {
                in = TestUtil.getDataReader(baseName + suffix + ext);
            } catch (Exception e) {
                // Deliberately broad: any failure to open this candidate
                // just means that the next candidate is tried.
                continue;
            }
            if (suffix.equals(stubSuffix)) {
                logln("INFO: Working with the stub file.\n" + "If you need the full conformance test, please\n"
                        + "download the appropriate data files from:\n"
                        + "http://unicode.org/cldr/trac/browser/trunk/common/uca");
            }
            return;
        }
        errln("ERROR: Could not find any of the test files");
    }

    /**
     * Closes the currently open test data file, if any, and resets {@link #in} to null.
     * A failure to close is ignored because the data has already been consumed.
     */
    private void closeTestFile() {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ignored) {
            // Best effort: nothing useful can be done if closing fails.
        }
        in = null;
    }

    /**
     * Configures the collator for the NON_IGNORABLE test file: canonical decomposition,
     * no lowercase-first, no case level, alternate handling not shifted.
     * The strength is IDENTICAL for Unicode 6.2 and later, otherwise TERTIARY.
     *
     * @param coll the collator to configure; null is ignored
     */
    private void setCollNonIgnorable(RuleBasedCollator coll) {
        if (coll == null) {
            return;
        }
        coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
        coll.setLowerCaseFirst(false);
        coll.setCaseLevel(false);
        coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.TERTIARY);
        coll.setAlternateHandlingShifted(false);
    }

    /**
     * Configures the collator for the SHIFTED test file: canonical decomposition,
     * no lowercase-first, no case level, alternate handling shifted.
     * The strength is IDENTICAL for Unicode 6.2 and later, otherwise QUATERNARY.
     *
     * @param coll the collator to configure; null is ignored
     */
    private void setCollShifted(RuleBasedCollator coll) {
        if (coll == null) {
            return;
        }
        coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
        coll.setLowerCaseFirst(false);
        coll.setCaseLevel(false);
        coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.QUATERNARY);
        coll.setAlternateHandlingShifted(true);
    }

    /**
     * Lazily builds {@link #rbUCA} from the rules of the root collator.
     * On failure an error is reported and {@link #rbUCA} stays null.
     */
    private void initRbUCA() {
        if (rbUCA != null) {
            return;
        }
        String ucarules = UCA.getRules(true);
        try {
            rbUCA = new RuleBasedCollator(ucarules);
        } catch (Exception e) {
            errln("Failure creating UCA rule-based collator: " + e);
        }
    }

    /**
     * Converts the leading whitespace-separated hexadecimal code points of a data line
     * into a string. Parsing stops at the first character that is neither whitespace
     * nor a letter or digit (expected to be ';'), or at the end of the line.
     *
     * @param line one data line of a test file
     * @return the string built from the parsed code points; empty if there are none
     * @throws NumberFormatException if a letter/digit run is not a valid hexadecimal integer
     */
    private String parseString(String line) {
        final int length = line.length();
        StringBuilder result = new StringBuilder();
        StringBuilder hexDigits = new StringBuilder();
        int i = 0;

        for (;;) {
            while (i < length && Character.isWhitespace(line.charAt(i))) {
                i++;
            }
            while (i < length && Character.isLetterOrDigit(line.charAt(i))) {
                hexDigits.append(line.charAt(i));
                i++;
            }
            if (hexDigits.length() == 0) {
                // We hit something that was not whitespace/letter/digit.
                // Should be ';' or end of string.
                return result.toString();
            }
            /* read one code point */
            int codePoint = Integer.parseInt(hexDigits.toString(), 16);
            hexDigits.setLength(0);
            result.appendCodePoint(codePoint);
        }
    }

    /** Flag for {@link #skipLineBecauseOfBug}: the collator uses shifted alternate handling. */
    private static final int IS_SHIFTED = 1;
    /** Flag for {@link #skipLineBecauseOfBug}: the collator was built from rules. */
    private static final int FROM_RULES = 2;

    /**
     * Hook for temporarily skipping data lines that trigger known bugs.
     * Currently no line is skipped.
     *
     * @param s the parsed test string
     * @param flags a combination of {@link #IS_SHIFTED} and {@link #FROM_RULES}
     * @return true if the line should be skipped
     */
    private static boolean skipLineBecauseOfBug(String s, int flags) {
        // Add temporary exceptions here if there are ICU bugs, until we can fix them.
        // For examples see the ICU 52 version of this file.
        return false;
    }

    /** Maps a comparison result to its sign: -1, 0 or 1. */
    private static int normalizeResult(int result) {
        if (result < 0) {
            return -1;
        }
        return result == 0 ? 0 : 1;
    }

    /**
     * Runs the conformance test over the file opened by {@link #openTestFile}.
     * Each data line is compared with the previous data line; errors are reported if
     * the two compare() directions are not symmetrical, if compare() disagrees with
     * the sort key comparison, or if the previous line sorts after the current line.
     * The test file is always closed before this method returns.
     *
     * @param coll the collator under test; if null, nothing is tested
     */
    private void conformanceTest(RuleBasedCollator coll) {
        if (in == null) {
            return;
        }
        if (coll == null) {
            // Do not leak a reader that was opened for a collator that does not exist.
            closeTestFile();
            return;
        }
        int skipFlags = 0;
        if (coll.isAlternateHandlingShifted()) {
            skipFlags |= IS_SHIFTED;
        }
        if (coll == rbUCA) {
            skipFlags |= FROM_RULES;
        }

        logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest");
        boolean withSortKeys = getProperty("ucaconfnosortkeys") == null;

        int lineNo = 0;

        String line = null, oldLine = null, buffer = null, oldB = null;
        // Two key objects are reused alternately for the previous and the current line.
        RawCollationKey sk1 = new RawCollationKey(), sk2 = new RawCollationKey();
        RawCollationKey oldSk = null, newSk = sk1;

        try {
            while ((line = in.readLine()) != null) {
                lineNo++;
                if (line.length() == 0 || line.charAt(0) == '#') {
                    continue;
                }
                buffer = parseString(line);

                if (skipLineBecauseOfBug(buffer, skipFlags)) {
                    logln("Skipping line " + lineNo + " because of a known bug");
                    continue;
                }

                if (withSortKeys) {
                    coll.getRawCollationKey(buffer, newSk);
                }
                if (oldSk != null) {
                    boolean ok = true;
                    int skres = withSortKeys ? oldSk.compareTo(newSk) : 0;
                    int cmpres = coll.compare(oldB, buffer);
                    int cmpres2 = coll.compare(buffer, oldB);

                    if (cmpres != -cmpres2) {
                        errln(String.format(
                                "Compare result not symmetrical on line %d: "
                                        + "previous vs. current (%d) / current vs. previous (%d)",
                                lineNo, cmpres, cmpres2));
                        ok = false;
                    }

                    // TODO: Compare with normalization turned off if the input passes the FCD test.

                    if (withSortKeys && cmpres != normalizeResult(skres)) {
                        errln("Difference between coll.compare (" + cmpres + ") and sortkey compare (" + skres
                                + ") on line " + lineNo);
                        ok = false;
                    }

                    int res = cmpres;
                    if (res == 0 && !isAtLeastUCA62) {
                        // Up to UCA 6.1, the collation test files use a custom tie-breaker,
                        // comparing the raw input strings.
                        res = comparer.compare(oldB, buffer);
                        // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker,
                        // comparing the NFD versions of the input strings,
                        // which we do via setting strength=identical.
                    }
                    if (res > 0) {
                        errln("Line " + lineNo + " is not greater or equal than previous line");
                        ok = false;
                    }

                    if (!ok) {
                        errln("  Previous data line " + oldLine);
                        errln("  Current data line  " + line);
                        if (withSortKeys) {
                            errln("  Previous key: " + CollationTest.prettify(oldSk));
                            errln("  Current key:  " + CollationTest.prettify(newSk));
                        }
                    }
                }

                // The current line becomes the previous line; switch to the other key object.
                oldSk = newSk;
                oldB = buffer;
                oldLine = line;
                newSk = (oldSk == sk1) ? sk2 : sk1;
            }
        } catch (Exception e) {
            errln("Unexpected exception " + e);
        } finally {
            closeTestFile();
        }
    }
}