1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 2002-2014, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 9 /** 10 * UCAConformanceTest performs conformance tests defined in the data 11 * files. ICU ships with stub data files, as the whole test are too 12 * long. To do the whole test, download the test files. 13 */ 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_COLLATION 18 19 #include "ucaconf.h" 20 #include "unicode/sortkey.h" 21 #include "unicode/tblcoll.h" 22 #include "unicode/ustring.h" 23 #include "cmemory.h" 24 #include "cstring.h" 25 #include "uparse.h" 26 27 UCAConformanceTest::UCAConformanceTest() : 28 rbUCA(NULL), 29 testFile(NULL), 30 status(U_ZERO_ERROR) 31 { 32 UCA = (RuleBasedCollator *)Collator::createInstance(Locale::getRoot(), status); 33 if(U_FAILURE(status)) { 34 dataerrln("Error - UCAConformanceTest: Unable to open UCA collator! - %s", u_errorName(status)); 35 } 36 37 const char *srcDir = IntlTest::getSourceTestData(status); 38 if (U_FAILURE(status)) { 39 dataerrln("Could not open test data %s", u_errorName(status)); 40 return; 41 } 42 uprv_strcpy(testDataPath, srcDir); 43 uprv_strcat(testDataPath, "CollationTest_"); 44 45 UVersionInfo uniVersion; 46 static const UVersionInfo v62 = { 6, 2, 0, 0 }; 47 u_getUnicodeVersion(uniVersion); 48 isAtLeastUCA62 = uprv_memcmp(uniVersion, v62, 4) >= 0; 49 } 50 51 UCAConformanceTest::~UCAConformanceTest() 52 { 53 delete UCA; 54 delete rbUCA; 55 if (testFile) { 56 fclose(testFile); 57 } 58 } 59 60 void UCAConformanceTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) 61 { 62 if(exec) { 63 logln("TestSuite UCAConformanceTest: "); 64 } 65 TESTCASE_AUTO_BEGIN; 66 TESTCASE_AUTO(TestTableNonIgnorable); 67 TESTCASE_AUTO(TestTableShifted); 68 TESTCASE_AUTO(TestRulesNonIgnorable); 69 TESTCASE_AUTO(TestRulesShifted); 70 TESTCASE_AUTO_END; 71 } 72 73 void UCAConformanceTest::initRbUCA() 74 { 75 if(!rbUCA) { 76 if (UCA) { 77 UnicodeString ucarules; 78 UCA->getRules(UCOL_FULL_RULES, ucarules); 79 rbUCA = new RuleBasedCollator(ucarules, status); 80 if (U_FAILURE(status)) { 81 dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status)); 82 return; 83 } 84 } else { 85 dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status)); 86 return; 87 } 88 } 89 } 90 91 void UCAConformanceTest::setCollNonIgnorable(Collator *coll) 92 { 93 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 94 coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); 95 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status); 96 coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_TERTIARY, status); 97 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, status); 98 } 99 100 void UCAConformanceTest::setCollShifted(Collator *coll) 101 { 102 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 103 coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); 104 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status); 105 coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_QUATERNARY, status); 106 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); 107 } 108 109 void UCAConformanceTest::openTestFile(const char *type) 110 { 111 const char *ext = ".txt"; 112 if(testFile) { 113 fclose(testFile); 114 } 115 char buffer[1024]; 116 uprv_strcpy(buffer, testDataPath); 117 uprv_strcat(buffer, type); 118 int32_t bufLen = (int32_t)uprv_strlen(buffer); 119 120 // we try to open 3 files: 121 // path/CollationTest_type.txt 122 // path/CollationTest_type_SHORT.txt 123 // path/CollationTest_type_STUB.txt 124 // we are going to test with the first one that we manage to open. 125 126 uprv_strcpy(buffer+bufLen, ext); 127 128 testFile = fopen(buffer, "rb"); 129 130 if(testFile == 0) { 131 uprv_strcpy(buffer+bufLen, "_SHORT"); 132 uprv_strcat(buffer, ext); 133 testFile = fopen(buffer, "rb"); 134 135 if(testFile == 0) { 136 uprv_strcpy(buffer+bufLen, "_STUB"); 137 uprv_strcat(buffer, ext); 138 testFile = fopen(buffer, "rb"); 139 140 if (testFile == 0) { 141 *(buffer+bufLen) = 0; 142 dataerrln("Could not open any of the conformance test files, tried opening base %s\n", buffer); 143 return; 144 } else { 145 infoln( 146 "INFO: Working with the stub file.\n" 147 "If you need the full conformance test, please\n" 148 "download the appropriate data files from:\n" 149 "http://unicode.org/cldr/trac/browser/trunk/common/uca"); 150 } 151 } 152 } 153 } 154 155 static const uint32_t IS_SHIFTED = 1; 156 static const uint32_t FROM_RULES = 2; 157 158 static UBool 159 skipLineBecauseOfBug(const UChar *s, int32_t length, uint32_t flags) { 160 // Add temporary exceptions here if there are ICU bugs, until we can fix them. 161 // For examples see the ICU 52 version of this file. 162 (void)s; 163 (void)length; 164 (void)flags; 165 return FALSE; 166 } 167 168 static UCollationResult 169 normalizeResult(int32_t result) { 170 return result<0 ? UCOL_LESS : result==0 ? UCOL_EQUAL : UCOL_GREATER; 171 } 172 173 void UCAConformanceTest::testConformance(const Collator *coll) 174 { 175 if(testFile == 0) { 176 return; 177 } 178 uint32_t skipFlags = 0; 179 if(coll->getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED) { 180 skipFlags |= IS_SHIFTED; 181 } 182 if(coll == rbUCA) { 183 skipFlags |= FROM_RULES; 184 } 185 186 logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest"); 187 UBool withSortKeys = getProperty("ucaconfnosortkeys") == NULL; 188 189 int32_t line = 0; 190 191 UChar b1[1024], b2[1024]; 192 UChar *buffer = b1, *oldB = NULL; 193 194 char lineB1[1024], lineB2[1024]; 195 char *lineB = lineB1, *oldLineB = lineB2; 196 197 uint8_t sk1[1024], sk2[1024]; 198 uint8_t *oldSk = NULL, *newSk = sk1; 199 200 int32_t oldLen = 0; 201 int32_t oldBlen = 0; 202 uint32_t first = 0; 203 204 while (fgets(lineB, 1024, testFile) != NULL) { 205 // remove trailing whitespace 206 u_rtrim(lineB); 207 208 line++; 209 if(*lineB == 0 || lineB[0] == '#') { 210 continue; 211 } 212 int32_t buflen = u_parseString(lineB, buffer, 1024, &first, &status); 213 if(U_FAILURE(status)) { 214 errln("Error parsing line %ld (%s): %s\n", 215 (long)line, u_errorName(status), lineB); 216 status = U_ZERO_ERROR; 217 } 218 buffer[buflen] = 0; 219 220 if(skipLineBecauseOfBug(buffer, buflen, skipFlags)) { 221 logln("Skipping line %i because of a known bug", line); 222 continue; 223 } 224 225 int32_t resLen = withSortKeys ? coll->getSortKey(buffer, buflen, newSk, 1024) : 0; 226 227 if(oldSk != NULL) { 228 UBool ok=TRUE; 229 int32_t skres = withSortKeys ? strcmp((char *)oldSk, (char *)newSk) : 0; 230 int32_t cmpres = coll->compare(oldB, oldBlen, buffer, buflen, status); 231 int32_t cmpres2 = coll->compare(buffer, buflen, oldB, oldBlen, status); 232 233 if(cmpres != -cmpres2) { 234 errln("Compare result not symmetrical on line %i: " 235 "previous vs. current (%d) / current vs. previous (%d)", 236 line, cmpres, cmpres2); 237 ok = FALSE; 238 } 239 240 // TODO: Compare with normalization turned off if the input passes the FCD test. 241 242 if(withSortKeys && cmpres != normalizeResult(skres)) { 243 errln("Difference between coll->compare (%d) and sortkey compare (%d) on line %i", 244 cmpres, skres, line); 245 ok = FALSE; 246 } 247 248 int32_t res = cmpres; 249 if(res == 0 && !isAtLeastUCA62) { 250 // Up to UCA 6.1, the collation test files use a custom tie-breaker, 251 // comparing the raw input strings. 252 res = u_strcmpCodePointOrder(oldB, buffer); 253 // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker, 254 // comparing the NFD versions of the input strings, 255 // which we do via setting strength=identical. 256 } 257 if(res > 0) { 258 errln("Line %i is not greater or equal than previous line", line); 259 ok = FALSE; 260 } 261 262 if(!ok) { 263 errln(" Previous data line %s", oldLineB); 264 errln(" Current data line %s", lineB); 265 if(withSortKeys) { 266 UnicodeString oldS, newS; 267 prettify(CollationKey(oldSk, oldLen), oldS); 268 prettify(CollationKey(newSk, resLen), newS); 269 errln(" Previous key: "+oldS); 270 errln(" Current key: "+newS); 271 } 272 } 273 } 274 275 // swap buffers 276 oldLineB = lineB; 277 oldB = buffer; 278 oldSk = newSk; 279 if(lineB == lineB1) { 280 lineB = lineB2; 281 buffer = b2; 282 newSk = sk2; 283 } else { 284 lineB = lineB1; 285 buffer = b1; 286 newSk = sk1; 287 } 288 oldLen = resLen; 289 oldBlen = buflen; 290 } 291 } 292 293 void UCAConformanceTest::TestTableNonIgnorable(/* par */) { 294 if (U_FAILURE(status)) { 295 dataerrln("Error running UCA Conformance Test: %s", u_errorName(status)); 296 return; 297 } 298 setCollNonIgnorable(UCA); 299 openTestFile("NON_IGNORABLE"); 300 testConformance(UCA); 301 } 302 303 void UCAConformanceTest::TestTableShifted(/* par */) { 304 if (U_FAILURE(status)) { 305 dataerrln("Error running UCA Conformance Test: %s", u_errorName(status)); 306 return; 307 } 308 setCollShifted(UCA); 309 openTestFile("SHIFTED"); 310 testConformance(UCA); 311 } 312 313 void UCAConformanceTest::TestRulesNonIgnorable(/* par */) { 314 if(logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) { return; } 315 initRbUCA(); 316 317 if(U_SUCCESS(status)) { 318 setCollNonIgnorable(rbUCA); 319 openTestFile("NON_IGNORABLE"); 320 testConformance(rbUCA); 321 } 322 } 323 324 void UCAConformanceTest::TestRulesShifted(/* par */) { 325 logln("This test is currently disabled, as it is impossible to " 326 "wholly represent fractional UCA using tailoring rules."); 327 return; 328 329 initRbUCA(); 330 331 if(U_SUCCESS(status)) { 332 setCollShifted(rbUCA); 333 openTestFile("SHIFTED"); 334 testConformance(rbUCA); 335 } 336 } 337 338 #endif /* #if !UCONFIG_NO_COLLATION */ 339