1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2002-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 /** 8 * UCAConformanceTest performs conformance tests defined in the data 9 * files. ICU ships with stub data files, as the whole test are too 10 * long. To do the whole test, download the test files. 11 */ 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_COLLATION 16 17 #include "ucaconf.h" 18 #include "unicode/sortkey.h" 19 #include "unicode/tblcoll.h" 20 #include "unicode/ustring.h" 21 #include "cmemory.h" 22 #include "cstring.h" 23 #include "uparse.h" 24 25 UCAConformanceTest::UCAConformanceTest() : 26 rbUCA(NULL), 27 testFile(NULL), 28 status(U_ZERO_ERROR) 29 { 30 UCA = (RuleBasedCollator *)Collator::createInstance(Locale::getRoot(), status); 31 if(U_FAILURE(status)) { 32 dataerrln("Error - UCAConformanceTest: Unable to open UCA collator! - %s", u_errorName(status)); 33 } 34 35 const char *srcDir = IntlTest::getSourceTestData(status); 36 if (U_FAILURE(status)) { 37 dataerrln("Could not open test data %s", u_errorName(status)); 38 return; 39 } 40 uprv_strcpy(testDataPath, srcDir); 41 uprv_strcat(testDataPath, "CollationTest_"); 42 43 UVersionInfo uniVersion; 44 static const UVersionInfo v62 = { 6, 2, 0, 0 }; 45 u_getUnicodeVersion(uniVersion); 46 isAtLeastUCA62 = uprv_memcmp(uniVersion, v62, 4) >= 0; 47 } 48 49 UCAConformanceTest::~UCAConformanceTest() 50 { 51 delete UCA; 52 delete rbUCA; 53 if (testFile) { 54 fclose(testFile); 55 } 56 } 57 58 void UCAConformanceTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) 59 { 60 if(exec) { 61 logln("TestSuite UCAConformanceTest: "); 62 } 63 TESTCASE_AUTO_BEGIN; 64 TESTCASE_AUTO(TestTableNonIgnorable); 65 TESTCASE_AUTO(TestTableShifted); 66 TESTCASE_AUTO(TestRulesNonIgnorable); 67 TESTCASE_AUTO(TestRulesShifted); 68 TESTCASE_AUTO_END; 69 } 70 71 void UCAConformanceTest::initRbUCA() 72 { 73 if(!rbUCA) { 74 if (UCA) { 75 UnicodeString ucarules; 76 UCA->getRules(UCOL_FULL_RULES, ucarules); 77 rbUCA = new RuleBasedCollator(ucarules, status); 78 if (U_FAILURE(status)) { 79 dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status)); 80 return; 81 } 82 } else { 83 dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status)); 84 return; 85 } 86 } 87 } 88 89 void UCAConformanceTest::setCollNonIgnorable(Collator *coll) 90 { 91 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 92 coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); 93 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status); 94 coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_TERTIARY, status); 95 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, status); 96 } 97 98 void UCAConformanceTest::setCollShifted(Collator *coll) 99 { 100 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 101 coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); 102 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status); 103 coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_QUATERNARY, status); 104 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); 105 } 106 107 void UCAConformanceTest::openTestFile(const char *type) 108 { 109 const char *ext = ".txt"; 110 if(testFile) { 111 fclose(testFile); 112 } 113 char buffer[1024]; 114 uprv_strcpy(buffer, testDataPath); 115 uprv_strcat(buffer, type); 116 int32_t bufLen = (int32_t)uprv_strlen(buffer); 117 118 // we try to open 3 files: 119 // path/CollationTest_type.txt 120 // path/CollationTest_type_SHORT.txt 121 // path/CollationTest_type_STUB.txt 122 // we are going to test with the first one that we manage to open. 123 124 uprv_strcpy(buffer+bufLen, ext); 125 126 testFile = fopen(buffer, "rb"); 127 128 if(testFile == 0) { 129 uprv_strcpy(buffer+bufLen, "_SHORT"); 130 uprv_strcat(buffer, ext); 131 testFile = fopen(buffer, "rb"); 132 133 if(testFile == 0) { 134 uprv_strcpy(buffer+bufLen, "_STUB"); 135 uprv_strcat(buffer, ext); 136 testFile = fopen(buffer, "rb"); 137 138 if (testFile == 0) { 139 *(buffer+bufLen) = 0; 140 dataerrln("Could not open any of the conformance test files, tried opening base %s\n", buffer); 141 return; 142 } else { 143 infoln( 144 "INFO: Working with the stub file.\n" 145 "If you need the full conformance test, please\n" 146 "download the appropriate data files from:\n" 147 "http://unicode.org/cldr/trac/browser/trunk/common/uca"); 148 } 149 } 150 } 151 } 152 153 static const uint32_t IS_SHIFTED = 1; 154 static const uint32_t FROM_RULES = 2; 155 156 static UBool 157 skipLineBecauseOfBug(const UChar *s, int32_t length, uint32_t flags) { 158 // Add temporary exceptions here if there are ICU bugs, until we can fix them. 159 // For examples see the ICU 52 version of this file. 160 (void)s; 161 (void)length; 162 (void)flags; 163 return FALSE; 164 } 165 166 static UCollationResult 167 normalizeResult(int32_t result) { 168 return result<0 ? UCOL_LESS : result==0 ? UCOL_EQUAL : UCOL_GREATER; 169 } 170 171 void UCAConformanceTest::testConformance(const Collator *coll) 172 { 173 if(testFile == 0) { 174 return; 175 } 176 uint32_t skipFlags = 0; 177 if(coll->getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED) { 178 skipFlags |= IS_SHIFTED; 179 } 180 if(coll == rbUCA) { 181 skipFlags |= FROM_RULES; 182 } 183 184 logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest"); 185 UBool withSortKeys = getProperty("ucaconfnosortkeys") == NULL; 186 187 int32_t line = 0; 188 189 UChar b1[1024], b2[1024]; 190 UChar *buffer = b1, *oldB = NULL; 191 192 char lineB1[1024], lineB2[1024]; 193 char *lineB = lineB1, *oldLineB = lineB2; 194 195 uint8_t sk1[1024], sk2[1024]; 196 uint8_t *oldSk = NULL, *newSk = sk1; 197 198 int32_t oldLen = 0; 199 int32_t oldBlen = 0; 200 uint32_t first = 0; 201 202 while (fgets(lineB, 1024, testFile) != NULL) { 203 // remove trailing whitespace 204 u_rtrim(lineB); 205 206 line++; 207 if(*lineB == 0 || lineB[0] == '#') { 208 continue; 209 } 210 int32_t buflen = u_parseString(lineB, buffer, 1024, &first, &status); 211 if(U_FAILURE(status)) { 212 errln("Error parsing line %ld (%s): %s\n", 213 (long)line, u_errorName(status), lineB); 214 status = U_ZERO_ERROR; 215 } 216 buffer[buflen] = 0; 217 218 if(skipLineBecauseOfBug(buffer, buflen, skipFlags)) { 219 logln("Skipping line %i because of a known bug", line); 220 continue; 221 } 222 223 int32_t resLen = withSortKeys ? coll->getSortKey(buffer, buflen, newSk, 1024) : 0; 224 225 if(oldSk != NULL) { 226 UBool ok=TRUE; 227 int32_t skres = withSortKeys ? strcmp((char *)oldSk, (char *)newSk) : 0; 228 int32_t cmpres = coll->compare(oldB, oldBlen, buffer, buflen, status); 229 int32_t cmpres2 = coll->compare(buffer, buflen, oldB, oldBlen, status); 230 231 if(cmpres != -cmpres2) { 232 errln("Compare result not symmetrical on line %i: " 233 "previous vs. current (%d) / current vs. previous (%d)", 234 line, cmpres, cmpres2); 235 ok = FALSE; 236 } 237 238 // TODO: Compare with normalization turned off if the input passes the FCD test. 239 240 if(withSortKeys && cmpres != normalizeResult(skres)) { 241 errln("Difference between coll->compare (%d) and sortkey compare (%d) on line %i", 242 cmpres, skres, line); 243 ok = FALSE; 244 } 245 246 int32_t res = cmpres; 247 if(res == 0 && !isAtLeastUCA62) { 248 // Up to UCA 6.1, the collation test files use a custom tie-breaker, 249 // comparing the raw input strings. 250 res = u_strcmpCodePointOrder(oldB, buffer); 251 // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker, 252 // comparing the NFD versions of the input strings, 253 // which we do via setting strength=identical. 254 } 255 if(res > 0) { 256 errln("Line %i is not greater or equal than previous line", line); 257 ok = FALSE; 258 } 259 260 if(!ok) { 261 errln(" Previous data line %s", oldLineB); 262 errln(" Current data line %s", lineB); 263 if(withSortKeys) { 264 UnicodeString oldS, newS; 265 prettify(CollationKey(oldSk, oldLen), oldS); 266 prettify(CollationKey(newSk, resLen), newS); 267 errln(" Previous key: "+oldS); 268 errln(" Current key: "+newS); 269 } 270 } 271 } 272 273 // swap buffers 274 oldLineB = lineB; 275 oldB = buffer; 276 oldSk = newSk; 277 if(lineB == lineB1) { 278 lineB = lineB2; 279 buffer = b2; 280 newSk = sk2; 281 } else { 282 lineB = lineB1; 283 buffer = b1; 284 newSk = sk1; 285 } 286 oldLen = resLen; 287 oldBlen = buflen; 288 } 289 } 290 291 void UCAConformanceTest::TestTableNonIgnorable(/* par */) { 292 if (U_FAILURE(status)) { 293 dataerrln("Error running UCA Conformance Test: %s", u_errorName(status)); 294 return; 295 } 296 setCollNonIgnorable(UCA); 297 openTestFile("NON_IGNORABLE"); 298 testConformance(UCA); 299 } 300 301 void UCAConformanceTest::TestTableShifted(/* par */) { 302 if (U_FAILURE(status)) { 303 dataerrln("Error running UCA Conformance Test: %s", u_errorName(status)); 304 return; 305 } 306 setCollShifted(UCA); 307 openTestFile("SHIFTED"); 308 testConformance(UCA); 309 } 310 311 void UCAConformanceTest::TestRulesNonIgnorable(/* par */) { 312 if(logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) { return; } 313 initRbUCA(); 314 315 if(U_SUCCESS(status)) { 316 setCollNonIgnorable(rbUCA); 317 openTestFile("NON_IGNORABLE"); 318 testConformance(rbUCA); 319 } 320 } 321 322 void UCAConformanceTest::TestRulesShifted(/* par */) { 323 logln("This test is currently disabled, as it is impossible to " 324 "wholly represent fractional UCA using tailoring rules."); 325 return; 326 327 initRbUCA(); 328 329 if(U_SUCCESS(status)) { 330 setCollShifted(rbUCA); 331 openTestFile("SHIFTED"); 332 testConformance(rbUCA); 333 } 334 } 335 336 #endif /* #if !UCONFIG_NO_COLLATION */ 337