1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2002-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 /** 8 * UCAConformanceTest performs conformance tests defined in the data 9 * files. ICU ships with stub data files, as the whole test are too 10 * long. To do the whole test, download the test files. 11 */ 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_COLLATION 16 17 #include "ucaconf.h" 18 #include "unicode/ustring.h" 19 #include "cmemory.h" 20 #include "cstring.h" 21 #include "uparse.h" 22 23 UCAConformanceTest::UCAConformanceTest() : 24 rbUCA(NULL), 25 testFile(NULL), 26 status(U_ZERO_ERROR) 27 { 28 UCA = (RuleBasedCollator *)Collator::createInstance(Locale::getRoot(), status); 29 if(U_FAILURE(status)) { 30 dataerrln("Error - UCAConformanceTest: Unable to open UCA collator! - %s", u_errorName(status)); 31 } 32 33 const char *srcDir = IntlTest::getSourceTestData(status); 34 if (U_FAILURE(status)) { 35 dataerrln("Could not open test data %s", u_errorName(status)); 36 return; 37 } 38 uprv_strcpy(testDataPath, srcDir); 39 uprv_strcat(testDataPath, "CollationTest_"); 40 41 UVersionInfo uniVersion; 42 static const UVersionInfo v62 = { 6, 2, 0, 0 }; 43 u_getUnicodeVersion(uniVersion); 44 isAtLeastUCA62 = uprv_memcmp(uniVersion, v62, 4) >= 0; 45 } 46 47 UCAConformanceTest::~UCAConformanceTest() 48 { 49 delete UCA; 50 delete rbUCA; 51 if (testFile) { 52 fclose(testFile); 53 } 54 } 55 56 void UCAConformanceTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) 57 { 58 if(exec) { 59 logln("TestSuite UCAConformanceTest: "); 60 } 61 TESTCASE_AUTO_BEGIN; 62 TESTCASE_AUTO(TestTableNonIgnorable); 63 TESTCASE_AUTO(TestTableShifted); 64 TESTCASE_AUTO(TestRulesNonIgnorable); 65 TESTCASE_AUTO(TestRulesShifted); 66 TESTCASE_AUTO_END; 67 } 68 69 void UCAConformanceTest::initRbUCA() 70 { 71 if(!rbUCA) { 72 UnicodeString ucarules; 73 if (UCA) { 74 UCA->getRules(UCOL_FULL_RULES, ucarules); 75 rbUCA = new RuleBasedCollator(ucarules, status); 76 if (U_FAILURE(status)) { 77 dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status)); 78 return; 79 } 80 } else { 81 dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status)); 82 return; 83 } 84 } 85 } 86 87 void UCAConformanceTest::setCollNonIgnorable(Collator *coll) 88 { 89 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 90 coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); 91 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status); 92 coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_TERTIARY, status); 93 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, status); 94 } 95 96 void UCAConformanceTest::setCollShifted(Collator *coll) 97 { 98 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 99 coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); 100 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status); 101 coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_QUATERNARY, status); 102 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); 103 } 104 105 void UCAConformanceTest::openTestFile(const char *type) 106 { 107 const char *ext = ".txt"; 108 if(testFile) { 109 fclose(testFile); 110 } 111 char buffer[1024]; 112 uprv_strcpy(buffer, testDataPath); 113 uprv_strcat(buffer, type); 114 int32_t bufLen = (int32_t)uprv_strlen(buffer); 115 116 // we try to open 3 files: 117 // path/CollationTest_type.txt 118 // path/CollationTest_type_SHORT.txt 119 // path/CollationTest_type_STUB.txt 120 // we are going to test with the first one that we manage to open. 121 122 uprv_strcpy(buffer+bufLen, ext); 123 124 testFile = fopen(buffer, "rb"); 125 126 if(testFile == 0) { 127 uprv_strcpy(buffer+bufLen, "_SHORT"); 128 uprv_strcat(buffer, ext); 129 testFile = fopen(buffer, "rb"); 130 131 if(testFile == 0) { 132 uprv_strcpy(buffer+bufLen, "_STUB"); 133 uprv_strcat(buffer, ext); 134 testFile = fopen(buffer, "rb"); 135 136 if (testFile == 0) { 137 *(buffer+bufLen) = 0; 138 dataerrln("Could not open any of the conformance test files, tried opening base %s\n", buffer); 139 return; 140 } else { 141 infoln( 142 "INFO: Working with the stub file.\n" 143 "If you need the full conformance test, please\n" 144 "download the appropriate data files from:\n" 145 "http://source.icu-project.org/repos/icu/tools/trunk/unicodetools/com/ibm/text/data/"); 146 } 147 } 148 } 149 } 150 151 static const uint32_t IS_SHIFTED = 1; 152 static const uint32_t FROM_RULES = 2; 153 154 static UBool 155 skipLineBecauseOfBug(const UChar *s, int32_t length, uint32_t flags) { 156 // TODO: Fix ICU ticket #8052 157 if(length >= 3 && 158 (s[0] == 0xfb2 || s[0] == 0xfb3) && 159 s[1] == 0x334 && 160 (s[2] == 0xf73 || s[2] == 0xf75 || s[2] == 0xf81)) { 161 return TRUE; 162 } 163 // TODO: Fix ICU ticket #9361 164 if((flags & IS_SHIFTED) != 0 && length >= 2 && s[0] == 0xfffe) { 165 return TRUE; 166 } 167 // TODO: Fix tailoring builder, ICU ticket #9593. 168 UChar c; 169 if((flags & FROM_RULES) != 0 && length >= 2 && ((c = s[1]) == 0xedc || c == 0xedd)) { 170 return TRUE; 171 } 172 return FALSE; 173 } 174 175 static UCollationResult 176 normalizeResult(int32_t result) { 177 return result<0 ? UCOL_LESS : result==0 ? UCOL_EQUAL : UCOL_GREATER; 178 } 179 180 void UCAConformanceTest::testConformance(const Collator *coll) 181 { 182 if(testFile == 0) { 183 return; 184 } 185 uint32_t skipFlags = 0; 186 if(coll->getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED) { 187 skipFlags |= IS_SHIFTED; 188 } 189 if(coll == rbUCA) { 190 skipFlags |= FROM_RULES; 191 } 192 193 int32_t line = 0; 194 195 UChar b1[1024], b2[1024]; 196 UChar *buffer = b1, *oldB = NULL; 197 198 char lineB1[1024], lineB2[1024]; 199 char *lineB = lineB1, *oldLineB = lineB2; 200 201 uint8_t sk1[1024], sk2[1024]; 202 uint8_t *oldSk = NULL, *newSk = sk1; 203 204 int32_t oldLen = 0; 205 int32_t oldBlen = 0; 206 uint32_t first = 0; 207 208 while (fgets(lineB, 1024, testFile) != NULL) { 209 // remove trailing whitespace 210 u_rtrim(lineB); 211 212 line++; 213 if(*lineB == 0 || lineB[0] == '#') { 214 continue; 215 } 216 int32_t buflen = u_parseString(lineB, buffer, 1024, &first, &status); 217 if(U_FAILURE(status)) { 218 errln("Error parsing line %ld (%s): %s\n", 219 (long)line, u_errorName(status), lineB); 220 status = U_ZERO_ERROR; 221 } 222 buffer[buflen] = 0; 223 224 if(skipLineBecauseOfBug(buffer, buflen, skipFlags)) { 225 logln("Skipping line %i because of a known bug", line); 226 continue; 227 } 228 229 int32_t resLen = coll->getSortKey(buffer, buflen, newSk, 1024); 230 231 if(oldSk != NULL) { 232 int32_t skres = strcmp((char *)oldSk, (char *)newSk); 233 int32_t cmpres = coll->compare(oldB, oldBlen, buffer, buflen, status); 234 int32_t cmpres2 = coll->compare(buffer, buflen, oldB, oldBlen, status); 235 236 if(cmpres != -cmpres2) { 237 errln("Compare result not symmetrical on line %i", line); 238 } 239 240 if(cmpres != normalizeResult(skres)) { 241 errln("Difference between coll->compare (%d) and sortkey compare (%d) on line %i", 242 cmpres, skres, line); 243 errln(" Previous data line %s", oldLineB); 244 errln(" Current data line %s", lineB); 245 } 246 247 int32_t res = cmpres; 248 if(res == 0 && !isAtLeastUCA62) { 249 // Up to UCA 6.1, the collation test files use a custom tie-breaker, 250 // comparing the raw input strings. 251 res = u_strcmpCodePointOrder(oldB, buffer); 252 // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker, 253 // comparing the NFD versions of the input strings, 254 // which we do via setting strength=identical. 255 } 256 if(res > 0) { 257 errln("Line %i is not greater or equal than previous line", line); 258 errln(" Previous data line %s", oldLineB); 259 errln(" Current data line %s", lineB); 260 UnicodeString oldS, newS; 261 prettify(CollationKey(oldSk, oldLen), oldS); 262 prettify(CollationKey(newSk, resLen), newS); 263 errln(" Previous key: "+oldS); 264 errln(" Current key: "+newS); 265 } 266 } 267 268 // swap buffers 269 oldLineB = lineB; 270 oldB = buffer; 271 oldSk = newSk; 272 if(lineB == lineB1) { 273 lineB = lineB2; 274 buffer = b2; 275 newSk = sk2; 276 } else { 277 lineB = lineB1; 278 buffer = b1; 279 newSk = sk1; 280 } 281 oldLen = resLen; 282 oldBlen = buflen; 283 } 284 } 285 286 void UCAConformanceTest::TestTableNonIgnorable(/* par */) { 287 if (U_FAILURE(status)) { 288 dataerrln("Error running UCA Conformance Test: %s", u_errorName(status)); 289 return; 290 } 291 setCollNonIgnorable(UCA); 292 openTestFile("NON_IGNORABLE"); 293 testConformance(UCA); 294 } 295 296 void UCAConformanceTest::TestTableShifted(/* par */) { 297 if (U_FAILURE(status)) { 298 dataerrln("Error running UCA Conformance Test: %s", u_errorName(status)); 299 return; 300 } 301 setCollShifted(UCA); 302 openTestFile("SHIFTED"); 303 testConformance(UCA); 304 } 305 306 void UCAConformanceTest::TestRulesNonIgnorable(/* par */) { 307 initRbUCA(); 308 309 if(U_SUCCESS(status)) { 310 setCollNonIgnorable(rbUCA); 311 openTestFile("NON_IGNORABLE"); 312 testConformance(rbUCA); 313 } 314 } 315 316 void UCAConformanceTest::TestRulesShifted(/* par */) { 317 logln("This test is currently disabled, as it is impossible to " 318 "wholly represent fractional UCA using tailoring rules."); 319 return; 320 321 initRbUCA(); 322 323 if(U_SUCCESS(status)) { 324 setCollShifted(rbUCA); 325 openTestFile("SHIFTED"); 326 testConformance(rbUCA); 327 } 328 } 329 330 #endif /* #if !UCONFIG_NO_COLLATION */ 331