1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 /** 8 * IntlTestCollator is the medium level test class for everything in the directory "collate". 9 */ 10 11 /*********************************************************************** 12 * Modification history 13 * Date Name Description 14 * 02/14/2001 synwee Compare with cintltst and commented away tests 15 * that are not run. 16 ***********************************************************************/ 17 18 #include "unicode/utypes.h" 19 20 #if !UCONFIG_NO_COLLATION 21 22 #include "unicode/localpointer.h" 23 #include "unicode/uchar.h" 24 #include "unicode/ustring.h" 25 26 #include "dadrcoll.h" 27 28 #include "encoll.h" 29 #include "frcoll.h" 30 #include "decoll.h" 31 #include "escoll.h" 32 #include "ficoll.h" 33 #include "jacoll.h" 34 #include "trcoll.h" 35 #include "allcoll.h" 36 #include "g7coll.h" 37 #include "mnkytst.h" 38 #include "apicoll.h" 39 #include "regcoll.h" 40 #include "currcoll.h" 41 #include "itercoll.h" 42 #include "tstnorm.h" 43 #include "normconf.h" 44 #include "thcoll.h" 45 #include "srchtest.h" 46 #include "ssearch.h" 47 #include "cntabcol.h" 48 #include "lcukocol.h" 49 #include "ucaconf.h" 50 #include "svccoll.h" 51 #include "cmemory.h" 52 //#include "rndmcoll.h" 53 54 // Set to 1 to test offsets in backAndForth() 55 #define TEST_OFFSETS 0 56 57 #define TESTCLASS(n,classname) \ 58 case n: \ 59 name = #classname; \ 60 if (exec) { \ 61 logln(#classname "---"); \ 62 logln(""); \ 63 classname t; \ 64 callTest(t, par); \ 65 } \ 66 break 67 68 void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par ) 69 { 70 if (exec) { 71 logln("TestSuite Collator: "); 72 } 73 74 switch (index) { 75 TESTCLASS(0, CollationEnglishTest); 76 TESTCLASS(1, CollationFrenchTest); 77 TESTCLASS(2, CollationGermanTest); 78 TESTCLASS(3, CollationSpanishTest); 79 TESTCLASS(4, CollationKanaTest); 80 TESTCLASS(5, CollationTurkishTest); 81 TESTCLASS(6, CollationDummyTest); 82 TESTCLASS(7, G7CollationTest); 83 TESTCLASS(8, CollationMonkeyTest); 84 TESTCLASS(9, CollationAPITest); 85 TESTCLASS(10, CollationRegressionTest); 86 TESTCLASS(11, CollationCurrencyTest); 87 TESTCLASS(12, CollationIteratorTest); 88 TESTCLASS(13, CollationThaiTest); 89 TESTCLASS(14, LotusCollationKoreanTest); 90 TESTCLASS(15, StringSearchTest); 91 TESTCLASS(16, ContractionTableTest); 92 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 93 TESTCLASS(17, DataDrivenCollatorTest); 94 #endif 95 TESTCLASS(18, UCAConformanceTest); 96 TESTCLASS(19, CollationServiceTest); 97 TESTCLASS(20, CollationFinnishTest); // removed by weiv - we have changed Finnish collation 98 //TESTCLASS(21, RandomCollatorTest); // See ticket 5747 about reenabling this test. 99 TESTCLASS(21, SSearchTest); 100 101 default: name = ""; break; 102 } 103 } 104 105 UCollationResult 106 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) { 107 int32_t partialSKResult = 0; 108 uint8_t sBuf[512], tBuf[512]; 109 UCharIterator sIter, tIter; 110 uint32_t sState[2], tState[2]; 111 int32_t sSize = pieceSize, tSize = pieceSize; 112 int32_t i = 0; 113 status = U_ZERO_ERROR; 114 sState[0] = 0; sState[1] = 0; 115 tState[0] = 0; tState[1] = 0; 116 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) { 117 uiter_setString(&sIter, source, sLen); 118 uiter_setString(&tIter, target, tLen); 119 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status); 120 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status); 121 122 if(sState[0] != 0 || tState[0] != 0) { 123 log("State != 0 : %08X %08X\n", sState[0], tState[0]); 124 } 125 log("%i ", i++); 126 127 partialSKResult = memcmp(sBuf, tBuf, pieceSize); 128 } 129 130 if(partialSKResult < 0) { 131 return UCOL_LESS; 132 } else if(partialSKResult > 0) { 133 return UCOL_GREATER; 134 } else { 135 return UCOL_EQUAL; 136 } 137 } 138 139 void 140 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) 141 { 142 UErrorCode status = U_ZERO_ERROR; 143 144 UCollator *myCollation = (UCollator *)((RuleBasedCollator *)col)->getUCollator(); 145 146 Collator::EComparisonResult compareResult = col->compare(source, target); 147 148 CollationKey srckey, tgtkey; 149 col->getCollationKey(source, srckey, status); 150 col->getCollationKey(target, tgtkey, status); 151 if (U_FAILURE(status)){ 152 errln("Creation of collation keys failed\n"); 153 } 154 Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey); 155 156 reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result); 157 158 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); 159 160 int32_t sLen = source.length(), tLen = target.length(); 161 const UChar* src = source.getBuffer(); 162 const UChar* trg = target.getBuffer(); 163 UCollationResult compareResultIter = (UCollationResult)result; 164 165 { 166 UCharIterator sIter, tIter; 167 uiter_setString(&sIter, src, sLen); 168 uiter_setString(&tIter, trg, tLen); 169 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 170 if(compareResultIter != (UCollationResult)result) { 171 errln("Different result for iterative comparison "+source+" "+target); 172 } 173 } 174 /* convert the strings to UTF-8 and do try comparing with char iterator */ 175 if(!quick) { /*!QUICK*/ 176 char utf8Source[256], utf8Target[256]; 177 int32_t utf8SourceLen = 0, utf8TargetLen = 0; 178 u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status); 179 if(U_FAILURE(status)) { /* probably buffer is not big enough */ 180 log("Src UTF-8 buffer too small! Will not compare!\n"); 181 } else { 182 u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status); 183 if(U_SUCCESS(status)) { /* probably buffer is not big enough */ 184 UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result; 185 UCharIterator sIter, tIter; 186 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/ 187 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); 188 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); 189 /*uiter_setString(&sIter, source, sLen); 190 uiter_setString(&tIter, target, tLen);*/ 191 compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 192 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 193 sIter.move(&sIter, 0, UITER_START); 194 tIter.move(&tIter, 0, UITER_START); 195 compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 196 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); 197 if(compareResultUTF8 != compareResultIter) { 198 errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target); 199 } 200 if(compareResultUTF8 != compareResultUTF8Norm) { 201 errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target); 202 } 203 } else { 204 log("Target UTF-8 buffer too small! Did not compare!\n"); 205 } 206 if(U_FAILURE(status)) { 207 log("UTF-8 strcoll failed! Ignoring result\n"); 208 } 209 } 210 } 211 212 /* testing the partial sortkeys */ 213 { /*!QUICK*/ 214 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ 215 int32_t partialSizesSize = 1; 216 if(!quick) { 217 partialSizesSize = 7; 218 } 219 int32_t i = 0; 220 log("partial sortkey test piecesize="); 221 for(i = 0; i < partialSizesSize; i++) { 222 UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result; 223 log("%i ", partialSizes[i]); 224 225 partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); 226 if(partialSKResult != (UCollationResult)result) { 227 errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")"); 228 } 229 230 if(norm != UCOL_ON && !quick) { 231 log("N "); 232 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 233 partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); 234 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); 235 if(partialSKResult != partialNormalizedSKResult) { 236 errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")"); 237 } 238 } 239 } 240 log("\n"); 241 } 242 /* 243 if (compareResult != result) { 244 errln("String comparison failed in variant test\n"); 245 } 246 if (keyResult != result) { 247 errln("Collation key comparison failed in variant test\n"); 248 } 249 */ 250 } 251 252 void 253 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) { 254 doTest(col, UnicodeString(source), UnicodeString(target), result); 255 } 256 257 void 258 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) 259 { 260 if(col) { 261 doTestVariant(col, source, target, result); 262 if(result == Collator::LESS) { 263 doTestVariant(col, target, source, Collator::GREATER); 264 } else if (result == Collator::GREATER) { 265 doTestVariant(col, target, source, Collator::LESS); 266 } 267 268 UErrorCode status = U_ZERO_ERROR; 269 LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source)); 270 logln("Testing iterating source: "+source); 271 backAndForth(*c); 272 c->setText(target, status); 273 logln("Testing iterating target: "+target); 274 backAndForth(*c); 275 } 276 } 277 278 279 // used for collation result reporting, defined here for convenience 280 // (maybe moved later) 281 void 282 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target, 283 CollationKey &sourceKey, CollationKey &targetKey, 284 Collator::EComparisonResult compareResult, 285 Collator::EComparisonResult keyResult, 286 Collator::EComparisonResult incResult, 287 Collator::EComparisonResult expectedResult ) 288 { 289 if (expectedResult < -1 || expectedResult > 1) 290 { 291 errln("***** invalid call to reportCResult ****"); 292 return; 293 } 294 295 UBool ok1 = (compareResult == expectedResult); 296 UBool ok2 = (keyResult == expectedResult); 297 UBool ok3 = (incResult == expectedResult); 298 299 300 if (ok1 && ok2 && ok3 && !verbose) { 301 // Keep non-verbose, passing tests fast 302 return; 303 } else { 304 UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare("); 305 UnicodeString msg2(", "), msg3(") returned "), msg4("; expected "); 306 UnicodeString prettySource, prettyTarget, sExpect, sResult; 307 308 IntlTest::prettify(source, prettySource); 309 IntlTest::prettify(target, prettyTarget); 310 appendCompareResult(compareResult, sResult); 311 appendCompareResult(expectedResult, sExpect); 312 313 if (ok1) { 314 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 315 } else { 316 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 317 } 318 319 msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key("); 320 msg2 = ").compareTo(key("; 321 msg3 = ")) returned "; 322 323 appendCompareResult(keyResult, sResult); 324 325 if (ok2) { 326 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 327 } else { 328 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 329 330 msg1 = " "; 331 msg2 = " vs. "; 332 333 prettify(sourceKey, prettySource); 334 prettify(targetKey, prettyTarget); 335 336 errln(msg1 + prettySource + msg2 + prettyTarget); 337 } 338 msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare("); 339 msg2 = ", "; 340 msg3 = ") returned "; 341 342 appendCompareResult(incResult, sResult); 343 344 if (ok3) { 345 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 346 } else { 347 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 348 } 349 } 350 } 351 352 UnicodeString& 353 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result, 354 UnicodeString& target) 355 { 356 if (result == Collator::LESS) 357 { 358 target += "LESS"; 359 } 360 else if (result == Collator::EQUAL) 361 { 362 target += "EQUAL"; 363 } 364 else if (result == Collator::GREATER) 365 { 366 target += "GREATER"; 367 } 368 else 369 { 370 UnicodeString huh = "?"; 371 372 target += (huh + (int32_t)result); 373 } 374 375 return target; 376 } 377 378 // Produce a printable representation of a CollationKey 379 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target) 380 { 381 int32_t i, byteCount; 382 const uint8_t *bytes = source.getByteArray(byteCount); 383 384 target.remove(); 385 target += "["; 386 387 for (i = 0; i < byteCount; i += 1) 388 { 389 if (i != 0) { 390 target += " "; 391 } 392 appendHex(bytes[i], 2, target); 393 } 394 395 target += "]"; 396 397 return target; 398 } 399 400 void IntlTestCollator::backAndForth(CollationElementIterator &iter) 401 { 402 // Run through the iterator forwards and stick it into an array 403 int32_t orderLength = 0; 404 LocalArray<Order> orders(getOrders(iter, orderLength)); 405 UErrorCode status = U_ZERO_ERROR; 406 407 // Now go through it backwards and make sure we get the same values 408 int32_t index = orderLength; 409 int32_t o; 410 411 // reset the iterator 412 iter.reset(); 413 414 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) 415 { 416 /*int32_t offset = */iter.getOffset(); 417 418 if (index == 0) { 419 if(o == 0) { 420 continue; 421 } else { // this is an error, orders exhausted but there are non-ignorable CEs from 422 // going backwards 423 errln("Backward iteration returned a non ignorable after orders are exhausted"); 424 break; 425 } 426 } 427 428 index -= 1; 429 if (o != orders[index].order) { 430 if (o == 0) 431 index += 1; 432 else { 433 while (index > 0 && orders[--index].order == 0) { 434 // nothing... 435 } 436 437 if (o != orders[index].order) { 438 errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index, 439 orders[index].order, o); 440 //break; 441 return; 442 } 443 } 444 } 445 446 #if TEST_OFFSETS 447 if (offset != orders[index].offset) { 448 errln("Mismatched offset at index %d: %d vs. %d", index, 449 orders[index].offset, offset); 450 //break; 451 return; 452 } 453 #endif 454 455 } 456 457 while (index != 0 && orders[index - 1].order == 0) 458 { 459 index --; 460 } 461 462 if (index != 0) 463 { 464 UnicodeString msg("Didn't get back to beginning - index is "); 465 errln(msg + index); 466 467 iter.reset(); 468 err("next: "); 469 while ((o = iter.next(status)) != CollationElementIterator::NULLORDER) 470 { 471 UnicodeString hexString("0x"); 472 473 appendHex(o, 8, hexString); 474 hexString += " "; 475 err(hexString); 476 } 477 errln(""); 478 479 err("prev: "); 480 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) 481 { 482 UnicodeString hexString("0x"); 483 484 appendHex(o, 8, hexString); 485 hexString += " "; 486 err(hexString); 487 } 488 errln(""); 489 } 490 } 491 492 493 /** 494 * Return an integer array containing all of the collation orders 495 * returned by calls to next on the specified iterator 496 */ 497 IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength) 498 { 499 int32_t maxSize = 100; 500 int32_t size = 0; 501 LocalArray<Order> orders(new Order[maxSize]); 502 UErrorCode status = U_ZERO_ERROR; 503 int32_t offset = iter.getOffset(); 504 505 int32_t order; 506 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER) 507 { 508 if (size == maxSize) 509 { 510 maxSize *= 2; 511 Order *temp = new Order[maxSize]; 512 513 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order)); 514 orders.adoptInstead(temp); 515 } 516 517 orders[size].order = order; 518 orders[size].offset = offset; 519 520 offset = iter.getOffset(); 521 size += 1; 522 } 523 524 if (maxSize > size) 525 { 526 Order *temp = new Order[size]; 527 528 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order)); 529 orders.adoptInstead(temp); 530 } 531 532 orderLength = size; 533 return orders.orphan(); 534 } 535 536 #endif /* #if !UCONFIG_NO_COLLATION */ 537