1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2011, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 /** 8 * IntlTestCollator is the medium level test class for everything in the directory "collate". 9 */ 10 11 /*********************************************************************** 12 * Modification history 13 * Date Name Description 14 * 02/14/2001 synwee Compare with cintltst and commented away tests 15 * that are not run. 16 ***********************************************************************/ 17 18 #include "unicode/utypes.h" 19 20 #if !UCONFIG_NO_COLLATION 21 22 #include "unicode/localpointer.h" 23 #include "unicode/uchar.h" 24 #include "unicode/ustring.h" 25 26 #include "dadrcoll.h" 27 28 #include "encoll.h" 29 #include "frcoll.h" 30 #include "decoll.h" 31 #include "escoll.h" 32 #include "ficoll.h" 33 #include "jacoll.h" 34 #include "trcoll.h" 35 #include "allcoll.h" 36 #include "g7coll.h" 37 #include "mnkytst.h" 38 #include "apicoll.h" 39 #include "regcoll.h" 40 #include "currcoll.h" 41 #include "itercoll.h" 42 #include "tstnorm.h" 43 #include "normconf.h" 44 #include "thcoll.h" 45 #include "srchtest.h" 46 #include "ssearch.h" 47 #include "cntabcol.h" 48 #include "lcukocol.h" 49 #include "ucaconf.h" 50 #include "svccoll.h" 51 #include "cmemory.h" 52 #include "alphaindextst.h" 53 //#include "rndmcoll.h" 54 55 // Set to 1 to test offsets in backAndForth() 56 #define TEST_OFFSETS 0 57 58 #define TESTCLASS(n,classname) \ 59 case n: \ 60 name = #classname; \ 61 if (exec) { \ 62 logln(#classname "---"); \ 63 logln(""); \ 64 classname t; \ 65 callTest(t, par); \ 66 } \ 67 break 68 69 void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par ) 70 { 71 if (exec) { 72 logln("TestSuite Collator: "); 73 } 74 75 switch (index) { 76 TESTCLASS(0, CollationEnglishTest); 77 TESTCLASS(1, CollationFrenchTest); 78 TESTCLASS(2, CollationGermanTest); 79 TESTCLASS(3, CollationSpanishTest); 80 TESTCLASS(4, CollationKanaTest); 81 TESTCLASS(5, CollationTurkishTest); 82 TESTCLASS(6, CollationDummyTest); 83 TESTCLASS(7, G7CollationTest); 84 TESTCLASS(8, CollationMonkeyTest); 85 TESTCLASS(9, CollationAPITest); 86 TESTCLASS(10, CollationRegressionTest); 87 TESTCLASS(11, CollationCurrencyTest); 88 TESTCLASS(12, CollationIteratorTest); 89 TESTCLASS(13, CollationThaiTest); 90 TESTCLASS(14, LotusCollationKoreanTest); 91 TESTCLASS(15, StringSearchTest); 92 TESTCLASS(16, ContractionTableTest); 93 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 94 TESTCLASS(17, DataDrivenCollatorTest); 95 #endif 96 TESTCLASS(18, UCAConformanceTest); 97 TESTCLASS(19, CollationServiceTest); 98 TESTCLASS(20, CollationFinnishTest); // removed by weiv - we have changed Finnish collation 99 //TESTCLASS(21, RandomCollatorTest); // See ticket 5747 about reenabling this test. 100 TESTCLASS(21, SSearchTest); 101 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION 102 TESTCLASS(22, AlphabeticIndexTest); 103 #endif 104 105 default: name = ""; break; 106 } 107 } 108 109 UCollationResult 110 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) { 111 int32_t partialSKResult = 0; 112 uint8_t sBuf[512], tBuf[512]; 113 UCharIterator sIter, tIter; 114 uint32_t sState[2], tState[2]; 115 int32_t sSize = pieceSize, tSize = pieceSize; 116 int32_t i = 0; 117 status = U_ZERO_ERROR; 118 sState[0] = 0; sState[1] = 0; 119 tState[0] = 0; tState[1] = 0; 120 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) { 121 uiter_setString(&sIter, source, sLen); 122 uiter_setString(&tIter, target, tLen); 123 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status); 124 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status); 125 126 if(sState[0] != 0 || tState[0] != 0) { 127 log("State != 0 : %08X %08X\n", sState[0], tState[0]); 128 } 129 log("%i ", i++); 130 131 partialSKResult = memcmp(sBuf, tBuf, pieceSize); 132 } 133 134 if(partialSKResult < 0) { 135 return UCOL_LESS; 136 } else if(partialSKResult > 0) { 137 return UCOL_GREATER; 138 } else { 139 return UCOL_EQUAL; 140 } 141 } 142 143 void 144 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) 145 { 146 UErrorCode status = U_ZERO_ERROR; 147 148 UCollator *myCollation = (UCollator *)((RuleBasedCollator *)col)->getUCollator(); 149 150 Collator::EComparisonResult compareResult = col->compare(source, target); 151 152 CollationKey srckey, tgtkey; 153 col->getCollationKey(source, srckey, status); 154 col->getCollationKey(target, tgtkey, status); 155 if (U_FAILURE(status)){ 156 errln("Creation of collation keys failed\n"); 157 } 158 Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey); 159 160 reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result); 161 162 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); 163 164 int32_t sLen = source.length(), tLen = target.length(); 165 const UChar* src = source.getBuffer(); 166 const UChar* trg = target.getBuffer(); 167 UCollationResult compareResultIter = (UCollationResult)result; 168 169 { 170 UCharIterator sIter, tIter; 171 uiter_setString(&sIter, src, sLen); 172 uiter_setString(&tIter, trg, tLen); 173 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 174 if(compareResultIter != (UCollationResult)result) { 175 errln("Different result for iterative comparison "+source+" "+target); 176 } 177 } 178 /* convert the strings to UTF-8 and do try comparing with char iterator */ 179 if(!quick) { /*!QUICK*/ 180 char utf8Source[256], utf8Target[256]; 181 int32_t utf8SourceLen = 0, utf8TargetLen = 0; 182 u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status); 183 if(U_FAILURE(status)) { /* probably buffer is not big enough */ 184 log("Src UTF-8 buffer too small! Will not compare!\n"); 185 } else { 186 u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status); 187 if(U_SUCCESS(status)) { /* probably buffer is not big enough */ 188 UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result; 189 UCharIterator sIter, tIter; 190 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/ 191 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); 192 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); 193 /*uiter_setString(&sIter, source, sLen); 194 uiter_setString(&tIter, target, tLen);*/ 195 compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 196 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 197 sIter.move(&sIter, 0, UITER_START); 198 tIter.move(&tIter, 0, UITER_START); 199 compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 200 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); 201 if(compareResultUTF8 != compareResultIter) { 202 errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target); 203 } 204 if(compareResultUTF8 != compareResultUTF8Norm) { 205 errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target); 206 } 207 } else { 208 log("Target UTF-8 buffer too small! Did not compare!\n"); 209 } 210 if(U_FAILURE(status)) { 211 log("UTF-8 strcoll failed! Ignoring result\n"); 212 } 213 } 214 } 215 216 /* testing the partial sortkeys */ 217 { /*!QUICK*/ 218 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ 219 int32_t partialSizesSize = 1; 220 if(!quick) { 221 partialSizesSize = 7; 222 } 223 int32_t i = 0; 224 log("partial sortkey test piecesize="); 225 for(i = 0; i < partialSizesSize; i++) { 226 UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result; 227 log("%i ", partialSizes[i]); 228 229 partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); 230 if(partialSKResult != (UCollationResult)result) { 231 errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")"); 232 } 233 234 if(norm != UCOL_ON && !quick) { 235 log("N "); 236 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 237 partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); 238 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); 239 if(partialSKResult != partialNormalizedSKResult) { 240 errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")"); 241 } 242 } 243 } 244 log("\n"); 245 } 246 /* 247 if (compareResult != result) { 248 errln("String comparison failed in variant test\n"); 249 } 250 if (keyResult != result) { 251 errln("Collation key comparison failed in variant test\n"); 252 } 253 */ 254 } 255 256 void 257 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) { 258 doTest(col, UnicodeString(source), UnicodeString(target), result); 259 } 260 261 void 262 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) 263 { 264 if(col) { 265 doTestVariant(col, source, target, result); 266 if(result == Collator::LESS) { 267 doTestVariant(col, target, source, Collator::GREATER); 268 } else if (result == Collator::GREATER) { 269 doTestVariant(col, target, source, Collator::LESS); 270 } 271 272 UErrorCode status = U_ZERO_ERROR; 273 LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source)); 274 logln("Testing iterating source: "+source); 275 backAndForth(*c); 276 c->setText(target, status); 277 logln("Testing iterating target: "+target); 278 backAndForth(*c); 279 } 280 } 281 282 283 // used for collation result reporting, defined here for convenience 284 // (maybe moved later) 285 void 286 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target, 287 CollationKey &sourceKey, CollationKey &targetKey, 288 Collator::EComparisonResult compareResult, 289 Collator::EComparisonResult keyResult, 290 Collator::EComparisonResult incResult, 291 Collator::EComparisonResult expectedResult ) 292 { 293 if (expectedResult < -1 || expectedResult > 1) 294 { 295 errln("***** invalid call to reportCResult ****"); 296 return; 297 } 298 299 UBool ok1 = (compareResult == expectedResult); 300 UBool ok2 = (keyResult == expectedResult); 301 UBool ok3 = (incResult == expectedResult); 302 303 304 if (ok1 && ok2 && ok3 && !verbose) { 305 // Keep non-verbose, passing tests fast 306 return; 307 } else { 308 UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare("); 309 UnicodeString msg2(", "), msg3(") returned "), msg4("; expected "); 310 UnicodeString prettySource, prettyTarget, sExpect, sResult; 311 312 IntlTest::prettify(source, prettySource); 313 IntlTest::prettify(target, prettyTarget); 314 appendCompareResult(compareResult, sResult); 315 appendCompareResult(expectedResult, sExpect); 316 317 if (ok1) { 318 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 319 } else { 320 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 321 } 322 323 msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key("); 324 msg2 = ").compareTo(key("; 325 msg3 = ")) returned "; 326 327 appendCompareResult(keyResult, sResult); 328 329 if (ok2) { 330 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 331 } else { 332 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 333 334 msg1 = " "; 335 msg2 = " vs. "; 336 337 prettify(sourceKey, prettySource); 338 prettify(targetKey, prettyTarget); 339 340 errln(msg1 + prettySource + msg2 + prettyTarget); 341 } 342 msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare("); 343 msg2 = ", "; 344 msg3 = ") returned "; 345 346 appendCompareResult(incResult, sResult); 347 348 if (ok3) { 349 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 350 } else { 351 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 352 } 353 } 354 } 355 356 UnicodeString& 357 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result, 358 UnicodeString& target) 359 { 360 if (result == Collator::LESS) 361 { 362 target += "LESS"; 363 } 364 else if (result == Collator::EQUAL) 365 { 366 target += "EQUAL"; 367 } 368 else if (result == Collator::GREATER) 369 { 370 target += "GREATER"; 371 } 372 else 373 { 374 UnicodeString huh = "?"; 375 376 target += (huh + (int32_t)result); 377 } 378 379 return target; 380 } 381 382 // Produce a printable representation of a CollationKey 383 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target) 384 { 385 int32_t i, byteCount; 386 const uint8_t *bytes = source.getByteArray(byteCount); 387 388 target.remove(); 389 target += "["; 390 391 for (i = 0; i < byteCount; i += 1) 392 { 393 if (i != 0) { 394 target += " "; 395 } 396 appendHex(bytes[i], 2, target); 397 } 398 399 target += "]"; 400 401 return target; 402 } 403 404 void IntlTestCollator::backAndForth(CollationElementIterator &iter) 405 { 406 // Run through the iterator forwards and stick it into an array 407 int32_t orderLength = 0; 408 LocalArray<Order> orders(getOrders(iter, orderLength)); 409 UErrorCode status = U_ZERO_ERROR; 410 411 // Now go through it backwards and make sure we get the same values 412 int32_t index = orderLength; 413 int32_t o; 414 415 // reset the iterator 416 iter.reset(); 417 418 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) 419 { 420 /*int32_t offset = */iter.getOffset(); 421 422 if (index == 0) { 423 if(o == 0) { 424 continue; 425 } else { // this is an error, orders exhausted but there are non-ignorable CEs from 426 // going backwards 427 errln("Backward iteration returned a non ignorable after orders are exhausted"); 428 break; 429 } 430 } 431 432 index -= 1; 433 if (o != orders[index].order) { 434 if (o == 0) 435 index += 1; 436 else { 437 while (index > 0 && orders[--index].order == 0) { 438 // nothing... 439 } 440 441 if (o != orders[index].order) { 442 errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index, 443 orders[index].order, o); 444 //break; 445 return; 446 } 447 } 448 } 449 450 #if TEST_OFFSETS 451 if (offset != orders[index].offset) { 452 errln("Mismatched offset at index %d: %d vs. %d", index, 453 orders[index].offset, offset); 454 //break; 455 return; 456 } 457 #endif 458 459 } 460 461 while (index != 0 && orders[index - 1].order == 0) 462 { 463 index --; 464 } 465 466 if (index != 0) 467 { 468 UnicodeString msg("Didn't get back to beginning - index is "); 469 errln(msg + index); 470 471 iter.reset(); 472 err("next: "); 473 while ((o = iter.next(status)) != CollationElementIterator::NULLORDER) 474 { 475 UnicodeString hexString("0x"); 476 477 appendHex(o, 8, hexString); 478 hexString += " "; 479 err(hexString); 480 } 481 errln(""); 482 483 err("prev: "); 484 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) 485 { 486 UnicodeString hexString("0x"); 487 488 appendHex(o, 8, hexString); 489 hexString += " "; 490 err(hexString); 491 } 492 errln(""); 493 } 494 } 495 496 497 /** 498 * Return an integer array containing all of the collation orders 499 * returned by calls to next on the specified iterator 500 */ 501 IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength) 502 { 503 int32_t maxSize = 100; 504 int32_t size = 0; 505 LocalArray<Order> orders(new Order[maxSize]); 506 UErrorCode status = U_ZERO_ERROR; 507 int32_t offset = iter.getOffset(); 508 509 int32_t order; 510 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER) 511 { 512 if (size == maxSize) 513 { 514 maxSize *= 2; 515 Order *temp = new Order[maxSize]; 516 517 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order)); 518 orders.adoptInstead(temp); 519 } 520 521 orders[size].order = order; 522 orders[size].offset = offset; 523 524 offset = iter.getOffset(); 525 size += 1; 526 } 527 528 if (maxSize > size) 529 { 530 Order *temp = new Order[size]; 531 532 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order)); 533 orders.adoptInstead(temp); 534 } 535 536 orderLength = size; 537 return orders.orphan(); 538 } 539 540 #endif /* #if !UCONFIG_NO_COLLATION */ 541