1 /* 2 ********************************************************************** 3 * Copyright (c) 2013-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 8 #include <string.h> 9 #include "unicode/localpointer.h" 10 #include "unicode/uperf.h" 11 #include "unicode/ucol.h" 12 #include "unicode/coll.h" 13 #include "unicode/uiter.h" 14 #include "unicode/ustring.h" 15 #include "unicode/sortkey.h" 16 #include "uarrsort.h" 17 #include "uoptions.h" 18 #include "ustr_imp.h" 19 20 #define COMPACT_ARRAY(CompactArrays, UNIT) \ 21 struct CompactArrays{\ 22 CompactArrays(const CompactArrays & );\ 23 CompactArrays & operator=(const CompactArrays & );\ 24 int32_t count;/*total number of the strings*/ \ 25 int32_t * index;/*relative offset in data*/ \ 26 UNIT * data; /*the real space to hold strings*/ \ 27 \ 28 ~CompactArrays(){free(index);free(data);} \ 29 CompactArrays() : count(0), index(NULL), data(NULL) { \ 30 index = (int32_t *) realloc(index, sizeof(int32_t)); \ 31 index[0] = 0; \ 32 } \ 33 void append_one(int32_t theLen){ /*include terminal NULL*/ \ 34 count++; \ 35 index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \ 36 index[count] = index[count - 1] + theLen; \ 37 data = (UNIT *) realloc(data, sizeof(UNIT) * index[count]); \ 38 } \ 39 UNIT * last(){return data + index[count - 1];} \ 40 const UNIT * dataOf(int32_t i) const {return data + index[i];} \ 41 int32_t lengthOf(int i) const {return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/ \ 42 }; 43 44 COMPACT_ARRAY(CA_uchar, UChar) 45 COMPACT_ARRAY(CA_char, char) 46 47 #define MAX_TEST_STRINGS_FOR_PERMUTING 1000 48 49 // C API test cases 50 51 // 52 // Test case taking a single test data array, calling ucol_strcoll by permuting the test data 53 // 54 class Strcoll : public UPerfFunction 55 { 56 public: 57 Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen); 58 ~Strcoll(); 59 virtual void call(UErrorCode* status); 60 virtual long getOperationsPerIteration(); 61 62 private: 63 const UCollator *coll; 64 const CA_uchar *source; 65 UBool useLen; 66 int32_t maxTestStrings; 67 }; 68 69 Strcoll::Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen) 70 : coll(coll), 71 source(source), 72 useLen(useLen) 73 { 74 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count; 75 } 76 77 Strcoll::~Strcoll() 78 { 79 } 80 81 void Strcoll::call(UErrorCode* status) 82 { 83 if (U_FAILURE(*status)) return; 84 85 // call strcoll for permutation 86 int32_t divisor = source->count / maxTestStrings; 87 int32_t srcLen, tgtLen; 88 int32_t cmp = 0; 89 for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) { 90 if (i % divisor) continue; 91 numTestStringsI++; 92 srcLen = useLen ? source->lengthOf(i) : -1; 93 for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) { 94 if (j % divisor) continue; 95 numTestStringsJ++; 96 tgtLen = useLen ? source->lengthOf(j) : -1; 97 cmp += ucol_strcoll(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen); 98 } 99 } 100 // At the end, cmp must be 0 101 if (cmp != 0) { 102 *status = U_INTERNAL_PROGRAM_ERROR; 103 } 104 } 105 106 long Strcoll::getOperationsPerIteration() 107 { 108 return maxTestStrings * maxTestStrings; 109 } 110 111 // 112 // Test case taking two test data arrays, calling ucol_strcoll for strings at a same index 113 // 114 class Strcoll_2 : public UPerfFunction 115 { 116 public: 117 Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen); 118 ~Strcoll_2(); 119 virtual void call(UErrorCode* status); 120 virtual long getOperationsPerIteration(); 121 122 private: 123 const UCollator *coll; 124 const CA_uchar *source; 125 const CA_uchar *target; 126 UBool useLen; 127 }; 128 129 Strcoll_2::Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen) 130 : coll(coll), 131 source(source), 132 target(target), 133 useLen(useLen) 134 { 135 } 136 137 Strcoll_2::~Strcoll_2() 138 { 139 } 140 141 void Strcoll_2::call(UErrorCode* status) 142 { 143 if (U_FAILURE(*status)) return; 144 145 // call strcoll for two strings at the same index 146 if (source->count < target->count) { 147 *status = U_ILLEGAL_ARGUMENT_ERROR; 148 } else { 149 for (int32_t i = 0; i < source->count; i++) { 150 int32_t srcLen = useLen ? source->lengthOf(i) : -1; 151 int32_t tgtLen = useLen ? target->lengthOf(i) : -1; 152 ucol_strcoll(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen); 153 } 154 } 155 } 156 157 long Strcoll_2::getOperationsPerIteration() 158 { 159 return source->count; 160 } 161 162 163 // 164 // Test case taking a single test data array, calling ucol_strcollUTF8 by permuting the test data 165 // 166 class StrcollUTF8 : public UPerfFunction 167 { 168 public: 169 StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen); 170 ~StrcollUTF8(); 171 virtual void call(UErrorCode* status); 172 virtual long getOperationsPerIteration(); 173 174 private: 175 const UCollator *coll; 176 const CA_char *source; 177 UBool useLen; 178 int32_t maxTestStrings; 179 }; 180 181 StrcollUTF8::StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen) 182 : coll(coll), 183 source(source), 184 useLen(useLen) 185 { 186 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count; 187 } 188 189 StrcollUTF8::~StrcollUTF8() 190 { 191 } 192 193 void StrcollUTF8::call(UErrorCode* status) 194 { 195 if (U_FAILURE(*status)) return; 196 197 // call strcollUTF8 for permutation 198 int32_t divisor = source->count / maxTestStrings; 199 int32_t srcLen, tgtLen; 200 int32_t cmp = 0; 201 for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) { 202 if (i % divisor) continue; 203 numTestStringsI++; 204 srcLen = useLen ? source->lengthOf(i) : -1; 205 for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) { 206 if (j % divisor) continue; 207 numTestStringsJ++; 208 tgtLen = useLen ? source->lengthOf(j) : -1; 209 cmp += ucol_strcollUTF8(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen, status); 210 } 211 } 212 // At the end, cmp must be 0 213 if (cmp != 0) { 214 *status = U_INTERNAL_PROGRAM_ERROR; 215 } 216 } 217 218 long StrcollUTF8::getOperationsPerIteration() 219 { 220 return maxTestStrings * maxTestStrings; 221 } 222 223 // 224 // Test case taking two test data arrays, calling ucol_strcoll for strings at a same index 225 // 226 class StrcollUTF8_2 : public UPerfFunction 227 { 228 public: 229 StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen); 230 ~StrcollUTF8_2(); 231 virtual void call(UErrorCode* status); 232 virtual long getOperationsPerIteration(); 233 234 private: 235 const UCollator *coll; 236 const CA_char *source; 237 const CA_char *target; 238 UBool useLen; 239 }; 240 241 StrcollUTF8_2::StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen) 242 : coll(coll), 243 source(source), 244 target(target), 245 useLen(useLen) 246 { 247 } 248 249 StrcollUTF8_2::~StrcollUTF8_2() 250 { 251 } 252 253 void StrcollUTF8_2::call(UErrorCode* status) 254 { 255 if (U_FAILURE(*status)) return; 256 257 // call strcoll for two strings at the same index 258 if (source->count < target->count) { 259 *status = U_ILLEGAL_ARGUMENT_ERROR; 260 } else { 261 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) { 262 int32_t srcLen = useLen ? source->lengthOf(i) : -1; 263 int32_t tgtLen = useLen ? target->lengthOf(i) : -1; 264 ucol_strcollUTF8(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen, status); 265 } 266 } 267 } 268 269 long StrcollUTF8_2::getOperationsPerIteration() 270 { 271 return source->count; 272 } 273 274 // 275 // Test case taking a single test data array, calling ucol_getSortKey for each 276 // 277 class GetSortKey : public UPerfFunction 278 { 279 public: 280 GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen); 281 ~GetSortKey(); 282 virtual void call(UErrorCode* status); 283 virtual long getOperationsPerIteration(); 284 285 private: 286 const UCollator *coll; 287 const CA_uchar *source; 288 UBool useLen; 289 }; 290 291 GetSortKey::GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen) 292 : coll(coll), 293 source(source), 294 useLen(useLen) 295 { 296 } 297 298 GetSortKey::~GetSortKey() 299 { 300 } 301 302 #define KEY_BUF_SIZE 512 303 304 void GetSortKey::call(UErrorCode* status) 305 { 306 if (U_FAILURE(*status)) return; 307 308 uint8_t key[KEY_BUF_SIZE]; 309 int32_t len; 310 311 if (useLen) { 312 for (int32_t i = 0; i < source->count; i++) { 313 len = ucol_getSortKey(coll, source->dataOf(i), source->lengthOf(i), key, KEY_BUF_SIZE); 314 } 315 } else { 316 for (int32_t i = 0; i < source->count; i++) { 317 len = ucol_getSortKey(coll, source->dataOf(i), -1, key, KEY_BUF_SIZE); 318 } 319 } 320 } 321 322 long GetSortKey::getOperationsPerIteration() 323 { 324 return source->count; 325 } 326 327 // 328 // Test case taking a single test data array in UTF-16, calling ucol_nextSortKeyPart for each for the 329 // given buffer size 330 // 331 class NextSortKeyPart : public UPerfFunction 332 { 333 public: 334 NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration = -1); 335 ~NextSortKeyPart(); 336 virtual void call(UErrorCode* status); 337 virtual long getOperationsPerIteration(); 338 virtual long getEventsPerIteration(); 339 340 private: 341 const UCollator *coll; 342 const CA_uchar *source; 343 int32_t bufSize; 344 int32_t maxIteration; 345 long events; 346 }; 347 348 // Note: maxIteration = -1 -> repeat until the end of collation key 349 NextSortKeyPart::NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration /* = -1 */) 350 : coll(coll), 351 source(source), 352 bufSize(bufSize), 353 maxIteration(maxIteration), 354 events(0) 355 { 356 } 357 358 NextSortKeyPart::~NextSortKeyPart() 359 { 360 } 361 362 void NextSortKeyPart::call(UErrorCode* status) 363 { 364 if (U_FAILURE(*status)) return; 365 366 uint8_t *part = (uint8_t *)malloc(bufSize); 367 uint32_t state[2]; 368 UCharIterator iter; 369 370 events = 0; 371 for (int i = 0; i < source->count && U_SUCCESS(*status); i++) { 372 uiter_setString(&iter, source->dataOf(i), source->lengthOf(i)); 373 state[0] = 0; 374 state[1] = 0; 375 int32_t partLen = bufSize; 376 for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) { 377 partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status); 378 events++; 379 } 380 } 381 free(part); 382 } 383 384 long NextSortKeyPart::getOperationsPerIteration() 385 { 386 return source->count; 387 } 388 389 long NextSortKeyPart::getEventsPerIteration() 390 { 391 return events; 392 } 393 394 // 395 // Test case taking a single test data array in UTF-8, calling ucol_nextSortKeyPart for each for the 396 // given buffer size 397 // 398 class NextSortKeyPartUTF8 : public UPerfFunction 399 { 400 public: 401 NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration = -1); 402 ~NextSortKeyPartUTF8(); 403 virtual void call(UErrorCode* status); 404 virtual long getOperationsPerIteration(); 405 virtual long getEventsPerIteration(); 406 407 private: 408 const UCollator *coll; 409 const CA_char *source; 410 int32_t bufSize; 411 int32_t maxIteration; 412 long events; 413 }; 414 415 // Note: maxIteration = -1 -> repeat until the end of collation key 416 NextSortKeyPartUTF8::NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration /* = -1 */) 417 : coll(coll), 418 source(source), 419 bufSize(bufSize), 420 maxIteration(maxIteration), 421 events(0) 422 { 423 } 424 425 NextSortKeyPartUTF8::~NextSortKeyPartUTF8() 426 { 427 } 428 429 void NextSortKeyPartUTF8::call(UErrorCode* status) 430 { 431 if (U_FAILURE(*status)) return; 432 433 uint8_t *part = (uint8_t *)malloc(bufSize); 434 uint32_t state[2]; 435 UCharIterator iter; 436 437 events = 0; 438 for (int i = 0; i < source->count && U_SUCCESS(*status); i++) { 439 uiter_setUTF8(&iter, source->dataOf(i), source->lengthOf(i)); 440 state[0] = 0; 441 state[1] = 0; 442 int32_t partLen = bufSize; 443 for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) { 444 partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status); 445 events++; 446 } 447 } 448 free(part); 449 } 450 451 long NextSortKeyPartUTF8::getOperationsPerIteration() 452 { 453 return source->count; 454 } 455 456 long NextSortKeyPartUTF8::getEventsPerIteration() 457 { 458 return events; 459 } 460 461 // CPP API test cases 462 463 // 464 // Test case taking a single test data array, calling Collator::compare by permuting the test data 465 // 466 class CppCompare : public UPerfFunction 467 { 468 public: 469 CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen); 470 ~CppCompare(); 471 virtual void call(UErrorCode* status); 472 virtual long getOperationsPerIteration(); 473 474 private: 475 const Collator *coll; 476 const CA_uchar *source; 477 UBool useLen; 478 int32_t maxTestStrings; 479 }; 480 481 CppCompare::CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen) 482 : coll(coll), 483 source(source), 484 useLen(useLen) 485 { 486 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count; 487 } 488 489 CppCompare::~CppCompare() 490 { 491 } 492 493 void CppCompare::call(UErrorCode* status) { 494 if (U_FAILURE(*status)) return; 495 496 // call compare for permutation of test data 497 int32_t divisor = source->count / maxTestStrings; 498 int32_t srcLen, tgtLen; 499 int32_t cmp = 0; 500 for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) { 501 if (i % divisor) continue; 502 numTestStringsI++; 503 srcLen = useLen ? source->lengthOf(i) : -1; 504 for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) { 505 if (j % divisor) continue; 506 numTestStringsJ++; 507 tgtLen = useLen ? source->lengthOf(j) : -1; 508 cmp += coll->compare(source->dataOf(i), srcLen, source->dataOf(j), tgtLen); 509 } 510 } 511 // At the end, cmp must be 0 512 if (cmp != 0) { 513 *status = U_INTERNAL_PROGRAM_ERROR; 514 } 515 } 516 517 long CppCompare::getOperationsPerIteration() 518 { 519 return maxTestStrings * maxTestStrings; 520 } 521 522 // 523 // Test case taking two test data arrays, calling Collator::compare for strings at a same index 524 // 525 class CppCompare_2 : public UPerfFunction 526 { 527 public: 528 CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen); 529 ~CppCompare_2(); 530 virtual void call(UErrorCode* status); 531 virtual long getOperationsPerIteration(); 532 533 private: 534 const Collator *coll; 535 const CA_uchar *source; 536 const CA_uchar *target; 537 UBool useLen; 538 }; 539 540 CppCompare_2::CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen) 541 : coll(coll), 542 source(source), 543 target(target), 544 useLen(useLen) 545 { 546 } 547 548 CppCompare_2::~CppCompare_2() 549 { 550 } 551 552 void CppCompare_2::call(UErrorCode* status) { 553 if (U_FAILURE(*status)) return; 554 555 // call strcoll for two strings at the same index 556 if (source->count < target->count) { 557 *status = U_ILLEGAL_ARGUMENT_ERROR; 558 } else { 559 for (int32_t i = 0; i < source->count; i++) { 560 int32_t srcLen = useLen ? source->lengthOf(i) : -1; 561 int32_t tgtLen = useLen ? target->lengthOf(i) : -1; 562 coll->compare(source->dataOf(i), srcLen, target->dataOf(i), tgtLen); 563 } 564 } 565 } 566 567 long CppCompare_2::getOperationsPerIteration() 568 { 569 return source->count; 570 } 571 572 573 // 574 // Test case taking a single test data array, calling Collator::compareUTF8 by permuting the test data 575 // 576 class CppCompareUTF8 : public UPerfFunction 577 { 578 public: 579 CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen); 580 ~CppCompareUTF8(); 581 virtual void call(UErrorCode* status); 582 virtual long getOperationsPerIteration(); 583 584 private: 585 const Collator *coll; 586 const CA_char *source; 587 UBool useLen; 588 int32_t maxTestStrings; 589 }; 590 591 CppCompareUTF8::CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen) 592 : coll(coll), 593 source(source), 594 useLen(useLen) 595 { 596 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count; 597 } 598 599 CppCompareUTF8::~CppCompareUTF8() 600 { 601 } 602 603 void CppCompareUTF8::call(UErrorCode* status) { 604 if (U_FAILURE(*status)) return; 605 606 // call compareUTF8 for all permutations 607 int32_t divisor = source->count / maxTestStrings; 608 StringPiece src, tgt; 609 int32_t cmp = 0; 610 for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) { 611 if (i % divisor) continue; 612 numTestStringsI++; 613 614 if (useLen) { 615 src.set(source->dataOf(i), source->lengthOf(i)); 616 } else { 617 src.set(source->dataOf(i)); 618 } 619 for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) { 620 if (j % divisor) continue; 621 numTestStringsJ++; 622 623 if (useLen) { 624 tgt.set(source->dataOf(i), source->lengthOf(i)); 625 } else { 626 tgt.set(source->dataOf(i)); 627 } 628 cmp += coll->compareUTF8(src, tgt, *status); 629 } 630 } 631 // At the end, cmp must be 0 632 if (cmp != 0) { 633 *status = U_INTERNAL_PROGRAM_ERROR; 634 } 635 } 636 637 long CppCompareUTF8::getOperationsPerIteration() 638 { 639 return maxTestStrings * maxTestStrings; 640 } 641 642 643 // 644 // Test case taking two test data arrays, calling Collator::compareUTF8 for strings at a same index 645 // 646 class CppCompareUTF8_2 : public UPerfFunction 647 { 648 public: 649 CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen); 650 ~CppCompareUTF8_2(); 651 virtual void call(UErrorCode* status); 652 virtual long getOperationsPerIteration(); 653 654 private: 655 const Collator *coll; 656 const CA_char *source; 657 const CA_char *target; 658 UBool useLen; 659 }; 660 661 CppCompareUTF8_2::CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen) 662 : coll(coll), 663 source(source), 664 target(target), 665 useLen(useLen) 666 { 667 } 668 669 CppCompareUTF8_2::~CppCompareUTF8_2() 670 { 671 } 672 673 void CppCompareUTF8_2::call(UErrorCode* status) { 674 if (U_FAILURE(*status)) return; 675 676 // call strcoll for two strings at the same index 677 StringPiece src, tgt; 678 if (source->count < target->count) { 679 *status = U_ILLEGAL_ARGUMENT_ERROR; 680 } else { 681 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) { 682 if (useLen) { 683 src.set(source->dataOf(i), source->lengthOf(i)); 684 tgt.set(target->dataOf(i), target->lengthOf(i)); 685 } else { 686 src.set(source->dataOf(i)); 687 tgt.set(target->dataOf(i)); 688 } 689 coll->compareUTF8(src, tgt, *status); 690 } 691 } 692 } 693 694 long CppCompareUTF8_2::getOperationsPerIteration() 695 { 696 return source->count; 697 } 698 699 700 // 701 // Test case taking a single test data array, calling Collator::getCollationKey for each 702 // 703 class CppGetCollationKey : public UPerfFunction 704 { 705 public: 706 CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen); 707 ~CppGetCollationKey(); 708 virtual void call(UErrorCode* status); 709 virtual long getOperationsPerIteration(); 710 711 private: 712 const Collator *coll; 713 const CA_uchar *source; 714 UBool useLen; 715 }; 716 717 CppGetCollationKey::CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen) 718 : coll(coll), 719 source(source), 720 useLen(useLen) 721 { 722 } 723 724 CppGetCollationKey::~CppGetCollationKey() 725 { 726 } 727 728 void CppGetCollationKey::call(UErrorCode* status) 729 { 730 if (U_FAILURE(*status)) return; 731 732 CollationKey key; 733 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) { 734 coll->getCollationKey(source->dataOf(i), source->lengthOf(i), key, *status); 735 } 736 } 737 738 long CppGetCollationKey::getOperationsPerIteration() { 739 return source->count; 740 } 741 742 namespace { 743 744 struct CollatorAndCounter { 745 CollatorAndCounter(const Collator& coll) : coll(coll), ucoll(NULL), counter(0) {} 746 CollatorAndCounter(const Collator& coll, const UCollator *ucoll) 747 : coll(coll), ucoll(ucoll), counter(0) {} 748 const Collator& coll; 749 const UCollator *ucoll; 750 int32_t counter; 751 }; 752 753 int32_t U_CALLCONV 754 UniStrCollatorComparator(const void* context, const void* left, const void* right) { 755 CollatorAndCounter& cc = *(CollatorAndCounter*)context; 756 const UnicodeString& leftString = **(const UnicodeString**)left; 757 const UnicodeString& rightString = **(const UnicodeString**)right; 758 UErrorCode errorCode = U_ZERO_ERROR; 759 ++cc.counter; 760 return cc.coll.compare(leftString, rightString, errorCode); 761 } 762 763 } // namespace 764 765 class CollPerfFunction : public UPerfFunction { 766 public: 767 CollPerfFunction(const Collator& coll, const UCollator *ucoll) 768 : coll(coll), ucoll(ucoll), ops(0) {} 769 virtual ~CollPerfFunction(); 770 /** Calls call() to set the ops field, and returns that. */ 771 virtual long getOperationsPerIteration(); 772 773 protected: 774 const Collator& coll; 775 const UCollator *ucoll; 776 int32_t ops; 777 }; 778 779 CollPerfFunction::~CollPerfFunction() {} 780 781 long CollPerfFunction::getOperationsPerIteration() { 782 UErrorCode errorCode = U_ZERO_ERROR; 783 call(&errorCode); 784 return U_SUCCESS(errorCode) ? ops : 0; 785 } 786 787 class UniStrCollPerfFunction : public CollPerfFunction { 788 public: 789 UniStrCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16) 790 : CollPerfFunction(coll, ucoll), d16(data16), 791 source(new UnicodeString*[d16->count]) { 792 for (int32_t i = 0; i < d16->count; ++i) { 793 source[i] = new UnicodeString(TRUE, d16->dataOf(i), d16->lengthOf(i)); 794 } 795 } 796 virtual ~UniStrCollPerfFunction(); 797 798 protected: 799 const CA_uchar* d16; 800 UnicodeString** source; 801 }; 802 803 UniStrCollPerfFunction::~UniStrCollPerfFunction() { 804 for (int32_t i = 0; i < d16->count; ++i) { 805 delete source[i]; 806 } 807 delete[] source; 808 } 809 810 // 811 // Test case sorting an array of UnicodeString pointers. 812 // 813 class UniStrSort : public UniStrCollPerfFunction { 814 public: 815 UniStrSort(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16) 816 : UniStrCollPerfFunction(coll, ucoll, data16), 817 dest(new UnicodeString*[d16->count]) {} 818 virtual ~UniStrSort(); 819 virtual void call(UErrorCode* status); 820 821 private: 822 UnicodeString** dest; // aliases only 823 }; 824 825 UniStrSort::~UniStrSort() { 826 delete[] dest; 827 } 828 829 void UniStrSort::call(UErrorCode* status) { 830 if (U_FAILURE(*status)) return; 831 832 CollatorAndCounter cc(coll); 833 int32_t count = d16->count; 834 memcpy(dest, source, count * sizeof(UnicodeString *)); 835 uprv_sortArray(dest, count, (int32_t)sizeof(UnicodeString *), 836 UniStrCollatorComparator, &cc, TRUE, status); 837 ops = cc.counter; 838 } 839 840 namespace { 841 842 int32_t U_CALLCONV 843 StringPieceCollatorComparator(const void* context, const void* left, const void* right) { 844 CollatorAndCounter& cc = *(CollatorAndCounter*)context; 845 const StringPiece& leftString = *(const StringPiece*)left; 846 const StringPiece& rightString = *(const StringPiece*)right; 847 UErrorCode errorCode = U_ZERO_ERROR; 848 ++cc.counter; 849 return cc.coll.compareUTF8(leftString, rightString, errorCode); 850 } 851 852 int32_t U_CALLCONV 853 StringPieceUCollatorComparator(const void* context, const void* left, const void* right) { 854 CollatorAndCounter& cc = *(CollatorAndCounter*)context; 855 const StringPiece& leftString = *(const StringPiece*)left; 856 const StringPiece& rightString = *(const StringPiece*)right; 857 UErrorCode errorCode = U_ZERO_ERROR; 858 ++cc.counter; 859 return ucol_strcollUTF8(cc.ucoll, 860 leftString.data(), leftString.length(), 861 rightString.data(), rightString.length(), &errorCode); 862 } 863 864 } // namespace 865 866 class StringPieceCollPerfFunction : public CollPerfFunction { 867 public: 868 StringPieceCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 869 : CollPerfFunction(coll, ucoll), d8(data8), 870 source(new StringPiece[d8->count]) { 871 for (int32_t i = 0; i < d8->count; ++i) { 872 source[i].set(d8->dataOf(i), d8->lengthOf(i)); 873 } 874 } 875 virtual ~StringPieceCollPerfFunction(); 876 877 protected: 878 const CA_char* d8; 879 StringPiece* source; 880 }; 881 882 StringPieceCollPerfFunction::~StringPieceCollPerfFunction() { 883 delete[] source; 884 } 885 886 class StringPieceSort : public StringPieceCollPerfFunction { 887 public: 888 StringPieceSort(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 889 : StringPieceCollPerfFunction(coll, ucoll, data8), 890 dest(new StringPiece[d8->count]) {} 891 virtual ~StringPieceSort(); 892 893 protected: 894 StringPiece* dest; 895 }; 896 897 StringPieceSort::~StringPieceSort() { 898 delete[] dest; 899 } 900 901 // 902 // Test case sorting an array of UTF-8 StringPiece's with Collator::compareUTF8(). 903 // 904 class StringPieceSortCpp : public StringPieceSort { 905 public: 906 StringPieceSortCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 907 : StringPieceSort(coll, ucoll, data8) {} 908 virtual ~StringPieceSortCpp(); 909 virtual void call(UErrorCode* status); 910 }; 911 912 StringPieceSortCpp::~StringPieceSortCpp() {} 913 914 void StringPieceSortCpp::call(UErrorCode* status) { 915 if (U_FAILURE(*status)) return; 916 917 CollatorAndCounter cc(coll); 918 int32_t count = d8->count; 919 memcpy(dest, source, count * sizeof(StringPiece)); 920 uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece), 921 StringPieceCollatorComparator, &cc, TRUE, status); 922 ops = cc.counter; 923 } 924 925 // 926 // Test case sorting an array of UTF-8 StringPiece's with ucol_strcollUTF8(). 927 // 928 class StringPieceSortC : public StringPieceSort { 929 public: 930 StringPieceSortC(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 931 : StringPieceSort(coll, ucoll, data8) {} 932 virtual ~StringPieceSortC(); 933 virtual void call(UErrorCode* status); 934 }; 935 936 StringPieceSortC::~StringPieceSortC() {} 937 938 void StringPieceSortC::call(UErrorCode* status) { 939 if (U_FAILURE(*status)) return; 940 941 CollatorAndCounter cc(coll, ucoll); 942 int32_t count = d8->count; 943 memcpy(dest, source, count * sizeof(StringPiece)); 944 uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece), 945 StringPieceUCollatorComparator, &cc, TRUE, status); 946 ops = cc.counter; 947 } 948 949 // 950 // Test case performing binary searches in a sorted array of UnicodeString pointers. 951 // 952 class UniStrBinSearch : public UniStrCollPerfFunction { 953 public: 954 UniStrBinSearch(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16) 955 : UniStrCollPerfFunction(coll, ucoll, data16) {} 956 virtual ~UniStrBinSearch(); 957 virtual void call(UErrorCode* status); 958 }; 959 960 UniStrBinSearch::~UniStrBinSearch() {} 961 962 void UniStrBinSearch::call(UErrorCode* status) { 963 if (U_FAILURE(*status)) return; 964 965 CollatorAndCounter cc(coll); 966 int32_t count = d16->count; 967 for (int32_t i = 0; i < count; ++i) { 968 (void)uprv_stableBinarySearch((char *)source, count, 969 source + i, (int32_t)sizeof(UnicodeString *), 970 UniStrCollatorComparator, &cc); 971 } 972 ops = cc.counter; 973 } 974 975 class StringPieceBinSearch : public StringPieceCollPerfFunction { 976 public: 977 StringPieceBinSearch(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 978 : StringPieceCollPerfFunction(coll, ucoll, data8) {} 979 virtual ~StringPieceBinSearch(); 980 }; 981 982 StringPieceBinSearch::~StringPieceBinSearch() {} 983 984 // 985 // Test case performing binary searches in a sorted array of UTF-8 StringPiece's 986 // with Collator::compareUTF8(). 987 // 988 class StringPieceBinSearchCpp : public StringPieceBinSearch { 989 public: 990 StringPieceBinSearchCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 991 : StringPieceBinSearch(coll, ucoll, data8) {} 992 virtual ~StringPieceBinSearchCpp(); 993 virtual void call(UErrorCode* status); 994 }; 995 996 StringPieceBinSearchCpp::~StringPieceBinSearchCpp() {} 997 998 void StringPieceBinSearchCpp::call(UErrorCode* status) { 999 if (U_FAILURE(*status)) return; 1000 1001 CollatorAndCounter cc(coll); 1002 int32_t count = d8->count; 1003 for (int32_t i = 0; i < count; ++i) { 1004 (void)uprv_stableBinarySearch((char *)source, count, 1005 source + i, (int32_t)sizeof(StringPiece), 1006 StringPieceCollatorComparator, &cc); 1007 } 1008 ops = cc.counter; 1009 } 1010 1011 // 1012 // Test case performing binary searches in a sorted array of UTF-8 StringPiece's 1013 // with ucol_strcollUTF8(). 1014 // 1015 class StringPieceBinSearchC : public StringPieceBinSearch { 1016 public: 1017 StringPieceBinSearchC(const Collator& coll, const UCollator *ucoll, const CA_char* data8) 1018 : StringPieceBinSearch(coll, ucoll, data8) {} 1019 virtual ~StringPieceBinSearchC(); 1020 virtual void call(UErrorCode* status); 1021 }; 1022 1023 StringPieceBinSearchC::~StringPieceBinSearchC() {} 1024 1025 void StringPieceBinSearchC::call(UErrorCode* status) { 1026 if (U_FAILURE(*status)) return; 1027 1028 CollatorAndCounter cc(coll, ucoll); 1029 int32_t count = d8->count; 1030 for (int32_t i = 0; i < count; ++i) { 1031 (void)uprv_stableBinarySearch((char *)source, count, 1032 source + i, (int32_t)sizeof(StringPiece), 1033 StringPieceUCollatorComparator, &cc); 1034 } 1035 ops = cc.counter; 1036 } 1037 1038 1039 class CollPerf2Test : public UPerfTest 1040 { 1041 public: 1042 CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status); 1043 ~CollPerf2Test(); 1044 virtual UPerfFunction* runIndexedTest( 1045 int32_t index, UBool exec, const char *&name, char *par = NULL); 1046 1047 private: 1048 UCollator* coll; 1049 Collator* collObj; 1050 1051 int32_t count; 1052 CA_uchar* data16; 1053 CA_char* data8; 1054 1055 CA_uchar* modData16; 1056 CA_char* modData8; 1057 1058 CA_uchar* sortedData16; 1059 CA_char* sortedData8; 1060 1061 CA_uchar* randomData16; 1062 CA_char* randomData8; 1063 1064 const CA_uchar* getData16(UErrorCode &status); 1065 const CA_char* getData8(UErrorCode &status); 1066 1067 const CA_uchar* getModData16(UErrorCode &status); 1068 const CA_char* getModData8(UErrorCode &status); 1069 1070 const CA_uchar* getSortedData16(UErrorCode &status); 1071 const CA_char* getSortedData8(UErrorCode &status); 1072 1073 const CA_uchar* getRandomData16(UErrorCode &status); 1074 const CA_char* getRandomData8(UErrorCode &status); 1075 1076 static CA_uchar* sortData16( 1077 const CA_uchar* d16, 1078 UComparator *cmp, const void *context, 1079 UErrorCode &status); 1080 static CA_char* getData8FromData16(const CA_uchar* d16, UErrorCode &status); 1081 1082 UPerfFunction* TestStrcoll(); 1083 UPerfFunction* TestStrcollNull(); 1084 UPerfFunction* TestStrcollSimilar(); 1085 1086 UPerfFunction* TestStrcollUTF8(); 1087 UPerfFunction* TestStrcollUTF8Null(); 1088 UPerfFunction* TestStrcollUTF8Similar(); 1089 1090 UPerfFunction* TestGetSortKey(); 1091 UPerfFunction* TestGetSortKeyNull(); 1092 1093 UPerfFunction* TestNextSortKeyPart_4All(); 1094 UPerfFunction* TestNextSortKeyPart_4x2(); 1095 UPerfFunction* TestNextSortKeyPart_4x4(); 1096 UPerfFunction* TestNextSortKeyPart_4x8(); 1097 UPerfFunction* TestNextSortKeyPart_32All(); 1098 UPerfFunction* TestNextSortKeyPart_32x2(); 1099 1100 UPerfFunction* TestNextSortKeyPartUTF8_4All(); 1101 UPerfFunction* TestNextSortKeyPartUTF8_4x2(); 1102 UPerfFunction* TestNextSortKeyPartUTF8_4x4(); 1103 UPerfFunction* TestNextSortKeyPartUTF8_4x8(); 1104 UPerfFunction* TestNextSortKeyPartUTF8_32All(); 1105 UPerfFunction* TestNextSortKeyPartUTF8_32x2(); 1106 1107 UPerfFunction* TestCppCompare(); 1108 UPerfFunction* TestCppCompareNull(); 1109 UPerfFunction* TestCppCompareSimilar(); 1110 1111 UPerfFunction* TestCppCompareUTF8(); 1112 UPerfFunction* TestCppCompareUTF8Null(); 1113 UPerfFunction* TestCppCompareUTF8Similar(); 1114 1115 UPerfFunction* TestCppGetCollationKey(); 1116 UPerfFunction* TestCppGetCollationKeyNull(); 1117 1118 UPerfFunction* TestUniStrSort(); 1119 UPerfFunction* TestStringPieceSortCpp(); 1120 UPerfFunction* TestStringPieceSortC(); 1121 1122 UPerfFunction* TestUniStrBinSearch(); 1123 UPerfFunction* TestStringPieceBinSearchCpp(); 1124 UPerfFunction* TestStringPieceBinSearchC(); 1125 }; 1126 1127 CollPerf2Test::CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status) : 1128 UPerfTest(argc, argv, status), 1129 coll(NULL), 1130 collObj(NULL), 1131 count(0), 1132 data16(NULL), 1133 data8(NULL), 1134 modData16(NULL), 1135 modData8(NULL), 1136 sortedData16(NULL), 1137 sortedData8(NULL), 1138 randomData16(NULL), 1139 randomData8(NULL) 1140 { 1141 if (U_FAILURE(status)) { 1142 return; 1143 } 1144 1145 if (locale == NULL){ 1146 locale = "en_US"; // set default locale 1147 } 1148 1149 // Set up an ICU collator 1150 coll = ucol_open(locale, &status); 1151 collObj = Collator::createInstance(locale, status); 1152 1153 // Keyword support should be actually a part of ICU collator, see ICU ticket #8260. 1154 char keyBuffer[256]; 1155 UColAttributeValue val; 1156 if (uloc_getKeywordValue(locale, "strength", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1157 if (strcmp(keyBuffer, "primary") == 0) { 1158 val = UCOL_PRIMARY; 1159 } else if (strcmp(keyBuffer, "secondary") == 0) { 1160 val = UCOL_SECONDARY; 1161 } else if (strcmp(keyBuffer, "tertiary") == 0) { 1162 val = UCOL_TERTIARY; 1163 } else if (strcmp(keyBuffer, "quaternary") == 0) { 1164 val = UCOL_QUATERNARY; 1165 } else if (strcmp(keyBuffer, "identical") == 0) { 1166 val = UCOL_IDENTICAL; 1167 } else { 1168 status = U_ILLEGAL_ARGUMENT_ERROR; 1169 } 1170 if (U_SUCCESS(status)) { 1171 ucol_setAttribute(coll, UCOL_STRENGTH, val, &status); 1172 collObj->setAttribute(UCOL_STRENGTH, val, status); 1173 } 1174 } 1175 if (uloc_getKeywordValue(locale, "alternate", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1176 if (strcmp(keyBuffer, "non-ignorable") == 0) { 1177 val = UCOL_NON_IGNORABLE; 1178 } else if (strcmp(keyBuffer, "shifted") == 0) { 1179 val = UCOL_SHIFTED; 1180 } else { 1181 status = U_ILLEGAL_ARGUMENT_ERROR; 1182 } 1183 if (U_SUCCESS(status)) { 1184 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, val, &status); 1185 collObj->setAttribute(UCOL_ALTERNATE_HANDLING, val, status); 1186 } 1187 } 1188 if (uloc_getKeywordValue(locale, "backwards", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1189 if (strcmp(keyBuffer, "on") == 0) { 1190 val = UCOL_ON; 1191 } else if (strcmp(keyBuffer, "off") == 0) { 1192 val = UCOL_OFF; 1193 } else { 1194 status = U_ILLEGAL_ARGUMENT_ERROR; 1195 } 1196 if (U_SUCCESS(status)) { 1197 ucol_setAttribute(coll, UCOL_FRENCH_COLLATION, val, &status); 1198 collObj->setAttribute(UCOL_FRENCH_COLLATION, val, status); 1199 } 1200 } 1201 if (uloc_getKeywordValue(locale, "normalization", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1202 if (strcmp(keyBuffer, "on") == 0) { 1203 val = UCOL_ON; 1204 } else if (strcmp(keyBuffer, "off") == 0) { 1205 val = UCOL_OFF; 1206 } else { 1207 status = U_ILLEGAL_ARGUMENT_ERROR; 1208 } 1209 if (U_SUCCESS(status)) { 1210 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, val, &status); 1211 collObj->setAttribute(UCOL_NORMALIZATION_MODE, val, status); 1212 } 1213 } 1214 if (uloc_getKeywordValue(locale, "caseLevel", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1215 if (strcmp(keyBuffer, "on") == 0) { 1216 val = UCOL_ON; 1217 } else if (strcmp(keyBuffer, "off") == 0) { 1218 val = UCOL_OFF; 1219 } else { 1220 status = U_ILLEGAL_ARGUMENT_ERROR; 1221 } 1222 if (U_SUCCESS(status)) { 1223 ucol_setAttribute(coll, UCOL_CASE_LEVEL, val, &status); 1224 collObj->setAttribute(UCOL_CASE_LEVEL, val, status); 1225 } 1226 } 1227 if (uloc_getKeywordValue(locale, "caseFirst", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1228 if (strcmp(keyBuffer, "upper") == 0) { 1229 val = UCOL_UPPER_FIRST; 1230 } else if (strcmp(keyBuffer, "lower") == 0) { 1231 val = UCOL_LOWER_FIRST; 1232 } else if (strcmp(keyBuffer, "off") == 0) { 1233 val = UCOL_OFF; 1234 } else { 1235 status = U_ILLEGAL_ARGUMENT_ERROR; 1236 } 1237 if (U_SUCCESS(status)) { 1238 ucol_setAttribute(coll, UCOL_CASE_FIRST, val, &status); 1239 collObj->setAttribute(UCOL_CASE_FIRST, val, status); 1240 } 1241 } 1242 if (uloc_getKeywordValue(locale, "hiraganaQuaternary", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1243 if (strcmp(keyBuffer, "on") == 0) { 1244 val = UCOL_ON; 1245 } else if (strcmp(keyBuffer, "off") == 0) { 1246 val = UCOL_OFF; 1247 } else { 1248 status = U_ILLEGAL_ARGUMENT_ERROR; 1249 } 1250 if (U_SUCCESS(status)) { 1251 ucol_setAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, val, &status); 1252 collObj->setAttribute(UCOL_HIRAGANA_QUATERNARY_MODE, val, status); 1253 } 1254 } 1255 if (uloc_getKeywordValue(locale, "numeric", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1256 if (strcmp(keyBuffer, "on") == 0) { 1257 val = UCOL_ON; 1258 } else if (strcmp(keyBuffer, "off") == 0) { 1259 val = UCOL_OFF; 1260 } else { 1261 status = U_ILLEGAL_ARGUMENT_ERROR; 1262 } 1263 if (U_SUCCESS(status)) { 1264 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, val, &status); 1265 collObj->setAttribute(UCOL_NUMERIC_COLLATION, val, status); 1266 } 1267 } 1268 if (uloc_getKeywordValue(locale, "variableTop", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1269 // no support for now 1270 status = U_UNSUPPORTED_ERROR; 1271 } 1272 if (uloc_getKeywordValue(locale, "reorder", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) { 1273 // no support for now 1274 status = U_UNSUPPORTED_ERROR; 1275 } 1276 } 1277 1278 CollPerf2Test::~CollPerf2Test() 1279 { 1280 ucol_close(coll); 1281 delete collObj; 1282 1283 delete data16; 1284 delete data8; 1285 delete modData16; 1286 delete modData8; 1287 delete sortedData16; 1288 delete sortedData8; 1289 delete randomData16; 1290 delete randomData8; 1291 } 1292 1293 #define MAX_NUM_DATA 10000 1294 1295 const CA_uchar* CollPerf2Test::getData16(UErrorCode &status) 1296 { 1297 if (U_FAILURE(status)) return NULL; 1298 if (data16) return data16; 1299 1300 CA_uchar* d16 = new CA_uchar(); 1301 const UChar *line = NULL; 1302 int32_t len = 0; 1303 int32_t numData = 0; 1304 1305 for (;;) { 1306 line = ucbuf_readline(ucharBuf, &len, &status); 1307 if (line == NULL || U_FAILURE(status)) break; 1308 1309 // Refer to the source code of ucbuf_readline() 1310 // 1. 'len' includes the line terminal symbols 1311 // 2. The length of the line terminal symbols is only one character 1312 // 3. The Windows CR LF line terminal symbols will be converted to CR 1313 1314 if (len == 1 || line[0] == 0x23 /* '#' */) { 1315 continue; // skip empty/comment line 1316 } else { 1317 d16->append_one(len); 1318 u_memcpy(d16->last(), line, len); 1319 1320 numData++; 1321 if (numData >= MAX_NUM_DATA) break; 1322 } 1323 } 1324 1325 if (U_SUCCESS(status)) { 1326 data16 = d16; 1327 } else { 1328 delete d16; 1329 } 1330 1331 return data16; 1332 } 1333 1334 const CA_char* CollPerf2Test::getData8(UErrorCode &status) 1335 { 1336 if (U_FAILURE(status)) return NULL; 1337 if (data8) return data8; 1338 return data8 = getData8FromData16(getData16(status), status); 1339 } 1340 1341 const CA_uchar* CollPerf2Test::getModData16(UErrorCode &status) 1342 { 1343 if (U_FAILURE(status)) return NULL; 1344 if (modData16) return modData16; 1345 1346 const CA_uchar* d16 = getData16(status); 1347 if (U_FAILURE(status)) return NULL; 1348 1349 CA_uchar* modData16 = new CA_uchar(); 1350 1351 for (int32_t i = 0; i < d16->count; i++) { 1352 const UChar *s = d16->dataOf(i); 1353 int32_t len = d16->lengthOf(i) + 1; // including NULL terminator 1354 1355 modData16->append_one(len); 1356 u_memcpy(modData16->last(), s, len); 1357 1358 // replacing the last character with a different character 1359 UChar *lastChar = &modData16->last()[len -2]; 1360 for (int32_t j = i + 1; j != i; j++) { 1361 if (j >= d16->count) { 1362 j = 0; 1363 } 1364 const UChar *s1 = d16->dataOf(j); 1365 UChar lastChar1 = s1[d16->lengthOf(j) - 1]; 1366 if (*lastChar != lastChar1) { 1367 *lastChar = lastChar1; 1368 break; 1369 } 1370 } 1371 } 1372 1373 return modData16; 1374 } 1375 1376 const CA_char* CollPerf2Test::getModData8(UErrorCode &status) 1377 { 1378 if (U_FAILURE(status)) return NULL; 1379 if (modData8) return modData8; 1380 return modData8 = getData8FromData16(getModData16(status), status); 1381 } 1382 1383 namespace { 1384 1385 struct ArrayAndColl { 1386 ArrayAndColl(const CA_uchar* a, const Collator& c) : d16(a), coll(c) {} 1387 const CA_uchar* d16; 1388 const Collator& coll; 1389 }; 1390 1391 int32_t U_CALLCONV 1392 U16CollatorComparator(const void* context, const void* left, const void* right) { 1393 const ArrayAndColl& ac = *(const ArrayAndColl*)context; 1394 const CA_uchar* d16 = ac.d16; 1395 int32_t leftIndex = *(const int32_t*)left; 1396 int32_t rightIndex = *(const int32_t*)right; 1397 UErrorCode errorCode = U_ZERO_ERROR; 1398 return ac.coll.compare(d16->dataOf(leftIndex), d16->lengthOf(leftIndex), 1399 d16->dataOf(rightIndex), d16->lengthOf(rightIndex), 1400 errorCode); 1401 } 1402 1403 int32_t U_CALLCONV 1404 U16HashComparator(const void* context, const void* left, const void* right) { 1405 const CA_uchar* d16 = (const CA_uchar*)context; 1406 int32_t leftIndex = *(const int32_t*)left; 1407 int32_t rightIndex = *(const int32_t*)right; 1408 int32_t leftHash = ustr_hashUCharsN(d16->dataOf(leftIndex), d16->lengthOf(leftIndex)); 1409 int32_t rightHash = ustr_hashUCharsN(d16->dataOf(rightIndex), d16->lengthOf(rightIndex)); 1410 return leftHash < rightHash ? -1 : leftHash == rightHash ? 0 : 1; 1411 } 1412 1413 } // namespace 1414 1415 const CA_uchar* CollPerf2Test::getSortedData16(UErrorCode &status) { 1416 if (U_FAILURE(status)) return NULL; 1417 if (sortedData16) return sortedData16; 1418 1419 ArrayAndColl ac(getData16(status), *collObj); 1420 return sortedData16 = sortData16(ac.d16, U16CollatorComparator, &ac, status); 1421 } 1422 1423 const CA_char* CollPerf2Test::getSortedData8(UErrorCode &status) { 1424 if (U_FAILURE(status)) return NULL; 1425 if (sortedData8) return sortedData8; 1426 return sortedData8 = getData8FromData16(getSortedData16(status), status); 1427 } 1428 1429 const CA_uchar* CollPerf2Test::getRandomData16(UErrorCode &status) { 1430 if (U_FAILURE(status)) return NULL; 1431 if (randomData16) return randomData16; 1432 1433 // Sort the strings by their hash codes, which should be a reasonably pseudo-random order. 1434 const CA_uchar* d16 = getData16(status); 1435 return randomData16 = sortData16(d16, U16HashComparator, d16, status); 1436 } 1437 1438 const CA_char* CollPerf2Test::getRandomData8(UErrorCode &status) { 1439 if (U_FAILURE(status)) return NULL; 1440 if (randomData8) return randomData8; 1441 return randomData8 = getData8FromData16(getRandomData16(status), status); 1442 } 1443 1444 CA_uchar* CollPerf2Test::sortData16(const CA_uchar* d16, 1445 UComparator *cmp, const void *context, 1446 UErrorCode &status) { 1447 if (U_FAILURE(status)) return NULL; 1448 1449 LocalArray<int32_t> indexes(new int32_t[d16->count]); 1450 for (int32_t i = 0; i < d16->count; ++i) { 1451 indexes[i] = i; 1452 } 1453 uprv_sortArray(indexes.getAlias(), d16->count, 4, cmp, context, TRUE, &status); 1454 if (U_FAILURE(status)) return NULL; 1455 1456 // Copy the strings in sorted order into a new array. 1457 LocalPointer<CA_uchar> newD16(new CA_uchar()); 1458 for (int32_t i = 0; i < d16->count; i++) { 1459 const UChar* s = d16->dataOf(i); 1460 int32_t len = d16->lengthOf(i); 1461 int32_t capacity = len + 1; // including NULL terminator 1462 newD16->append_one(capacity); 1463 u_memcpy(newD16->last(), s, capacity); 1464 } 1465 1466 if (U_SUCCESS(status)) { 1467 return newD16.orphan(); 1468 } else { 1469 return NULL; 1470 } 1471 } 1472 1473 CA_char* CollPerf2Test::getData8FromData16(const CA_uchar* d16, UErrorCode &status) { 1474 if (U_FAILURE(status)) return NULL; 1475 1476 // UTF-16 -> UTF-8 conversion 1477 LocalPointer<CA_char> d8(new CA_char()); 1478 for (int32_t i = 0; i < d16->count; i++) { 1479 const UChar *s16 = d16->dataOf(i); 1480 int32_t length16 = d16->lengthOf(i); 1481 1482 // get length in UTF-8 1483 int32_t length8; 1484 u_strToUTF8(NULL, 0, &length8, s16, length16, &status); 1485 if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){ 1486 status = U_ZERO_ERROR; 1487 } else { 1488 break; 1489 } 1490 int32_t capacity8 = length8 + 1; // plus terminal NULL 1491 d8->append_one(capacity8); 1492 1493 // convert to UTF-8 1494 u_strToUTF8(d8->last(), capacity8, NULL, s16, length16, &status); 1495 if (U_FAILURE(status)) break; 1496 } 1497 1498 if (U_SUCCESS(status)) { 1499 return d8.orphan(); 1500 } else { 1501 return NULL; 1502 } 1503 } 1504 1505 UPerfFunction* 1506 CollPerf2Test::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par /*= NULL*/) 1507 { 1508 (void)par; 1509 TESTCASE_AUTO_BEGIN; 1510 1511 TESTCASE_AUTO(TestStrcoll); 1512 TESTCASE_AUTO(TestStrcollNull); 1513 TESTCASE_AUTO(TestStrcollSimilar); 1514 1515 TESTCASE_AUTO(TestStrcollUTF8); 1516 TESTCASE_AUTO(TestStrcollUTF8Null); 1517 TESTCASE_AUTO(TestStrcollUTF8Similar); 1518 1519 TESTCASE_AUTO(TestGetSortKey); 1520 TESTCASE_AUTO(TestGetSortKeyNull); 1521 1522 TESTCASE_AUTO(TestNextSortKeyPart_4All); 1523 TESTCASE_AUTO(TestNextSortKeyPart_4x4); 1524 TESTCASE_AUTO(TestNextSortKeyPart_4x8); 1525 TESTCASE_AUTO(TestNextSortKeyPart_32All); 1526 TESTCASE_AUTO(TestNextSortKeyPart_32x2); 1527 1528 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4All); 1529 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x4); 1530 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x8); 1531 TESTCASE_AUTO(TestNextSortKeyPartUTF8_32All); 1532 TESTCASE_AUTO(TestNextSortKeyPartUTF8_32x2); 1533 1534 TESTCASE_AUTO(TestCppCompare); 1535 TESTCASE_AUTO(TestCppCompareNull); 1536 TESTCASE_AUTO(TestCppCompareSimilar); 1537 1538 TESTCASE_AUTO(TestCppCompareUTF8); 1539 TESTCASE_AUTO(TestCppCompareUTF8Null); 1540 TESTCASE_AUTO(TestCppCompareUTF8Similar); 1541 1542 TESTCASE_AUTO(TestCppGetCollationKey); 1543 TESTCASE_AUTO(TestCppGetCollationKeyNull); 1544 1545 TESTCASE_AUTO(TestUniStrSort); 1546 TESTCASE_AUTO(TestStringPieceSortCpp); 1547 TESTCASE_AUTO(TestStringPieceSortC); 1548 1549 TESTCASE_AUTO(TestUniStrBinSearch); 1550 TESTCASE_AUTO(TestStringPieceBinSearchCpp); 1551 TESTCASE_AUTO(TestStringPieceBinSearchC); 1552 1553 TESTCASE_AUTO_END; 1554 return NULL; 1555 } 1556 1557 1558 1559 UPerfFunction* CollPerf2Test::TestStrcoll() 1560 { 1561 UErrorCode status = U_ZERO_ERROR; 1562 Strcoll *testCase = new Strcoll(coll, getData16(status), TRUE /* useLen */); 1563 if (U_FAILURE(status)) { 1564 delete testCase; 1565 return NULL; 1566 } 1567 return testCase; 1568 } 1569 1570 UPerfFunction* CollPerf2Test::TestStrcollNull() 1571 { 1572 UErrorCode status = U_ZERO_ERROR; 1573 Strcoll *testCase = new Strcoll(coll, getData16(status), FALSE /* useLen */); 1574 if (U_FAILURE(status)) { 1575 delete testCase; 1576 return NULL; 1577 } 1578 return testCase; 1579 } 1580 1581 UPerfFunction* CollPerf2Test::TestStrcollSimilar() 1582 { 1583 UErrorCode status = U_ZERO_ERROR; 1584 Strcoll_2 *testCase = new Strcoll_2(coll, getData16(status), getModData16(status), TRUE /* useLen */); 1585 if (U_FAILURE(status)) { 1586 delete testCase; 1587 return NULL; 1588 } 1589 return testCase; 1590 } 1591 1592 UPerfFunction* CollPerf2Test::TestStrcollUTF8() 1593 { 1594 UErrorCode status = U_ZERO_ERROR; 1595 StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status), TRUE /* useLen */); 1596 if (U_FAILURE(status)) { 1597 delete testCase; 1598 return NULL; 1599 } 1600 return testCase; 1601 } 1602 1603 UPerfFunction* CollPerf2Test::TestStrcollUTF8Null() 1604 { 1605 UErrorCode status = U_ZERO_ERROR; 1606 StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status),FALSE /* useLen */); 1607 if (U_FAILURE(status)) { 1608 delete testCase; 1609 return NULL; 1610 } 1611 return testCase; 1612 } 1613 1614 UPerfFunction* CollPerf2Test::TestStrcollUTF8Similar() 1615 { 1616 UErrorCode status = U_ZERO_ERROR; 1617 StrcollUTF8_2 *testCase = new StrcollUTF8_2(coll, getData8(status), getModData8(status), TRUE /* useLen */); 1618 if (U_FAILURE(status)) { 1619 delete testCase; 1620 return NULL; 1621 } 1622 return testCase; 1623 } 1624 1625 UPerfFunction* CollPerf2Test::TestGetSortKey() 1626 { 1627 UErrorCode status = U_ZERO_ERROR; 1628 GetSortKey *testCase = new GetSortKey(coll, getData16(status), TRUE /* useLen */); 1629 if (U_FAILURE(status)) { 1630 delete testCase; 1631 return NULL; 1632 } 1633 return testCase; 1634 } 1635 1636 UPerfFunction* CollPerf2Test::TestGetSortKeyNull() 1637 { 1638 UErrorCode status = U_ZERO_ERROR; 1639 GetSortKey *testCase = new GetSortKey(coll, getData16(status), FALSE /* useLen */); 1640 if (U_FAILURE(status)) { 1641 delete testCase; 1642 return NULL; 1643 } 1644 return testCase; 1645 } 1646 1647 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4All() 1648 { 1649 UErrorCode status = U_ZERO_ERROR; 1650 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */); 1651 if (U_FAILURE(status)) { 1652 delete testCase; 1653 return NULL; 1654 } 1655 return testCase; 1656 } 1657 1658 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x4() 1659 { 1660 UErrorCode status = U_ZERO_ERROR; 1661 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 4 /* maxIteration */); 1662 if (U_FAILURE(status)) { 1663 delete testCase; 1664 return NULL; 1665 } 1666 return testCase; 1667 } 1668 1669 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x8() 1670 { 1671 UErrorCode status = U_ZERO_ERROR; 1672 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 8 /* maxIteration */); 1673 if (U_FAILURE(status)) { 1674 delete testCase; 1675 return NULL; 1676 } 1677 return testCase; 1678 } 1679 1680 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32All() 1681 { 1682 UErrorCode status = U_ZERO_ERROR; 1683 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */); 1684 if (U_FAILURE(status)) { 1685 delete testCase; 1686 return NULL; 1687 } 1688 return testCase; 1689 } 1690 1691 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32x2() 1692 { 1693 UErrorCode status = U_ZERO_ERROR; 1694 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */, 2 /* maxIteration */); 1695 if (U_FAILURE(status)) { 1696 delete testCase; 1697 return NULL; 1698 } 1699 return testCase; 1700 } 1701 1702 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4All() 1703 { 1704 UErrorCode status = U_ZERO_ERROR; 1705 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */); 1706 if (U_FAILURE(status)) { 1707 delete testCase; 1708 return NULL; 1709 } 1710 return testCase; 1711 } 1712 1713 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x4() 1714 { 1715 UErrorCode status = U_ZERO_ERROR; 1716 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 4 /* maxIteration */); 1717 if (U_FAILURE(status)) { 1718 delete testCase; 1719 return NULL; 1720 } 1721 return testCase; 1722 } 1723 1724 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x8() 1725 { 1726 UErrorCode status = U_ZERO_ERROR; 1727 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 8 /* maxIteration */); 1728 if (U_FAILURE(status)) { 1729 delete testCase; 1730 return NULL; 1731 } 1732 return testCase; 1733 } 1734 1735 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32All() 1736 { 1737 UErrorCode status = U_ZERO_ERROR; 1738 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */); 1739 if (U_FAILURE(status)) { 1740 delete testCase; 1741 return NULL; 1742 } 1743 return testCase; 1744 } 1745 1746 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32x2() 1747 { 1748 UErrorCode status = U_ZERO_ERROR; 1749 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */, 2 /* maxIteration */); 1750 if (U_FAILURE(status)) { 1751 delete testCase; 1752 return NULL; 1753 } 1754 return testCase; 1755 } 1756 1757 UPerfFunction* CollPerf2Test::TestCppCompare() 1758 { 1759 UErrorCode status = U_ZERO_ERROR; 1760 CppCompare *testCase = new CppCompare(collObj, getData16(status), TRUE /* useLen */); 1761 if (U_FAILURE(status)) { 1762 delete testCase; 1763 return NULL; 1764 } 1765 return testCase; 1766 } 1767 1768 UPerfFunction* CollPerf2Test::TestCppCompareNull() 1769 { 1770 UErrorCode status = U_ZERO_ERROR; 1771 CppCompare *testCase = new CppCompare(collObj, getData16(status), FALSE /* useLen */); 1772 if (U_FAILURE(status)) { 1773 delete testCase; 1774 return NULL; 1775 } 1776 return testCase; 1777 } 1778 1779 UPerfFunction* CollPerf2Test::TestCppCompareSimilar() 1780 { 1781 UErrorCode status = U_ZERO_ERROR; 1782 CppCompare_2 *testCase = new CppCompare_2(collObj, getData16(status), getModData16(status), TRUE /* useLen */); 1783 if (U_FAILURE(status)) { 1784 delete testCase; 1785 return NULL; 1786 } 1787 return testCase; 1788 } 1789 1790 UPerfFunction* CollPerf2Test::TestCppCompareUTF8() 1791 { 1792 UErrorCode status = U_ZERO_ERROR; 1793 CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), TRUE /* useLen */); 1794 if (U_FAILURE(status)) { 1795 delete testCase; 1796 return NULL; 1797 } 1798 return testCase; 1799 } 1800 1801 UPerfFunction* CollPerf2Test::TestCppCompareUTF8Null() 1802 { 1803 UErrorCode status = U_ZERO_ERROR; 1804 CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), FALSE /* useLen */); 1805 if (U_FAILURE(status)) { 1806 delete testCase; 1807 return NULL; 1808 } 1809 return testCase; 1810 } 1811 1812 UPerfFunction* CollPerf2Test::TestCppCompareUTF8Similar() 1813 { 1814 UErrorCode status = U_ZERO_ERROR; 1815 CppCompareUTF8_2 *testCase = new CppCompareUTF8_2(collObj, getData8(status), getModData8(status), TRUE /* useLen */); 1816 if (U_FAILURE(status)) { 1817 delete testCase; 1818 return NULL; 1819 } 1820 return testCase; 1821 } 1822 1823 UPerfFunction* CollPerf2Test::TestCppGetCollationKey() 1824 { 1825 UErrorCode status = U_ZERO_ERROR; 1826 CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), TRUE /* useLen */); 1827 if (U_FAILURE(status)) { 1828 delete testCase; 1829 return NULL; 1830 } 1831 return testCase; 1832 } 1833 1834 UPerfFunction* CollPerf2Test::TestCppGetCollationKeyNull() 1835 { 1836 UErrorCode status = U_ZERO_ERROR; 1837 CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), FALSE /* useLen */); 1838 if (U_FAILURE(status)) { 1839 delete testCase; 1840 return NULL; 1841 } 1842 return testCase; 1843 } 1844 1845 UPerfFunction* CollPerf2Test::TestUniStrSort() { 1846 UErrorCode status = U_ZERO_ERROR; 1847 UPerfFunction *testCase = new UniStrSort(*collObj, coll, getRandomData16(status)); 1848 if (U_FAILURE(status)) { 1849 delete testCase; 1850 return NULL; 1851 } 1852 return testCase; 1853 } 1854 1855 UPerfFunction* CollPerf2Test::TestStringPieceSortCpp() { 1856 UErrorCode status = U_ZERO_ERROR; 1857 UPerfFunction *testCase = new StringPieceSortCpp(*collObj, coll, getRandomData8(status)); 1858 if (U_FAILURE(status)) { 1859 delete testCase; 1860 return NULL; 1861 } 1862 return testCase; 1863 } 1864 1865 UPerfFunction* CollPerf2Test::TestStringPieceSortC() { 1866 UErrorCode status = U_ZERO_ERROR; 1867 UPerfFunction *testCase = new StringPieceSortC(*collObj, coll, getRandomData8(status)); 1868 if (U_FAILURE(status)) { 1869 delete testCase; 1870 return NULL; 1871 } 1872 return testCase; 1873 } 1874 1875 UPerfFunction* CollPerf2Test::TestUniStrBinSearch() { 1876 UErrorCode status = U_ZERO_ERROR; 1877 UPerfFunction *testCase = new UniStrBinSearch(*collObj, coll, getSortedData16(status)); 1878 if (U_FAILURE(status)) { 1879 delete testCase; 1880 return NULL; 1881 } 1882 return testCase; 1883 } 1884 1885 UPerfFunction* CollPerf2Test::TestStringPieceBinSearchCpp() { 1886 UErrorCode status = U_ZERO_ERROR; 1887 UPerfFunction *testCase = new StringPieceBinSearchCpp(*collObj, coll, getSortedData8(status)); 1888 if (U_FAILURE(status)) { 1889 delete testCase; 1890 return NULL; 1891 } 1892 return testCase; 1893 } 1894 1895 UPerfFunction* CollPerf2Test::TestStringPieceBinSearchC() { 1896 UErrorCode status = U_ZERO_ERROR; 1897 UPerfFunction *testCase = new StringPieceBinSearchC(*collObj, coll, getSortedData8(status)); 1898 if (U_FAILURE(status)) { 1899 delete testCase; 1900 return NULL; 1901 } 1902 return testCase; 1903 } 1904 1905 1906 int main(int argc, const char *argv[]) 1907 { 1908 UErrorCode status = U_ZERO_ERROR; 1909 CollPerf2Test test(argc, argv, status); 1910 1911 if (U_FAILURE(status)){ 1912 printf("The error is %s\n", u_errorName(status)); 1913 //TODO: print usage here 1914 return status; 1915 } 1916 1917 if (test.run() == FALSE){ 1918 fprintf(stderr, "FAILED: Tests could not be run please check the arguments.\n"); 1919 return -1; 1920 } 1921 return 0; 1922 } 1923