1 /*********************************************************************** 2 * 2016 and later: Unicode, Inc. and others. 3 * License & terms of use: http://www.unicode.org/copyright.html#License 4 * 5 *********************************************************************** 6 *********************************************************************** 7 * COPYRIGHT: 8 * Copyright (C) 2001-2016 IBM, Inc. All Rights Reserved. 9 * 10 ***********************************************************************/ 11 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <locale.h> 15 #include <limits.h> 16 #include <string.h> 17 #include "cmemory.h" 18 #include "unicode/uperf.h" 19 #include "uoptions.h" 20 #include "unicode/coll.h" 21 #include <unicode/ucoleitr.h> 22 23 #if !U_PLATFORM_HAS_WIN32_API 24 #define DWORD uint32_t 25 #define WCHAR wchar_t 26 #endif 27 28 /* To store an array of string<UNIT> in continue space. 29 Since string<UNIT> itself is treated as an array of UNIT, this 30 class will ease our memory management for an array of string<UNIT>. 31 */ 32 33 //template<typename UNIT> 34 #define COMPATCT_ARRAY(CompactArrays, UNIT) \ 35 struct CompactArrays{\ 36 CompactArrays(const CompactArrays & );\ 37 CompactArrays & operator=(const CompactArrays & );\ 38 int32_t count;/*total number of the strings*/ \ 39 int32_t * index;/*relative offset in data*/ \ 40 UNIT * data; /*the real space to hold strings*/ \ 41 \ 42 ~CompactArrays(){free(index);free(data);} \ 43 CompactArrays():data(NULL), index(NULL), count(0){ \ 44 index = (int32_t *) realloc(index, sizeof(int32_t)); \ 45 index[0] = 0; \ 46 } \ 47 void append_one(int32_t theLen){ /*include terminal NULL*/ \ 48 count++; \ 49 index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \ 50 index[count] = index[count - 1] + theLen; \ 51 data = (UNIT *) realloc(data, sizeof(UNIT) * index[count]); \ 52 } \ 53 UNIT * last(){return data + index[count - 1];} \ 54 UNIT * dataOf(int32_t i){return data + index[i];} \ 55 int32_t lengthOf(int i){return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/ \ 56 }; 57 58 //typedef CompactArrays<UChar> CA_uchar; 59 //typedef CompactArrays<char> CA_char; 60 //typedef CompactArrays<uint8_t> CA_uint8; 61 //typedef CompactArrays<WCHAR> CA_win_wchar; 62 63 COMPATCT_ARRAY(CA_uchar, UChar) 64 COMPATCT_ARRAY(CA_char, char) 65 COMPATCT_ARRAY(CA_uint8, uint8_t) 66 COMPATCT_ARRAY(CA_win_wchar, WCHAR) 67 68 69 struct DataIndex { 70 static DWORD win_langid; // for qsort callback function 71 static UCollator * col; // for qsort callback function 72 uint8_t * icu_key; 73 UChar * icu_data; 74 int32_t icu_data_len; 75 char* posix_key; 76 char* posix_data; 77 int32_t posix_data_len; 78 char* win_key; 79 WCHAR * win_data; 80 int32_t win_data_len; 81 }; 82 DWORD DataIndex::win_langid; 83 UCollator * DataIndex::col; 84 85 86 87 class CmdKeyGen : public UPerfFunction { 88 typedef void (CmdKeyGen::* Func)(int32_t); 89 enum{MAX_KEY_LENGTH = 5000}; 90 UCollator * col; 91 DWORD win_langid; 92 int32_t count; 93 DataIndex * data; 94 Func fn; 95 96 union { // to save sapce 97 uint8_t icu_key[MAX_KEY_LENGTH]; 98 char posix_key[MAX_KEY_LENGTH]; 99 WCHAR win_key[MAX_KEY_LENGTH]; 100 }; 101 public: 102 CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataIndex * data,Func fn,int32_t) 103 :col(col),win_langid(win_langid), count(count), data(data), fn(fn){} 104 105 virtual long getOperationsPerIteration(){return count;} 106 107 virtual void call(UErrorCode* status){ 108 for(int32_t i = 0; i< count; i++){ 109 (this->*fn)(i); 110 } 111 } 112 113 void icu_key_null(int32_t i){ 114 ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH); 115 } 116 117 void icu_key_len(int32_t i){ 118 ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key, MAX_KEY_LENGTH); 119 } 120 121 #if U_PLATFORM_HAS_WIN32_API 122 // pre-generated in CollPerfTest::prepareData(), need not to check error here 123 void win_key_null(int32_t i){ 124 //LCMAP_SORTsk 0x00000400 // WC sort sk (normalize) 125 LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_key, MAX_KEY_LENGTH); 126 } 127 128 void win_key_len(int32_t i){ 129 LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].win_data_len, win_key, MAX_KEY_LENGTH); 130 } 131 #endif 132 133 void posix_key_null(int32_t i){ 134 strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH); 135 } 136 }; 137 138 139 class CmdIter : public UPerfFunction { 140 typedef void (CmdIter::* Func)(UErrorCode* , int32_t ); 141 int32_t count; 142 CA_uchar * data; 143 Func fn; 144 UCollationElements *iter; 145 int32_t exec_count; 146 public: 147 CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data, Func fn, int32_t,int32_t) 148 :count(count), data(data), fn(fn){ 149 exec_count = 0; 150 UChar dummytext[] = {0, 0}; 151 iter = ucol_openElements(col, NULL, 0, &status); 152 ucol_setText(iter, dummytext, 1, &status); 153 } 154 ~CmdIter(){ 155 ucol_closeElements(iter); 156 } 157 158 virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;} 159 160 virtual void call(UErrorCode* status){ 161 exec_count = 0; 162 for(int32_t i = 0; i< count; i++){ 163 (this->*fn)(status, i); 164 } 165 } 166 167 void icu_forward_null(UErrorCode* status, int32_t i){ 168 ucol_setText(iter, data->dataOf(i), -1, status); 169 while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++; 170 } 171 172 void icu_forward_len(UErrorCode* status, int32_t i){ 173 ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status); 174 while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++; 175 } 176 177 void icu_backward_null(UErrorCode* status, int32_t i){ 178 ucol_setText(iter, data->dataOf(i), -1, status); 179 while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++; 180 } 181 182 void icu_backward_len(UErrorCode* status, int32_t i){ 183 ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status); 184 while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++; 185 } 186 }; 187 188 class CmdIterAll : public UPerfFunction { 189 typedef void (CmdIterAll::* Func)(UErrorCode* status); 190 int32_t count; 191 UChar * data; 192 Func fn; 193 UCollationElements *iter; 194 int32_t exec_count; 195 196 public: 197 enum CALL {forward_null, forward_len, backward_null, backward_len}; 198 199 ~CmdIterAll(){ 200 ucol_closeElements(iter); 201 } 202 CmdIterAll(UErrorCode & status, UCollator * col, int32_t count, UChar * data, CALL call,int32_t,int32_t) 203 :count(count),data(data) 204 { 205 exec_count = 0; 206 if (call == forward_null || call == backward_null) { 207 iter = ucol_openElements(col, data, -1, &status); 208 } else { 209 iter = ucol_openElements(col, data, count, &status); 210 } 211 212 if (call == forward_null || call == forward_len){ 213 fn = &CmdIterAll::icu_forward_all; 214 } else { 215 fn = &CmdIterAll::icu_backward_all; 216 } 217 } 218 virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;} 219 220 virtual void call(UErrorCode* status){ 221 (this->*fn)(status); 222 } 223 224 void icu_forward_all(UErrorCode* status){ 225 int strlen = count - 5; 226 int count5 = 5; 227 int strindex = 0; 228 ucol_setOffset(iter, strindex, status); 229 while (TRUE) { 230 if (ucol_next(iter, status) == UCOL_NULLORDER) { 231 break; 232 } 233 exec_count++; 234 count5 --; 235 if (count5 == 0) { 236 strindex += 10; 237 if (strindex > strlen) { 238 break; 239 } 240 ucol_setOffset(iter, strindex, status); 241 count5 = 5; 242 } 243 } 244 } 245 246 void icu_backward_all(UErrorCode* status){ 247 int strlen = count; 248 int count5 = 5; 249 int strindex = 5; 250 ucol_setOffset(iter, strindex, status); 251 while (TRUE) { 252 if (ucol_previous(iter, status) == UCOL_NULLORDER) { 253 break; 254 } 255 exec_count++; 256 count5 --; 257 if (count5 == 0) { 258 strindex += 10; 259 if (strindex > strlen) { 260 break; 261 } 262 ucol_setOffset(iter, strindex, status); 263 count5 = 5; 264 } 265 } 266 } 267 268 }; 269 270 struct CmdQsort : public UPerfFunction{ 271 272 static int q_random(const void * a, const void * b){ 273 uint8_t * key_a = ((DataIndex *)a)->icu_key; 274 uint8_t * key_b = ((DataIndex *)b)->icu_key; 275 276 int val_a = 0; 277 int val_b = 0; 278 while (*key_a != 0) {val_a += val_a*37 + *key_a++;} 279 while (*key_b != 0) {val_b += val_b*37 + *key_b++;} 280 return val_a - val_b; 281 } 282 283 #define QCAST() \ 284 DataIndex * da = (DataIndex *) a; \ 285 DataIndex * db = (DataIndex *) b; \ 286 ++exec_count 287 288 static int icu_strcoll_null(const void *a, const void *b){ 289 QCAST(); 290 return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_EQUAL; 291 } 292 293 static int icu_strcoll_len(const void *a, const void *b){ 294 QCAST(); 295 return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_data, db->icu_data_len) - UCOL_EQUAL; 296 } 297 298 static int icu_cmpkey (const void *a, const void *b){ 299 QCAST(); 300 return strcmp((char *) da->icu_key, (char *) db->icu_key); 301 } 302 303 #if U_PLATFORM_HAS_WIN32_API 304 static int win_cmp_null(const void *a, const void *b) { 305 QCAST(); 306 //CSTR_LESS_THAN 1 307 //CSTR_EQUAL 2 308 //CSTR_GREATER_THAN 3 309 int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data, -1); 310 if (t == 0){ 311 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError()); 312 exit(-1); 313 } else{ 314 return t - CSTR_EQUAL; 315 } 316 } 317 318 static int win_cmp_len(const void *a, const void *b) { 319 QCAST(); 320 int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len, db->win_data, db->win_data_len); 321 if (t == 0){ 322 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError()); 323 exit(-1); 324 } else{ 325 return t - CSTR_EQUAL; 326 } 327 } 328 #endif 329 330 #define QFUNC(name, func, data) \ 331 static int name (const void *a, const void *b){ \ 332 QCAST(); \ 333 return func(da->data, db->data); \ 334 } 335 336 QFUNC(posix_strcoll_null, strcoll, posix_data) 337 QFUNC(posix_cmpkey, strcmp, posix_key) 338 #if U_PLATFORM_HAS_WIN32_API 339 QFUNC(win_cmpkey, strcmp, win_key) 340 QFUNC(win_wcscmp, wcscmp, win_data) 341 #endif 342 QFUNC(icu_strcmp, u_strcmp, icu_data) 343 QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data) 344 345 private: 346 static int32_t exec_count; // potential muilt-thread problem 347 348 typedef int (* Func)(const void *, const void *); 349 350 Func fn; 351 void * base; //Start of target array. 352 int32_t num; //Array size in elements. 353 int32_t width; //Element size in bytes. 354 355 void * backup; //copy source of base 356 public: 357 CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func fn, int32_t,int32_t) 358 :backup(theBase),num(num),width(width),fn(fn){ 359 base = malloc(num * width); 360 time_empty(100, &status); // warm memory/cache 361 } 362 363 ~CmdQsort(){ 364 free(base); 365 } 366 367 void empty_call(){ 368 exec_count = 0; 369 memcpy(base, backup, num * width); 370 } 371 372 double time_empty(int32_t n, UErrorCode* status) { 373 UTimer start, stop; 374 utimer_getTime(&start); 375 while (n-- > 0) { 376 empty_call(); 377 } 378 utimer_getTime(&stop); 379 return utimer_getDeltaSeconds(&start,&stop); // ms 380 } 381 382 virtual void call(UErrorCode* status){ 383 exec_count = 0; 384 memcpy(base, backup, num * width); 385 qsort(base, num, width, fn); 386 } 387 virtual double time(int32_t n, UErrorCode* status) { 388 double t1 = time_empty(n,status); 389 double t2 = UPerfFunction::time(n, status); 390 return t2-t1;// < 0 ? t2 : t2-t1; 391 } 392 393 virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;} 394 }; 395 int32_t CmdQsort::exec_count; 396 397 398 class CmdBinSearch : public UPerfFunction{ 399 public: 400 typedef int (CmdBinSearch::* Func)(int, int); 401 402 UCollator * col; 403 DWORD win_langid; 404 int32_t count; 405 DataIndex * rnd; 406 DataIndex * ord; 407 Func fn; 408 int32_t exec_count; 409 410 CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn) 411 :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(fn),exec_count(0){} 412 413 414 virtual void call(UErrorCode* status){ 415 exec_count = 0; 416 for(int32_t i = 0; i< count; i++){ // search all data 417 binary_search(i); 418 } 419 } 420 virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;} 421 422 void binary_search(int32_t random) { 423 int low = 0; 424 int high = count - 1; 425 int guess; 426 int last_guess = -1; 427 int r; 428 while (TRUE) { 429 guess = (high + low)/2; 430 if (last_guess == guess) break; // nothing to search 431 432 r = (this->*fn)(random, guess); 433 exec_count++; 434 435 if (r == 0) 436 return; // found, search end. 437 if (r < 0) { 438 high = guess; 439 } else { 440 low = guess; 441 } 442 last_guess = guess; 443 } 444 } 445 446 int icu_strcoll_null(int32_t i, int32_t j){ 447 return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1); 448 } 449 450 int icu_strcoll_len(int32_t i, int32_t j){ 451 return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j].icu_data, ord[j].icu_data_len); 452 } 453 454 int icu_cmpkey(int32_t i, int32_t j) { 455 return strcmp( (char *) rnd[i].icu_key, (char *) ord[j].icu_key ); 456 } 457 458 #if U_PLATFORM_HAS_WIN32_API 459 int win_cmp_null(int32_t i, int32_t j) { 460 int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].win_data, -1); 461 if (t == 0){ 462 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError()); 463 exit(-1); 464 } else{ 465 return t - CSTR_EQUAL; 466 } 467 } 468 469 int win_cmp_len(int32_t i, int32_t j) { 470 int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_data_len, ord[j].win_data, ord[j].win_data_len); 471 if (t == 0){ 472 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError()); 473 exit(-1); 474 } else{ 475 return t - CSTR_EQUAL; 476 } 477 } 478 #endif 479 480 #define BFUNC(name, func, data) \ 481 int name(int32_t i, int32_t j) { \ 482 return func(rnd[i].data, ord[j].data); \ 483 } 484 485 BFUNC(posix_strcoll_null, strcoll, posix_data) 486 BFUNC(posix_cmpkey, strcmp, posix_key) 487 BFUNC(win_cmpkey, strcmp, win_key) 488 BFUNC(win_wcscmp, wcscmp, win_data) 489 BFUNC(icu_strcmp, u_strcmp, icu_data) 490 BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data) 491 }; 492 493 class CollPerfTest : public UPerfTest { 494 public: 495 UCollator * col; 496 DWORD win_langid; 497 498 UChar * icu_data_all; 499 int32_t icu_data_all_len; 500 501 int32_t count; 502 CA_uchar * icu_data; 503 CA_uint8 * icu_key; 504 CA_char * posix_data; 505 CA_char * posix_key; 506 CA_win_wchar * win_data; 507 CA_char * win_key; 508 509 DataIndex * rnd_index; // random by icu key 510 DataIndex * ord_win_data; 511 DataIndex * ord_win_key; 512 DataIndex * ord_posix_data; 513 DataIndex * ord_posix_key; 514 DataIndex * ord_icu_data; 515 DataIndex * ord_icu_key; 516 DataIndex * ord_win_wcscmp; 517 DataIndex * ord_icu_strcmp; 518 DataIndex * ord_icu_cmpcpo; 519 520 virtual ~CollPerfTest(){ 521 ucol_close(col); 522 delete [] icu_data_all; 523 delete icu_data; 524 delete icu_key; 525 delete posix_data; 526 delete posix_key; 527 delete win_data; 528 delete win_key; 529 delete[] rnd_index; 530 delete[] ord_win_data; 531 delete[] ord_win_key; 532 delete[] ord_posix_data; 533 delete[] ord_posix_key; 534 delete[] ord_icu_data; 535 delete[] ord_icu_key; 536 delete[] ord_win_wcscmp; 537 delete[] ord_icu_strcmp; 538 delete[] ord_icu_cmpcpo; 539 } 540 541 CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest(argc, argv, status){ 542 col = NULL; 543 icu_data_all = NULL; 544 icu_data = NULL; 545 icu_key = NULL; 546 posix_data = NULL; 547 posix_key = NULL; 548 win_data =NULL; 549 win_key = NULL; 550 551 rnd_index = NULL; 552 ord_win_data= NULL; 553 ord_win_key= NULL; 554 ord_posix_data= NULL; 555 ord_posix_key= NULL; 556 ord_icu_data= NULL; 557 ord_icu_key= NULL; 558 ord_win_wcscmp = NULL; 559 ord_icu_strcmp = NULL; 560 ord_icu_cmpcpo = NULL; 561 562 if (U_FAILURE(status)){ 563 return; 564 } 565 566 // Parse additional arguments 567 568 UOption options[] = { 569 UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG), // Windows Language ID number. 570 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG), // --rulefile <filename> 571 // Collation related arguments. All are optional. 572 // To simplify parsing, two choice arguments are disigned as NO_ARG. 573 // The default value is UPPER word in the comment 574 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG), // --french <on | OFF> 575 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG), // --alternate <NON_IGNORE | shifted> 576 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefirst <lower | upper | OFF> 577 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG), // --caselevel <on | OFF> 578 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG), // --normal <on | OFF> 579 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG), // --strength <1-5> 580 }; 581 int32_t opt_len = UPRV_LENGTHOF(options); 582 enum {i, r,f,a,c,l,n,s}; // The buffer between the option items' order and their references 583 584 _remainingArgc = u_parseArgs(_remainingArgc, (char**)argv, opt_len, options); 585 586 if (_remainingArgc < 0){ 587 status = U_ILLEGAL_ARGUMENT_ERROR; 588 return; 589 } 590 591 if (locale == NULL){ 592 locale = "en_US"; // set default locale 593 } 594 595 #if U_PLATFORM_HAS_WIN32_API 596 if (options[i].doesOccur) { 597 char *endp; 598 int tmp = strtol(options[i].value, &endp, 0); 599 if (endp == options[i].value) { 600 status = U_ILLEGAL_ARGUMENT_ERROR; 601 return; 602 } 603 win_langid = MAKELCID(tmp, SORT_DEFAULT); 604 } else { 605 win_langid = uloc_getLCID(locale); 606 } 607 #endif 608 609 // Set up an ICU collator 610 if (options[r].doesOccur) { 611 // TODO: implement it 612 } else { 613 col = ucol_open(locale, &status); 614 if (U_FAILURE(status)) { 615 return; 616 } 617 } 618 619 if (options[f].doesOccur) { 620 ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status); 621 } else { 622 ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); 623 } 624 625 if (options[a].doesOccur) { 626 ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 627 } 628 629 if (options[c].doesOccur) { // strcmp() has i18n encoding problem 630 if (strcmp("lower", options[c].value) == 0){ 631 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status); 632 } else if (strcmp("upper", options[c].value) == 0) { 633 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status); 634 } else { 635 status = U_ILLEGAL_ARGUMENT_ERROR; 636 return; 637 } 638 } 639 640 if (options[l].doesOccur){ 641 ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status); 642 } 643 644 if (options[n].doesOccur){ 645 ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 646 } 647 648 if (options[s].doesOccur) { 649 char *endp; 650 int tmp = strtol(options[l].value, &endp, 0); 651 if (endp == options[l].value) { 652 status = U_ILLEGAL_ARGUMENT_ERROR; 653 return; 654 } 655 switch (tmp) { 656 case 1: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &status); break; 657 case 2: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &status); break; 658 case 3: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status); break; 659 case 4: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status); break; 660 case 5: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status); break; 661 default: status = U_ILLEGAL_ARGUMENT_ERROR; return; 662 } 663 } 664 prepareData(status); 665 } 666 667 //to avoid use the annoying 'id' in TESTCASE(id,test) macro or the like 668 #define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \ 669 if(temp == index) {\ 670 name = #testname;\ 671 if (exec) {\ 672 UErrorCode status = U_ZERO_ERROR;\ 673 UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\ 674 if (U_FAILURE(status)) {\ 675 delete t;\ 676 return NULL;\ 677 } else {\ 678 return t;\ 679 }\ 680 } else {\ 681 return NULL;\ 682 }\ 683 }\ 684 temp++\ 685 686 687 virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){ 688 int temp = 0; 689 690 #define TEST_KEYGEN(testname, func)\ 691 TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0) 692 TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null); 693 TEST_KEYGEN(TestIcu_KeyGen_len, icu_key_len); 694 TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null); 695 #if U_PLATFORM_HAS_WIN32_API 696 TEST_KEYGEN(TestWin_KeyGen_null, win_key_null); 697 TEST_KEYGEN(TestWin_KeyGen_len, win_key_len); 698 #endif 699 700 #define TEST_ITER(testname, func)\ 701 TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0) 702 TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null); 703 TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len); 704 TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null); 705 TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len); 706 707 #define TEST_ITER_ALL(testname, func)\ 708 TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0) 709 TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null); 710 TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len); 711 TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null); 712 TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len); 713 714 #define TEST_QSORT(testname, func)\ 715 TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0) 716 TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null); 717 TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len); 718 TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey); 719 TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null); 720 TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey); 721 #if U_PLATFORM_HAS_WIN32_API 722 TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null); 723 TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len); 724 TEST_QSORT(TestWin_qsort_usekey, win_cmpkey); 725 #endif 726 727 #define TEST_BIN(testname, func)\ 728 TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func) 729 TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null); 730 TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len); 731 TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey); 732 TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp); 733 TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo); 734 TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null); 735 TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey); 736 #if U_PLATFORM_HAS_WIN32_API 737 TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null); 738 TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len); 739 #endif 740 TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey); 741 TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp); 742 743 name=""; 744 return NULL; 745 } 746 747 748 749 void prepareData(UErrorCode& status){ 750 if(U_FAILURE(status)) return; 751 if (icu_data) return; // prepared 752 753 icu_data = new CA_uchar(); 754 755 // Following code is borrowed from UPerfTest::getLines(); 756 const UChar* line=NULL; 757 int32_t len =0; 758 for (;;) { 759 line = ucbuf_readline(ucharBuf,&len,&status); 760 if(line == NULL || U_FAILURE(status)){break;} 761 762 // Refer to the source code of ucbuf_readline() 763 // 1. 'len' includs the line terminal symbols 764 // 2. The length of the line terminal symbols is only one character 765 // 3. The Windows CR LF line terminal symbols will be converted to CR 766 767 if (len == 1) { 768 continue; //skip empty line 769 } else { 770 icu_data->append_one(len); 771 memcpy(icu_data->last(), line, len * sizeof(UChar)); 772 icu_data->last()[len -1] = NULL; 773 } 774 } 775 if(U_FAILURE(status)) return; 776 777 // UTF-16 -> UTF-8 conversion. 778 UConverter *conv = ucnv_open("utf-8", &status); // just UTF-8 for now. 779 if (U_FAILURE(status)) return; 780 781 count = icu_data->count; 782 783 icu_data_all_len = icu_data->index[count]; // includes all NULLs 784 icu_data_all_len -= count; // excludes all NULLs 785 icu_data_all_len += 1; // the terminal NULL 786 icu_data_all = new UChar[icu_data_all_len]; 787 icu_data_all[icu_data_all_len - 1] = 0; //the terminal NULL 788 789 icu_key = new CA_uint8; 790 win_data = new CA_win_wchar; 791 win_key = new CA_char; 792 posix_data = new CA_char; 793 posix_key = new CA_char; 794 rnd_index = new DataIndex[count]; 795 DataIndex::win_langid = win_langid; 796 DataIndex::col = col; 797 798 799 UChar * p = icu_data_all; 800 int32_t s; 801 int32_t t; 802 for (int i=0; i < count; i++) { 803 // ICU all data 804 s = sizeof(UChar) * icu_data->lengthOf(i); 805 memcpy(p, icu_data->dataOf(i), s); 806 p += icu_data->lengthOf(i); 807 808 // ICU data 809 810 // ICU key 811 s = ucol_getSortKey(col, icu_data->dataOf(i), -1,NULL, 0); 812 icu_key->append_one(s); 813 t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s); 814 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;} 815 816 // POSIX data 817 s = ucnv_fromUChars(conv,NULL, 0, icu_data->dataOf(i), icu_data->lengthOf(i), &status); 818 if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){ 819 status = U_ZERO_ERROR; 820 } else { 821 return; 822 } 823 posix_data->append_one(s + 1); // plus terminal NULL 824 t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i), icu_data->lengthOf(i), &status); 825 if (U_FAILURE(status)) return; 826 if ( t != s){status = U_INVALID_FORMAT_ERROR;return;} 827 posix_data->last()[s] = 0; 828 829 // POSIX key 830 s = strxfrm(NULL, posix_data->dataOf(i), 0); 831 if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;} 832 posix_key->append_one(s); 833 t = strxfrm(posix_key->last(), posix_data->dataOf(i), s); 834 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;} 835 836 #if U_PLATFORM_HAS_WIN32_API 837 // Win data 838 s = icu_data->lengthOf(i) + 1; // plus terminal NULL 839 win_data->append_one(s); 840 memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s); 841 842 // Win key 843 s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), NULL,0); 844 if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;} 845 win_key->append_one(s); 846 t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), (WCHAR *)(win_key->last()),s); 847 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;} 848 #endif 849 }; 850 851 // append_one() will make points shifting, should not merge following code into previous iteration 852 for (int i=0; i < count; i++) { 853 rnd_index[i].icu_key = icu_key->dataOf(i); 854 rnd_index[i].icu_data = icu_data->dataOf(i); 855 rnd_index[i].icu_data_len = icu_data->lengthOf(i); 856 rnd_index[i].posix_key = posix_key->last(); 857 rnd_index[i].posix_data = posix_data->dataOf(i); 858 rnd_index[i].posix_data_len = posix_data->lengthOf(i); 859 #if U_PLATFORM_HAS_WIN32_API 860 rnd_index[i].win_key = win_key->dataOf(i); 861 rnd_index[i].win_data = win_data->dataOf(i); 862 rnd_index[i].win_data_len = win_data->lengthOf(i); 863 #endif 864 }; 865 866 ucnv_close(conv); 867 qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random); 868 869 #define SORT(data, func) \ 870 data = new DataIndex[count];\ 871 memcpy(data, rnd_index, count * sizeof(DataIndex));\ 872 qsort(data, count, sizeof(DataIndex), CmdQsort::func) 873 874 SORT(ord_icu_data, icu_strcoll_len); 875 SORT(ord_icu_key, icu_cmpkey); 876 SORT(ord_posix_data, posix_strcoll_null); 877 SORT(ord_posix_key, posix_cmpkey); 878 #if U_PLATFORM_HAS_WIN32_API 879 SORT(ord_win_data, win_cmp_len); 880 SORT(ord_win_key, win_cmpkey); 881 SORT(ord_win_wcscmp, win_wcscmp); 882 #endif 883 SORT(ord_icu_strcmp, icu_strcmp); 884 SORT(ord_icu_cmpcpo, icu_cmpcpo); 885 } 886 }; 887 888 889 int main(int argc, const char *argv[]) 890 { 891 892 UErrorCode status = U_ZERO_ERROR; 893 CollPerfTest test(argc, argv, status); 894 895 if (U_FAILURE(status)){ 896 printf("The error is %s\n", u_errorName(status)); 897 //TODO: print usage here 898 return status; 899 } 900 901 if (test.run() == FALSE){ 902 fprintf(stderr, "FAILED: Tests could not be run please check the " 903 "arguments.\n"); 904 return -1; 905 } 906 return 0; 907 } 908 909