Home | History | Annotate | Download | only in collperf
      1 /***********************************************************************
      2 *  2016 and later: Unicode, Inc. and others.
      3 * License & terms of use: http://www.unicode.org/copyright.html#License
      4 *
      5 ***********************************************************************
      6 ***********************************************************************
      7 * COPYRIGHT:
      8 * Copyright (C) 2001-2016 IBM, Inc.   All Rights Reserved.
      9 *
     10 ***********************************************************************/
     11 
     12 #include <stdio.h>
     13 #include <stdlib.h>
     14 #include <locale.h>
     15 #include <limits.h>
     16 #include <string.h>
     17 #include "cmemory.h"
     18 #include "unicode/uperf.h"
     19 #include "uoptions.h"
     20 #include "unicode/coll.h"
     21 #include <unicode/ucoleitr.h>
     22 
     23 #if !U_PLATFORM_HAS_WIN32_API
     24 #define DWORD uint32_t
     25 #define WCHAR wchar_t
     26 #endif
     27 
     28 /* To store an array of string<UNIT> in continue space.
     29 Since string<UNIT> itself is treated as an array of UNIT, this
     30 class will ease our memory management for an array of string<UNIT>.
     31 */
     32 
     33 //template<typename UNIT>
     34 #define COMPATCT_ARRAY(CompactArrays, UNIT) \
     35 struct CompactArrays{\
     36     CompactArrays(const CompactArrays & );\
     37     CompactArrays & operator=(const CompactArrays & );\
     38     int32_t   count;/*total number of the strings*/ \
     39     int32_t * index;/*relative offset in data*/ \
     40     UNIT    * data; /*the real space to hold strings*/ \
     41     \
     42     ~CompactArrays(){free(index);free(data);} \
     43     CompactArrays():data(NULL), index(NULL), count(0){ \
     44     index = (int32_t *) realloc(index, sizeof(int32_t)); \
     45     index[0] = 0; \
     46     } \
     47     void append_one(int32_t theLen){ /*include terminal NULL*/ \
     48     count++; \
     49     index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \
     50     index[count] = index[count - 1] + theLen; \
     51     data = (UNIT *) realloc(data, sizeof(UNIT) * index[count]); \
     52     } \
     53     UNIT * last(){return data + index[count - 1];} \
     54     UNIT * dataOf(int32_t i){return data + index[i];} \
     55     int32_t lengthOf(int i){return index[i+1] - index[i] - 1; }	/*exclude terminating NULL*/  \
     56 };
     57 
     58 //typedef CompactArrays<UChar> CA_uchar;
     59 //typedef CompactArrays<char> CA_char;
     60 //typedef CompactArrays<uint8_t> CA_uint8;
     61 //typedef CompactArrays<WCHAR> CA_win_wchar;
     62 
     63 COMPATCT_ARRAY(CA_uchar, UChar)
     64 COMPATCT_ARRAY(CA_char, char)
     65 COMPATCT_ARRAY(CA_uint8, uint8_t)
     66 COMPATCT_ARRAY(CA_win_wchar, WCHAR)
     67 
     68 
     69 struct DataIndex {
     70     static DWORD        win_langid;     // for qsort callback function
     71     static UCollator *  col;            // for qsort callback function
     72     uint8_t *   icu_key;
     73     UChar *     icu_data;
     74     int32_t     icu_data_len;
     75     char*       posix_key;
     76     char*       posix_data;
     77     int32_t     posix_data_len;
     78     char*       win_key;
     79     WCHAR *     win_data;
     80     int32_t     win_data_len;
     81 };
     82 DWORD DataIndex::win_langid;
     83 UCollator * DataIndex::col;
     84 
     85 
     86 
     87 class CmdKeyGen : public UPerfFunction {
     88     typedef	void (CmdKeyGen::* Func)(int32_t);
     89     enum{MAX_KEY_LENGTH = 5000};
     90     UCollator * col;
     91     DWORD       win_langid;
     92     int32_t     count;
     93     DataIndex * data;
     94     Func 	    fn;
     95 
     96     union { // to save sapce
     97         uint8_t		icu_key[MAX_KEY_LENGTH];
     98         char        posix_key[MAX_KEY_LENGTH];
     99         WCHAR		win_key[MAX_KEY_LENGTH];
    100     };
    101 public:
    102     CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataIndex * data,Func fn,int32_t)
    103         :col(col),win_langid(win_langid), count(count), data(data), fn(fn){}
    104 
    105         virtual long getOperationsPerIteration(){return count;}
    106 
    107         virtual void call(UErrorCode* status){
    108             for(int32_t i = 0; i< count; i++){
    109                 (this->*fn)(i);
    110             }
    111         }
    112 
    113         void icu_key_null(int32_t i){
    114             ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH);
    115         }
    116 
    117         void icu_key_len(int32_t i){
    118             ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key, MAX_KEY_LENGTH);
    119         }
    120 
    121 #if U_PLATFORM_HAS_WIN32_API
    122         // pre-generated in CollPerfTest::prepareData(), need not to check error here
    123         void win_key_null(int32_t i){
    124             //LCMAP_SORTsk             0x00000400  // WC sort sk (normalize)
    125             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_key, MAX_KEY_LENGTH);
    126         }
    127 
    128         void win_key_len(int32_t i){
    129             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].win_data_len, win_key, MAX_KEY_LENGTH);
    130         }
    131 #endif
    132 
    133         void posix_key_null(int32_t i){
    134             strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH);
    135         }
    136 };
    137 
    138 
    139 class CmdIter : public UPerfFunction {
    140     typedef	void (CmdIter::* Func)(UErrorCode* , int32_t );
    141     int32_t             count;
    142     CA_uchar *          data;
    143     Func                fn;
    144     UCollationElements *iter;
    145     int32_t             exec_count;
    146 public:
    147     CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data, Func fn, int32_t,int32_t)
    148         :count(count), data(data), fn(fn){
    149             exec_count = 0;
    150             UChar dummytext[] = {0, 0};
    151             iter = ucol_openElements(col, NULL, 0, &status);
    152             ucol_setText(iter, dummytext, 1, &status);
    153         }
    154         ~CmdIter(){
    155             ucol_closeElements(iter);
    156         }
    157 
    158         virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
    159 
    160         virtual void call(UErrorCode* status){
    161             exec_count = 0;
    162             for(int32_t i = 0; i< count; i++){
    163                 (this->*fn)(status, i);
    164             }
    165         }
    166 
    167         void icu_forward_null(UErrorCode* status, int32_t i){
    168             ucol_setText(iter, data->dataOf(i), -1, status);
    169             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
    170         }
    171 
    172         void icu_forward_len(UErrorCode* status, int32_t i){
    173             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
    174             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
    175         }
    176 
    177         void icu_backward_null(UErrorCode* status, int32_t i){
    178             ucol_setText(iter, data->dataOf(i), -1, status);
    179             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
    180         }
    181 
    182         void icu_backward_len(UErrorCode* status, int32_t i){
    183             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
    184             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
    185         }
    186 };
    187 
    188 class CmdIterAll : public UPerfFunction {
    189     typedef	void (CmdIterAll::* Func)(UErrorCode* status);
    190     int32_t     count;
    191     UChar *     data;
    192     Func        fn;
    193     UCollationElements *iter;
    194     int32_t     exec_count;
    195 
    196 public:
    197     enum CALL {forward_null, forward_len, backward_null, backward_len};
    198 
    199     ~CmdIterAll(){
    200         ucol_closeElements(iter);
    201     }
    202     CmdIterAll(UErrorCode & status, UCollator * col, int32_t count,  UChar * data, CALL call,int32_t,int32_t)
    203         :count(count),data(data)
    204     {
    205         exec_count = 0;
    206         if (call == forward_null || call == backward_null) {
    207             iter = ucol_openElements(col, data, -1, &status);
    208         } else {
    209             iter = ucol_openElements(col, data, count, &status);
    210         }
    211 
    212         if (call == forward_null || call == forward_len){
    213             fn = &CmdIterAll::icu_forward_all;
    214         } else {
    215             fn = &CmdIterAll::icu_backward_all;
    216         }
    217     }
    218     virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
    219 
    220     virtual void call(UErrorCode* status){
    221         (this->*fn)(status);
    222     }
    223 
    224     void icu_forward_all(UErrorCode* status){
    225         int strlen = count - 5;
    226         int count5 = 5;
    227         int strindex = 0;
    228         ucol_setOffset(iter, strindex, status);
    229         while (TRUE) {
    230             if (ucol_next(iter, status) == UCOL_NULLORDER) {
    231                 break;
    232             }
    233             exec_count++;
    234             count5 --;
    235             if (count5 == 0) {
    236                 strindex += 10;
    237                 if (strindex > strlen) {
    238                     break;
    239                 }
    240                 ucol_setOffset(iter, strindex, status);
    241                 count5 = 5;
    242             }
    243         }
    244     }
    245 
    246     void icu_backward_all(UErrorCode* status){
    247         int strlen = count;
    248         int count5 = 5;
    249         int strindex = 5;
    250         ucol_setOffset(iter, strindex, status);
    251         while (TRUE) {
    252             if (ucol_previous(iter, status) == UCOL_NULLORDER) {
    253                 break;
    254             }
    255             exec_count++;
    256             count5 --;
    257             if (count5 == 0) {
    258                 strindex += 10;
    259                 if (strindex > strlen) {
    260                     break;
    261                 }
    262                 ucol_setOffset(iter, strindex, status);
    263                 count5 = 5;
    264             }
    265         }
    266     }
    267 
    268 };
    269 
    270 struct CmdQsort : public UPerfFunction{
    271 
    272     static int q_random(const void * a, const void * b){
    273         uint8_t * key_a = ((DataIndex *)a)->icu_key;
    274         uint8_t * key_b = ((DataIndex *)b)->icu_key;
    275 
    276         int   val_a = 0;
    277         int   val_b = 0;
    278         while (*key_a != 0) {val_a += val_a*37 + *key_a++;}
    279         while (*key_b != 0) {val_b += val_b*37 + *key_b++;}
    280         return val_a - val_b;
    281     }
    282 
    283 #define QCAST() \
    284     DataIndex * da = (DataIndex *) a; \
    285     DataIndex * db = (DataIndex *) b; \
    286     ++exec_count
    287 
    288     static int icu_strcoll_null(const void *a, const void *b){
    289         QCAST();
    290         return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_EQUAL;
    291     }
    292 
    293     static int icu_strcoll_len(const void *a, const void *b){
    294         QCAST();
    295         return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_data, db->icu_data_len) - UCOL_EQUAL;
    296     }
    297 
    298     static int icu_cmpkey (const void *a, const void *b){
    299         QCAST();
    300         return strcmp((char *) da->icu_key, (char *) db->icu_key);
    301     }
    302 
    303 #if U_PLATFORM_HAS_WIN32_API
    304     static int win_cmp_null(const void *a, const void *b) {
    305         QCAST();
    306         //CSTR_LESS_THAN		1
    307         //CSTR_EQUAL			2
    308         //CSTR_GREATER_THAN		3
    309         int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data, -1);
    310         if (t == 0){
    311             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    312             exit(-1);
    313         } else{
    314             return t - CSTR_EQUAL;
    315         }
    316     }
    317 
    318     static int win_cmp_len(const void *a, const void *b) {
    319         QCAST();
    320         int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len, db->win_data, db->win_data_len);
    321         if (t == 0){
    322             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    323             exit(-1);
    324         } else{
    325             return t - CSTR_EQUAL;
    326         }
    327     }
    328 #endif
    329 
    330 #define QFUNC(name, func, data) \
    331     static int name (const void *a, const void *b){ \
    332     QCAST(); \
    333     return func(da->data, db->data); \
    334     }
    335 
    336     QFUNC(posix_strcoll_null, strcoll, posix_data)
    337         QFUNC(posix_cmpkey, strcmp, posix_key)
    338 #if U_PLATFORM_HAS_WIN32_API
    339         QFUNC(win_cmpkey, strcmp, win_key)
    340         QFUNC(win_wcscmp, wcscmp, win_data)
    341 #endif
    342         QFUNC(icu_strcmp, u_strcmp, icu_data)
    343         QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
    344 
    345 private:
    346     static int32_t exec_count; // potential muilt-thread problem
    347 
    348     typedef	int (* Func)(const void *, const void *);
    349 
    350     Func    fn;
    351     void *  base;   //Start of target array.
    352     int32_t num;    //Array size in elements.
    353     int32_t width;  //Element size in bytes.
    354 
    355     void *  backup; //copy source of base
    356 public:
    357     CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func fn, int32_t,int32_t)
    358         :backup(theBase),num(num),width(width),fn(fn){
    359             base = malloc(num * width);
    360             time_empty(100, &status); // warm memory/cache
    361         }
    362 
    363         ~CmdQsort(){
    364             free(base);
    365         }
    366 
    367         void empty_call(){
    368             exec_count = 0;
    369             memcpy(base, backup, num * width);
    370         }
    371 
    372         double time_empty(int32_t n, UErrorCode* status) {
    373             UTimer start, stop;
    374             utimer_getTime(&start);
    375             while (n-- > 0) {
    376                 empty_call();
    377             }
    378             utimer_getTime(&stop);
    379             return utimer_getDeltaSeconds(&start,&stop); // ms
    380         }
    381 
    382         virtual void call(UErrorCode* status){
    383             exec_count = 0;
    384             memcpy(base, backup, num * width);
    385             qsort(base, num, width, fn);
    386         }
    387         virtual double time(int32_t n, UErrorCode* status) {
    388             double t1 = time_empty(n,status);
    389             double t2 = UPerfFunction::time(n, status);
    390             return  t2-t1;// < 0 ? t2 : t2-t1;
    391         }
    392 
    393         virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
    394 };
    395 int32_t CmdQsort::exec_count;
    396 
    397 
    398 class CmdBinSearch : public UPerfFunction{
    399 public:
    400     typedef	int (CmdBinSearch::* Func)(int, int);
    401 
    402     UCollator * col;
    403     DWORD       win_langid;
    404     int32_t     count;
    405     DataIndex * rnd;
    406     DataIndex * ord;
    407     Func 	    fn;
    408     int32_t     exec_count;
    409 
    410     CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)
    411         :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(fn),exec_count(0){}
    412 
    413 
    414         virtual void call(UErrorCode* status){
    415             exec_count = 0;
    416             for(int32_t i = 0; i< count; i++){ // search all data
    417                 binary_search(i);
    418             }
    419         }
    420         virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
    421 
    422         void binary_search(int32_t random)	{
    423             int low   = 0;
    424             int high  = count - 1;
    425             int guess;
    426             int last_guess = -1;
    427             int r;
    428             while (TRUE) {
    429                 guess = (high + low)/2;
    430                 if (last_guess == guess) break; // nothing to search
    431 
    432                 r = (this->*fn)(random, guess);
    433                 exec_count++;
    434 
    435                 if (r == 0)
    436                     return;	// found, search end.
    437                 if (r < 0) {
    438                     high = guess;
    439                 } else {
    440                     low  = guess;
    441                 }
    442                 last_guess = guess;
    443             }
    444         }
    445 
    446         int icu_strcoll_null(int32_t i, int32_t j){
    447             return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1);
    448         }
    449 
    450         int icu_strcoll_len(int32_t i, int32_t j){
    451             return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j].icu_data, ord[j].icu_data_len);
    452         }
    453 
    454         int icu_cmpkey(int32_t i, int32_t j) {
    455             return strcmp( (char *) rnd[i].icu_key, (char *) ord[j].icu_key );
    456         }
    457 
    458 #if U_PLATFORM_HAS_WIN32_API
    459         int win_cmp_null(int32_t i, int32_t j) {
    460             int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].win_data, -1);
    461             if (t == 0){
    462                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    463                 exit(-1);
    464             } else{
    465                 return t - CSTR_EQUAL;
    466             }
    467         }
    468 
    469         int win_cmp_len(int32_t i, int32_t j) {
    470             int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_data_len, ord[j].win_data, ord[j].win_data_len);
    471             if (t == 0){
    472                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    473                 exit(-1);
    474             } else{
    475                 return t - CSTR_EQUAL;
    476             }
    477         }
    478 #endif
    479 
    480 #define BFUNC(name, func, data) \
    481     int name(int32_t i, int32_t j) { \
    482     return func(rnd[i].data, ord[j].data); \
    483     }
    484 
    485         BFUNC(posix_strcoll_null, strcoll, posix_data)
    486             BFUNC(posix_cmpkey, strcmp, posix_key)
    487             BFUNC(win_cmpkey, strcmp, win_key)
    488             BFUNC(win_wcscmp, wcscmp, win_data)
    489             BFUNC(icu_strcmp, u_strcmp, icu_data)
    490             BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
    491 };
    492 
    493 class CollPerfTest : public UPerfTest {
    494 public:
    495     UCollator *     col;
    496     DWORD           win_langid;
    497 
    498     UChar * icu_data_all;
    499     int32_t icu_data_all_len;
    500 
    501     int32_t         count;
    502     CA_uchar *      icu_data;
    503     CA_uint8 *      icu_key;
    504     CA_char *       posix_data;
    505     CA_char *       posix_key;
    506     CA_win_wchar *  win_data;
    507     CA_char *       win_key;
    508 
    509     DataIndex * rnd_index; // random by icu key
    510     DataIndex * ord_win_data;
    511     DataIndex * ord_win_key;
    512     DataIndex * ord_posix_data;
    513     DataIndex * ord_posix_key;
    514     DataIndex * ord_icu_data;
    515     DataIndex * ord_icu_key;
    516     DataIndex * ord_win_wcscmp;
    517     DataIndex * ord_icu_strcmp;
    518     DataIndex * ord_icu_cmpcpo;
    519 
    520     virtual ~CollPerfTest(){
    521         ucol_close(col);
    522         delete [] icu_data_all;
    523         delete icu_data;
    524         delete icu_key;
    525         delete posix_data;
    526         delete posix_key;
    527         delete win_data;
    528         delete win_key;
    529         delete[] rnd_index;
    530         delete[] ord_win_data;
    531         delete[] ord_win_key;
    532         delete[] ord_posix_data;
    533         delete[] ord_posix_key;
    534         delete[] ord_icu_data;
    535         delete[] ord_icu_key;
    536         delete[] ord_win_wcscmp;
    537         delete[] ord_icu_strcmp;
    538         delete[] ord_icu_cmpcpo;
    539     }
    540 
    541     CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest(argc, argv, status){
    542         col = NULL;
    543         icu_data_all = NULL;
    544         icu_data = NULL;
    545         icu_key = NULL;
    546         posix_data = NULL;
    547         posix_key = NULL;
    548         win_data =NULL;
    549         win_key = NULL;
    550 
    551         rnd_index = NULL;
    552         ord_win_data= NULL;
    553         ord_win_key= NULL;
    554         ord_posix_data= NULL;
    555         ord_posix_key= NULL;
    556         ord_icu_data= NULL;
    557         ord_icu_key= NULL;
    558         ord_win_wcscmp = NULL;
    559         ord_icu_strcmp = NULL;
    560         ord_icu_cmpcpo = NULL;
    561 
    562         if (U_FAILURE(status)){
    563             return;
    564         }
    565 
    566         // Parse additional arguments
    567 
    568         UOption options[] = {
    569             UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG),        // Windows Language ID number.
    570                 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG),      // --rulefile <filename>
    571                 // Collation related arguments. All are optional.
    572                 // To simplify parsing, two choice arguments are disigned as NO_ARG.
    573                 // The default value is UPPER word in the comment
    574                 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG),          // --french <on | OFF>
    575                 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG),       // --alternate <NON_IGNORE | shifted>
    576                 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefirst <lower | upper | OFF>
    577                 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG),       // --caselevel <on | OFF>
    578                 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG),          // --normal <on | OFF>
    579                 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG),  // --strength <1-5>
    580         };
    581         int32_t opt_len = UPRV_LENGTHOF(options);
    582         enum {i, r,f,a,c,l,n,s};   // The buffer between the option items' order and their references
    583 
    584         _remainingArgc = u_parseArgs(_remainingArgc, (char**)argv, opt_len, options);
    585 
    586         if (_remainingArgc < 0){
    587             status = U_ILLEGAL_ARGUMENT_ERROR;
    588             return;
    589         }
    590 
    591         if (locale == NULL){
    592             locale = "en_US";   // set default locale
    593         }
    594 
    595 #if U_PLATFORM_HAS_WIN32_API
    596         if (options[i].doesOccur) {
    597             char *endp;
    598             int tmp = strtol(options[i].value, &endp, 0);
    599             if (endp == options[i].value) {
    600                 status = U_ILLEGAL_ARGUMENT_ERROR;
    601                 return;
    602             }
    603             win_langid = MAKELCID(tmp, SORT_DEFAULT);
    604         } else {
    605             win_langid = uloc_getLCID(locale);
    606         }
    607 #endif
    608 
    609         //  Set up an ICU collator
    610         if (options[r].doesOccur) {
    611             // TODO: implement it
    612         } else {
    613             col = ucol_open(locale, &status);
    614             if (U_FAILURE(status)) {
    615                 return;
    616             }
    617         }
    618 
    619         if (options[f].doesOccur) {
    620             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
    621         } else {
    622             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
    623         }
    624 
    625         if (options[a].doesOccur) {
    626             ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    627         }
    628 
    629         if (options[c].doesOccur) { // strcmp() has i18n encoding problem
    630             if (strcmp("lower", options[c].value) == 0){
    631                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
    632             } else if (strcmp("upper", options[c].value) == 0) {
    633                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
    634             } else {
    635                 status = U_ILLEGAL_ARGUMENT_ERROR;
    636                 return;
    637             }
    638         }
    639 
    640         if (options[l].doesOccur){
    641             ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status);
    642         }
    643 
    644         if (options[n].doesOccur){
    645             ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    646         }
    647 
    648         if (options[s].doesOccur) {
    649             char *endp;
    650             int tmp = strtol(options[l].value, &endp, 0);
    651             if (endp == options[l].value) {
    652                 status = U_ILLEGAL_ARGUMENT_ERROR;
    653                 return;
    654             }
    655             switch (tmp) {
    656             case 1:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &status);		break;
    657             case 2:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &status);		break;
    658             case 3:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status);		break;
    659             case 4:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status);	break;
    660             case 5:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status);		break;
    661             default: status = U_ILLEGAL_ARGUMENT_ERROR;					return;
    662             }
    663         }
    664         prepareData(status);
    665     }
    666 
    // Registers one test without the explicit 'id' that TESTCASE(id,test)-style
    // macros require. It relies on the locals `temp`, `index`, `name` and `exec`
    // of the enclosing function. When `temp` matches the requested index, `name`
    // is set and the function returns: NULL if exec is false or construction
    // reported a failure, otherwise the new test object. If it does not match,
    // `temp` advances to the next slot.
#define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
    if(temp == index) {\
        name = #testname;\
        if (!exec) {\
            return NULL;\
        }\
        UErrorCode status = U_ZERO_ERROR;\
        UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\
        if (U_FAILURE(status)) {\
            delete t;\
            return NULL;\
        }\
        return t;\
    }\
    temp++
    685 
    686 
    687     virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
    688         int temp = 0;
    689 
    690 #define TEST_KEYGEN(testname, func)\
    691     TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
    692         TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null);
    693         TEST_KEYGEN(TestIcu_KeyGen_len,  icu_key_len);
    694         TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null);
    695 #if U_PLATFORM_HAS_WIN32_API
    696         TEST_KEYGEN(TestWin_KeyGen_null, win_key_null);
    697         TEST_KEYGEN(TestWin_KeyGen_len, win_key_len);
    698 #endif
    699 
    700 #define TEST_ITER(testname, func)\
    701     TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
    702         TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null);
    703         TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len);
    704         TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null);
    705         TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len);
    706 
    707 #define TEST_ITER_ALL(testname, func)\
    708     TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
    709         TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null);
    710         TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len);
    711         TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null);
    712         TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len);
    713 
    714 #define TEST_QSORT(testname, func)\
    715     TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
    716         TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null);
    717         TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len);
    718         TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey);
    719         TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null);
    720         TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey);
    721 #if U_PLATFORM_HAS_WIN32_API
    722         TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null);
    723         TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len);
    724         TEST_QSORT(TestWin_qsort_usekey, win_cmpkey);
    725 #endif
    726 
    727 #define TEST_BIN(testname, func)\
    728     TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
    729         TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null);
    730         TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len);
    731         TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey);
    732         TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp);
    733         TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo);
    734         TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null);
    735         TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey);
    736 #if U_PLATFORM_HAS_WIN32_API
    737         TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null);
    738         TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len);
    739 #endif
    740         TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey);
    741         TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp);
    742 
    743         name="";
    744         return NULL;
    745     }
    746 
    747 
    748 
    749     void prepareData(UErrorCode& status){
    750         if(U_FAILURE(status)) return;
    751         if (icu_data) return; // prepared
    752 
    753         icu_data = new CA_uchar();
    754 
    755         // Following code is borrowed from UPerfTest::getLines();
    756         const UChar*    line=NULL;
    757         int32_t         len =0;
    758         for (;;) {
    759             line = ucbuf_readline(ucharBuf,&len,&status);
    760             if(line == NULL || U_FAILURE(status)){break;}
    761 
    762             // Refer to the source code of ucbuf_readline()
    763             // 1. 'len' includs the line terminal symbols
    764             // 2. The length of the line terminal symbols is only one character
    765             // 3. The Windows CR LF line terminal symbols will be converted to CR
    766 
    767             if (len == 1) {
    768                 continue; //skip empty line
    769             } else {
    770                 icu_data->append_one(len);
    771                 memcpy(icu_data->last(), line, len * sizeof(UChar));
    772                 icu_data->last()[len -1] = NULL;
    773             }
    774         }
    775         if(U_FAILURE(status)) return;
    776 
    777         // UTF-16 -> UTF-8 conversion.
    778         UConverter   *conv = ucnv_open("utf-8", &status); // just UTF-8 for now.
    779         if (U_FAILURE(status)) return;
    780 
    781         count = icu_data->count;
    782 
    783         icu_data_all_len =  icu_data->index[count]; // includes all NULLs
    784         icu_data_all_len -= count;  // excludes all NULLs
    785         icu_data_all_len += 1;      // the terminal NULL
    786         icu_data_all = new UChar[icu_data_all_len];
    787         icu_data_all[icu_data_all_len - 1] = 0; //the terminal NULL
    788 
    789         icu_key  = new CA_uint8;
    790         win_data = new CA_win_wchar;
    791         win_key  = new CA_char;
    792         posix_data = new CA_char;
    793         posix_key = new CA_char;
    794         rnd_index = new DataIndex[count];
    795         DataIndex::win_langid = win_langid;
    796         DataIndex::col        = col;
    797 
    798 
    799         UChar * p = icu_data_all;
    800         int32_t s;
    801         int32_t t;
    802         for (int i=0; i < count; i++) {
    803             // ICU all data
    804             s = sizeof(UChar) * icu_data->lengthOf(i);
    805             memcpy(p, icu_data->dataOf(i), s);
    806             p += icu_data->lengthOf(i);
    807 
    808             // ICU data
    809 
    810             // ICU key
    811             s = ucol_getSortKey(col, icu_data->dataOf(i), -1,NULL, 0);
    812             icu_key->append_one(s);
    813             t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s);
    814             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
    815 
    816             // POSIX data
    817             s = ucnv_fromUChars(conv,NULL, 0, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
    818             if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
    819                 status = U_ZERO_ERROR;
    820             } else {
    821                 return;
    822             }
    823             posix_data->append_one(s + 1); // plus terminal NULL
    824             t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
    825             if (U_FAILURE(status)) return;
    826             if ( t != s){status = U_INVALID_FORMAT_ERROR;return;}
    827             posix_data->last()[s] = 0;
    828 
    829             // POSIX key
    830             s = strxfrm(NULL, posix_data->dataOf(i), 0);
    831             if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;}
    832             posix_key->append_one(s);
    833             t = strxfrm(posix_key->last(), posix_data->dataOf(i), s);
    834             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
    835 
    836 #if U_PLATFORM_HAS_WIN32_API
    837             // Win data
    838             s = icu_data->lengthOf(i) + 1; // plus terminal NULL
    839             win_data->append_one(s);
    840             memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s);
    841 
    842             // Win key
    843             s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), NULL,0);
    844             if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;}
    845             win_key->append_one(s);
    846             t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), (WCHAR *)(win_key->last()),s);
    847             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
    848 #endif
    849         };
    850 
    851         // append_one() will make points shifting, should not merge following code into previous iteration
    852         for (int i=0; i < count; i++) {
    853             rnd_index[i].icu_key = icu_key->dataOf(i);
    854             rnd_index[i].icu_data = icu_data->dataOf(i);
    855             rnd_index[i].icu_data_len = icu_data->lengthOf(i);
    856             rnd_index[i].posix_key = posix_key->last();
    857             rnd_index[i].posix_data = posix_data->dataOf(i);
    858             rnd_index[i].posix_data_len = posix_data->lengthOf(i);
    859 #if U_PLATFORM_HAS_WIN32_API
    860             rnd_index[i].win_key = win_key->dataOf(i);
    861             rnd_index[i].win_data = win_data->dataOf(i);
    862             rnd_index[i].win_data_len = win_data->lengthOf(i);
    863 #endif
    864         };
    865 
    866         ucnv_close(conv);
    867         qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random);
    868 
    869 #define SORT(data, func) \
    870     data = new DataIndex[count];\
    871     memcpy(data, rnd_index, count * sizeof(DataIndex));\
    872     qsort(data, count, sizeof(DataIndex), CmdQsort::func)
    873 
    874         SORT(ord_icu_data, icu_strcoll_len);
    875         SORT(ord_icu_key, icu_cmpkey);
    876         SORT(ord_posix_data, posix_strcoll_null);
    877         SORT(ord_posix_key, posix_cmpkey);
    878 #if U_PLATFORM_HAS_WIN32_API
    879         SORT(ord_win_data, win_cmp_len);
    880         SORT(ord_win_key, win_cmpkey);
    881         SORT(ord_win_wcscmp, win_wcscmp);
    882 #endif
    883         SORT(ord_icu_strcmp, icu_strcmp);
    884         SORT(ord_icu_cmpcpo, icu_cmpcpo);
    885     }
    886 };
    887 
    888 
    889 int main(int argc, const char *argv[])
    890 {
    891 
    892     UErrorCode status = U_ZERO_ERROR;
    893     CollPerfTest test(argc, argv, status);
    894 
    895     if (U_FAILURE(status)){
    896         printf("The error is %s\n", u_errorName(status));
    897         //TODO: print usage here
    898         return status;
    899     }
    900 
    901     if (test.run() == FALSE){
    902         fprintf(stderr, "FAILED: Tests could not be run please check the "
    903             "arguments.\n");
    904         return -1;
    905     }
    906     return 0;
    907 }
    908 
    909