Home | History | Annotate | Download | only in collperf
      1 /********************************************************************
      2 * COPYRIGHT:
      3 * Copyright (C) 2001-2012 IBM, Inc.   All Rights Reserved.
      4 *
      5 ********************************************************************/
      6 
      7 #include <stdio.h>
      8 #include <stdlib.h>
      9 #include <locale.h>
     10 #include <limits.h>
     11 #include <string.h>
     12 #include "unicode/uperf.h"
     13 #include "uoptions.h"
     14 #include "unicode/coll.h"
     15 #include <unicode/ucoleitr.h>
     16 
// When the platform has no Win32 API, define DWORD and WCHAR here as
// stand-ins so that the declarations in this file that use them
// unconditionally (for example DataIndex::win_langid and
// DataIndex::win_data) still compile.
#if !U_PLATFORM_HAS_WIN32_API
#define DWORD uint32_t
#define WCHAR wchar_t
#endif
     21 
     22 /* To store an array of string<UNIT> in continue space.
     23 Since string<UNIT> itself is treated as an array of UNIT, this
     24 class will ease our memory management for an array of string<UNIT>.
     25 */
     26 
     27 //template<typename UNIT>
     28 #define COMPATCT_ARRAY(CompactArrays, UNIT) \
     29 struct CompactArrays{\
     30     CompactArrays(const CompactArrays & );\
     31     CompactArrays & operator=(const CompactArrays & );\
     32     int32_t   count;/*total number of the strings*/ \
     33     int32_t * index;/*relative offset in data*/ \
     34     UNIT    * data; /*the real space to hold strings*/ \
     35     \
     36     ~CompactArrays(){free(index);free(data);} \
     37     CompactArrays():data(NULL), index(NULL), count(0){ \
     38     index = (int32_t *) realloc(index, sizeof(int32_t)); \
     39     index[0] = 0; \
     40     } \
     41     void append_one(int32_t theLen){ /*include terminal NULL*/ \
     42     count++; \
     43     index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \
     44     index[count] = index[count - 1] + theLen; \
     45     data = (UNIT *) realloc(data, sizeof(UNIT) * index[count]); \
     46     } \
     47     UNIT * last(){return data + index[count - 1];} \
     48     UNIT * dataOf(int32_t i){return data + index[i];} \
     49     int32_t lengthOf(int i){return index[i+1] - index[i] - 1; }	/*exclude terminating NULL*/  \
     50 };
     51 
//typedef CompactArrays<UChar> CA_uchar;
//typedef CompactArrays<char> CA_char;
//typedef CompactArrays<uint8_t> CA_uint8;
//typedef CompactArrays<WCHAR> CA_win_wchar;

// Expand the macro once per code-unit type, producing the four concrete
// structs that the commented-out template typedefs above would have named.
COMPATCT_ARRAY(CA_uchar, UChar)
COMPATCT_ARRAY(CA_char, char)
COMPATCT_ARRAY(CA_uint8, uint8_t)
COMPATCT_ARRAY(CA_win_wchar, WCHAR)
     61 
     62 
// Record handed to the benchmark commands in this file: for one string it
// carries pointers to the text and to a sort key in ICU, POSIX and Windows
// form. NOTE(review): the code that fills these members is not part of this
// struct; the descriptions below are based on how the members are read.
struct DataIndex {
    static DWORD        win_langid;     // for qsort callback function
    static UCollator *  col;            // for qsort callback function
    uint8_t *   icu_key;        // compared with strcmp() by the ICU key-based tests
    UChar *     icu_data;       // text passed to ucol_getSortKey() / ucol_strcoll()
    int32_t     icu_data_len;   // explicit length passed together with icu_data
    char*       posix_key;      // compared with strcmp() by the POSIX key-based tests
    char*       posix_data;     // text passed to strxfrm() / strcoll()
    int32_t     posix_data_len; // not read by the commands shown in this file section
    char*       win_key;        // compared with strcmp() by the Windows key-based tests
    WCHAR *     win_data;       // text passed to LCMapStringW() / CompareStringW() / wcscmp()
    int32_t     win_data_len;   // explicit length passed together with win_data
};
// Out-of-class definitions of the static members declared above.
DWORD DataIndex::win_langid;
UCollator * DataIndex::col;
     78 
     79 
     80 
     81 class CmdKeyGen : public UPerfFunction {
     82     typedef	void (CmdKeyGen::* Func)(int32_t);
     83     enum{MAX_KEY_LENGTH = 5000};
     84     UCollator * col;
     85     DWORD       win_langid;
     86     int32_t     count;
     87     DataIndex * data;
     88     Func 	    fn;
     89 
     90     union { // to save sapce
     91         uint8_t		icu_key[MAX_KEY_LENGTH];
     92         char        posix_key[MAX_KEY_LENGTH];
     93         WCHAR		win_key[MAX_KEY_LENGTH];
     94     };
     95 public:
     96     CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataIndex * data,Func fn,int32_t)
     97         :col(col),win_langid(win_langid), count(count), data(data), fn(fn){}
     98 
     99         virtual long getOperationsPerIteration(){return count;}
    100 
    101         virtual void call(UErrorCode* status){
    102             for(int32_t i = 0; i< count; i++){
    103                 (this->*fn)(i);
    104             }
    105         }
    106 
    107         void icu_key_null(int32_t i){
    108             ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH);
    109         }
    110 
    111         void icu_key_len(int32_t i){
    112             ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key, MAX_KEY_LENGTH);
    113         }
    114 
    115 #if U_PLATFORM_HAS_WIN32_API
    116         // pre-generated in CollPerfTest::prepareData(), need not to check error here
    117         void win_key_null(int32_t i){
    118             //LCMAP_SORTsk             0x00000400  // WC sort sk (normalize)
    119             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_key, MAX_KEY_LENGTH);
    120         }
    121 
    122         void win_key_len(int32_t i){
    123             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].win_data_len, win_key, MAX_KEY_LENGTH);
    124         }
    125 #endif
    126 
    127         void posix_key_null(int32_t i){
    128             strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH);
    129         }
    130 };
    131 
    132 
    133 class CmdIter : public UPerfFunction {
    134     typedef	void (CmdIter::* Func)(UErrorCode* , int32_t );
    135     int32_t             count;
    136     CA_uchar *          data;
    137     Func                fn;
    138     UCollationElements *iter;
    139     int32_t             exec_count;
    140 public:
    141     CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data, Func fn, int32_t,int32_t)
    142         :count(count), data(data), fn(fn){
    143             exec_count = 0;
    144             UChar dummytext[] = {0, 0};
    145             iter = ucol_openElements(col, NULL, 0, &status);
    146             ucol_setText(iter, dummytext, 1, &status);
    147         }
    148         ~CmdIter(){
    149             ucol_closeElements(iter);
    150         }
    151 
    152         virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
    153 
    154         virtual void call(UErrorCode* status){
    155             exec_count = 0;
    156             for(int32_t i = 0; i< count; i++){
    157                 (this->*fn)(status, i);
    158             }
    159         }
    160 
    161         void icu_forward_null(UErrorCode* status, int32_t i){
    162             ucol_setText(iter, data->dataOf(i), -1, status);
    163             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
    164         }
    165 
    166         void icu_forward_len(UErrorCode* status, int32_t i){
    167             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
    168             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
    169         }
    170 
    171         void icu_backward_null(UErrorCode* status, int32_t i){
    172             ucol_setText(iter, data->dataOf(i), -1, status);
    173             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
    174         }
    175 
    176         void icu_backward_len(UErrorCode* status, int32_t i){
    177             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
    178             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
    179         }
    180 };
    181 
    182 class CmdIterAll : public UPerfFunction {
    183     typedef	void (CmdIterAll::* Func)(UErrorCode* status);
    184     int32_t     count;
    185     UChar *     data;
    186     Func        fn;
    187     UCollationElements *iter;
    188     int32_t     exec_count;
    189 
    190 public:
    191     enum CALL {forward_null, forward_len, backward_null, backward_len};
    192 
    193     ~CmdIterAll(){
    194         ucol_closeElements(iter);
    195     }
    196     CmdIterAll(UErrorCode & status, UCollator * col, int32_t count,  UChar * data, CALL call,int32_t,int32_t)
    197         :count(count),data(data)
    198     {
    199         exec_count = 0;
    200         if (call == forward_null || call == backward_null) {
    201             iter = ucol_openElements(col, data, -1, &status);
    202         } else {
    203             iter = ucol_openElements(col, data, count, &status);
    204         }
    205 
    206         if (call == forward_null || call == forward_len){
    207             fn = &CmdIterAll::icu_forward_all;
    208         } else {
    209             fn = &CmdIterAll::icu_backward_all;
    210         }
    211     }
    212     virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
    213 
    214     virtual void call(UErrorCode* status){
    215         (this->*fn)(status);
    216     }
    217 
    218     void icu_forward_all(UErrorCode* status){
    219         int strlen = count - 5;
    220         int count5 = 5;
    221         int strindex = 0;
    222         ucol_setOffset(iter, strindex, status);
    223         while (TRUE) {
    224             if (ucol_next(iter, status) == UCOL_NULLORDER) {
    225                 break;
    226             }
    227             exec_count++;
    228             count5 --;
    229             if (count5 == 0) {
    230                 strindex += 10;
    231                 if (strindex > strlen) {
    232                     break;
    233                 }
    234                 ucol_setOffset(iter, strindex, status);
    235                 count5 = 5;
    236             }
    237         }
    238     }
    239 
    240     void icu_backward_all(UErrorCode* status){
    241         int strlen = count;
    242         int count5 = 5;
    243         int strindex = 5;
    244         ucol_setOffset(iter, strindex, status);
    245         while (TRUE) {
    246             if (ucol_previous(iter, status) == UCOL_NULLORDER) {
    247                 break;
    248             }
    249             exec_count++;
    250             count5 --;
    251             if (count5 == 0) {
    252                 strindex += 10;
    253                 if (strindex > strlen) {
    254                     break;
    255                 }
    256                 ucol_setOffset(iter, strindex, status);
    257                 count5 = 5;
    258             }
    259         }
    260     }
    261 
    262 };
    263 
    264 struct CmdQsort : public UPerfFunction{
    265 
    266     static int q_random(const void * a, const void * b){
    267         uint8_t * key_a = ((DataIndex *)a)->icu_key;
    268         uint8_t * key_b = ((DataIndex *)b)->icu_key;
    269 
    270         int   val_a = 0;
    271         int   val_b = 0;
    272         while (*key_a != 0) {val_a += val_a*37 + *key_a++;}
    273         while (*key_b != 0) {val_b += val_b*37 + *key_b++;}
    274         return val_a - val_b;
    275     }
    276 
    277 #define QCAST() \
    278     DataIndex * da = (DataIndex *) a; \
    279     DataIndex * db = (DataIndex *) b; \
    280     ++exec_count
    281 
    282     static int icu_strcoll_null(const void *a, const void *b){
    283         QCAST();
    284         return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_EQUAL;
    285     }
    286 
    287     static int icu_strcoll_len(const void *a, const void *b){
    288         QCAST();
    289         return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_data, db->icu_data_len) - UCOL_EQUAL;
    290     }
    291 
    292     static int icu_cmpkey (const void *a, const void *b){
    293         QCAST();
    294         return strcmp((char *) da->icu_key, (char *) db->icu_key);
    295     }
    296 
    297 #if U_PLATFORM_HAS_WIN32_API
    298     static int win_cmp_null(const void *a, const void *b) {
    299         QCAST();
    300         //CSTR_LESS_THAN		1
    301         //CSTR_EQUAL			2
    302         //CSTR_GREATER_THAN		3
    303         int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data, -1);
    304         if (t == 0){
    305             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    306             exit(-1);
    307         } else{
    308             return t - CSTR_EQUAL;
    309         }
    310     }
    311 
    312     static int win_cmp_len(const void *a, const void *b) {
    313         QCAST();
    314         int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len, db->win_data, db->win_data_len);
    315         if (t == 0){
    316             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    317             exit(-1);
    318         } else{
    319             return t - CSTR_EQUAL;
    320         }
    321     }
    322 #endif
    323 
    324 #define QFUNC(name, func, data) \
    325     static int name (const void *a, const void *b){ \
    326     QCAST(); \
    327     return func(da->data, db->data); \
    328     }
    329 
    330     QFUNC(posix_strcoll_null, strcoll, posix_data)
    331         QFUNC(posix_cmpkey, strcmp, posix_key)
    332 #if U_PLATFORM_HAS_WIN32_API
    333         QFUNC(win_cmpkey, strcmp, win_key)
    334         QFUNC(win_wcscmp, wcscmp, win_data)
    335 #endif
    336         QFUNC(icu_strcmp, u_strcmp, icu_data)
    337         QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
    338 
    339 private:
    340     static int32_t exec_count; // potential muilt-thread problem
    341 
    342     typedef	int (* Func)(const void *, const void *);
    343 
    344     Func    fn;
    345     void *  base;   //Start of target array.
    346     int32_t num;    //Array size in elements.
    347     int32_t width;  //Element size in bytes.
    348 
    349     void *  backup; //copy source of base
    350 public:
    351     CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func fn, int32_t,int32_t)
    352         :backup(theBase),num(num),width(width),fn(fn){
    353             base = malloc(num * width);
    354             time_empty(100, &status); // warm memory/cache
    355         }
    356 
    357         ~CmdQsort(){
    358             free(base);
    359         }
    360 
    361         void empty_call(){
    362             exec_count = 0;
    363             memcpy(base, backup, num * width);
    364         }
    365 
    366         double time_empty(int32_t n, UErrorCode* status) {
    367             UTimer start, stop;
    368             utimer_getTime(&start);
    369             while (n-- > 0) {
    370                 empty_call();
    371             }
    372             utimer_getTime(&stop);
    373             return utimer_getDeltaSeconds(&start,&stop); // ms
    374         }
    375 
    376         virtual void call(UErrorCode* status){
    377             exec_count = 0;
    378             memcpy(base, backup, num * width);
    379             qsort(base, num, width, fn);
    380         }
    381         virtual double time(int32_t n, UErrorCode* status) {
    382             double t1 = time_empty(n,status);
    383             double t2 = UPerfFunction::time(n, status);
    384             return  t2-t1;// < 0 ? t2 : t2-t1;
    385         }
    386 
    387         virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
    388 };
    389 int32_t CmdQsort::exec_count;
    390 
    391 
    392 class CmdBinSearch : public UPerfFunction{
    393 public:
    394     typedef	int (CmdBinSearch::* Func)(int, int);
    395 
    396     UCollator * col;
    397     DWORD       win_langid;
    398     int32_t     count;
    399     DataIndex * rnd;
    400     DataIndex * ord;
    401     Func 	    fn;
    402     int32_t     exec_count;
    403 
    404     CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)
    405         :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(fn),exec_count(0){}
    406 
    407 
    408         virtual void call(UErrorCode* status){
    409             exec_count = 0;
    410             for(int32_t i = 0; i< count; i++){ // search all data
    411                 binary_search(i);
    412             }
    413         }
    414         virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
    415 
    416         void binary_search(int32_t random)	{
    417             int low   = 0;
    418             int high  = count - 1;
    419             int guess;
    420             int last_guess = -1;
    421             int r;
    422             while (TRUE) {
    423                 guess = (high + low)/2;
    424                 if (last_guess == guess) break; // nothing to search
    425 
    426                 r = (this->*fn)(random, guess);
    427                 exec_count++;
    428 
    429                 if (r == 0)
    430                     return;	// found, search end.
    431                 if (r < 0) {
    432                     high = guess;
    433                 } else {
    434                     low  = guess;
    435                 }
    436                 last_guess = guess;
    437             }
    438         }
    439 
    440         int icu_strcoll_null(int32_t i, int32_t j){
    441             return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1);
    442         }
    443 
    444         int icu_strcoll_len(int32_t i, int32_t j){
    445             return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j].icu_data, ord[j].icu_data_len);
    446         }
    447 
    448         int icu_cmpkey(int32_t i, int32_t j) {
    449             return strcmp( (char *) rnd[i].icu_key, (char *) ord[j].icu_key );
    450         }
    451 
    452 #if U_PLATFORM_HAS_WIN32_API
    453         int win_cmp_null(int32_t i, int32_t j) {
    454             int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].win_data, -1);
    455             if (t == 0){
    456                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    457                 exit(-1);
    458             } else{
    459                 return t - CSTR_EQUAL;
    460             }
    461         }
    462 
    463         int win_cmp_len(int32_t i, int32_t j) {
    464             int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_data_len, ord[j].win_data, ord[j].win_data_len);
    465             if (t == 0){
    466                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    467                 exit(-1);
    468             } else{
    469                 return t - CSTR_EQUAL;
    470             }
    471         }
    472 #endif
    473 
    474 #define BFUNC(name, func, data) \
    475     int name(int32_t i, int32_t j) { \
    476     return func(rnd[i].data, ord[j].data); \
    477     }
    478 
    479         BFUNC(posix_strcoll_null, strcoll, posix_data)
    480             BFUNC(posix_cmpkey, strcmp, posix_key)
    481             BFUNC(win_cmpkey, strcmp, win_key)
    482             BFUNC(win_wcscmp, wcscmp, win_data)
    483             BFUNC(icu_strcmp, u_strcmp, icu_data)
    484             BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
    485 };
    486 
    487 class CollPerfTest : public UPerfTest {
    488 public:
    489     UCollator *     col;
    490     DWORD           win_langid;
    491 
    492     UChar * icu_data_all;
    493     int32_t icu_data_all_len;
    494 
    495     int32_t         count;
    496     CA_uchar *      icu_data;
    497     CA_uint8 *      icu_key;
    498     CA_char *       posix_data;
    499     CA_char *       posix_key;
    500     CA_win_wchar *  win_data;
    501     CA_char *       win_key;
    502 
    503     DataIndex * rnd_index; // random by icu key
    504     DataIndex * ord_win_data;
    505     DataIndex * ord_win_key;
    506     DataIndex * ord_posix_data;
    507     DataIndex * ord_posix_key;
    508     DataIndex * ord_icu_data;
    509     DataIndex * ord_icu_key;
    510     DataIndex * ord_win_wcscmp;
    511     DataIndex * ord_icu_strcmp;
    512     DataIndex * ord_icu_cmpcpo;
    513 
    514     virtual ~CollPerfTest(){
    515         ucol_close(col);
    516         delete [] icu_data_all;
    517         delete icu_data;
    518         delete icu_key;
    519         delete posix_data;
    520         delete posix_key;
    521         delete win_data;
    522         delete win_key;
    523         delete[] rnd_index;
    524         delete[] ord_win_data;
    525         delete[] ord_win_key;
    526         delete[] ord_posix_data;
    527         delete[] ord_posix_key;
    528         delete[] ord_icu_data;
    529         delete[] ord_icu_key;
    530         delete[] ord_win_wcscmp;
    531         delete[] ord_icu_strcmp;
    532         delete[] ord_icu_cmpcpo;
    533     }
    534 
    535     CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest(argc, argv, status){
    536         col = NULL;
    537         icu_data_all = NULL;
    538         icu_data = NULL;
    539         icu_key = NULL;
    540         posix_data = NULL;
    541         posix_key = NULL;
    542         win_data =NULL;
    543         win_key = NULL;
    544 
    545         rnd_index = NULL;
    546         ord_win_data= NULL;
    547         ord_win_key= NULL;
    548         ord_posix_data= NULL;
    549         ord_posix_key= NULL;
    550         ord_icu_data= NULL;
    551         ord_icu_key= NULL;
    552         ord_win_wcscmp = NULL;
    553         ord_icu_strcmp = NULL;
    554         ord_icu_cmpcpo = NULL;
    555 
    556         if (U_FAILURE(status)){
    557             return;
    558         }
    559 
    560         // Parse additional arguments
    561 
    562         UOption options[] = {
    563             UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG),        // Windows Language ID number.
    564                 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG),      // --rulefile <filename>
    565                 // Collation related arguments. All are optional.
    566                 // To simplify parsing, two choice arguments are disigned as NO_ARG.
    567                 // The default value is UPPER word in the comment
    568                 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG),          // --french <on | OFF>
    569                 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG),       // --alternate <NON_IGNORE | shifted>
    570                 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefirst <lower | upper | OFF>
    571                 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG),       // --caselevel <on | OFF>
    572                 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG),          // --normal <on | OFF>
    573                 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG),  // --strength <1-5>
    574         };
    575         int32_t opt_len = (sizeof(options)/sizeof(options[0]));
    576         enum {i, r,f,a,c,l,n,s};   // The buffer between the option items' order and their references
    577 
    578         _remainingArgc = u_parseArgs(_remainingArgc, (char**)argv, opt_len, options);
    579 
    580         if (_remainingArgc < 0){
    581             status = U_ILLEGAL_ARGUMENT_ERROR;
    582             return;
    583         }
    584 
    585         if (locale == NULL){
    586             locale = "en_US";   // set default locale
    587         }
    588 
    589 #if U_PLATFORM_HAS_WIN32_API
    590         if (options[i].doesOccur) {
    591             char *endp;
    592             int tmp = strtol(options[i].value, &endp, 0);
    593             if (endp == options[i].value) {
    594                 status = U_ILLEGAL_ARGUMENT_ERROR;
    595                 return;
    596             }
    597             win_langid = MAKELCID(tmp, SORT_DEFAULT);
    598         } else {
    599             win_langid = uloc_getLCID(locale);
    600         }
    601 #endif
    602 
    603         //  Set up an ICU collator
    604         if (options[r].doesOccur) {
    605             // TODO: implement it
    606         } else {
    607             col = ucol_open(locale, &status);
    608             if (U_FAILURE(status)) {
    609                 return;
    610             }
    611         }
    612 
    613         if (options[f].doesOccur) {
    614             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
    615         } else {
    616             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
    617         }
    618 
    619         if (options[a].doesOccur) {
    620             ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    621         }
    622 
    623         if (options[c].doesOccur) { // strcmp() has i18n encoding problem
    624             if (strcmp("lower", options[c].value) == 0){
    625                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
    626             } else if (strcmp("upper", options[c].value) == 0) {
    627                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
    628             } else {
    629                 status = U_ILLEGAL_ARGUMENT_ERROR;
    630                 return;
    631             }
    632         }
    633 
    634         if (options[l].doesOccur){
    635             ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status);
    636         }
    637 
    638         if (options[n].doesOccur){
    639             ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    640         }
    641 
    642         if (options[s].doesOccur) {
    643             char *endp;
    644             int tmp = strtol(options[l].value, &endp, 0);
    645             if (endp == options[l].value) {
    646                 status = U_ILLEGAL_ARGUMENT_ERROR;
    647                 return;
    648             }
    649             switch (tmp) {
    650             case 1:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &status);		break;
    651             case 2:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &status);		break;
    652             case 3:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status);		break;
    653             case 4:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status);	break;
    654             case 5:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status);		break;
    655             default: status = U_ILLEGAL_ARGUMENT_ERROR;					return;
    656             }
    657         }
    658         prepareData(status);
    659     }
    660 
    // Avoids the explicit 'id' argument of TESTCASE(id,test)-style macros by
    // numbering the entries with a running counter instead.
    //
    // Expects `temp`, `index`, `name` and `exec` to be in scope. When `temp`
    // equals `index` the entry sets `name` and returns: NULL if `exec` is
    // false or the command's constructor reported a failure (the command is
    // deleted in that case), otherwise the newly created command. When it does
    // not match, `temp` is incremented. The caller supplies the trailing ';'.
#define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
    if (temp == index) { \
        name = #testname; \
        if (!exec) { \
            return NULL; \
        } \
        UErrorCode status = U_ZERO_ERROR; \
        UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6); \
        if (U_FAILURE(status)) { \
            delete t; \
            return NULL; \
        } \
        return t; \
    } \
    temp++

    680 
    // Maps a test index to a benchmark command.
    //
    // Each TEST(...) expansion below compares the running counter `temp` with
    // `index`. On a match it stores the test's name in `name` and returns
    // either a newly created command (exec true) or NULL (exec false, or the
    // command's constructor reported a failure); otherwise it increments
    // `temp`. The order of the entries therefore defines the test indices, and
    // entries inside #if blocks only take up an index when they are compiled
    // in. If no entry matches, `name` is set to "" and NULL is returned.
    //
    // `par` is not used by this function.
    virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
        int temp = 0;

        // Sort key generation over rnd_index.
#define TEST_KEYGEN(testname, func)\
    TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
        TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null);
        TEST_KEYGEN(TestIcu_KeyGen_len,  icu_key_len);
        TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null);
#if U_PLATFORM_HAS_WIN32_API
        TEST_KEYGEN(TestWin_KeyGen_null, win_key_null);
        TEST_KEYGEN(TestWin_KeyGen_len, win_key_len);
#endif

        // Collation element iteration, one string of icu_data at a time.
#define TEST_ITER(testname, func)\
    TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
        TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null);
        TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len);
        TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null);
        TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len);

        // Collation element iteration over the single icu_data_all buffer.
#define TEST_ITER_ALL(testname, func)\
    TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
        TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null);
        TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len);
        TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null);
        TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len);

        // qsort of rnd_index with the different comparison callbacks.
#define TEST_QSORT(testname, func)\
    TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
        TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null);
        TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len);
        TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey);
        TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null);
        TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey);
#if U_PLATFORM_HAS_WIN32_API
        TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null);
        TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len);
        TEST_QSORT(TestWin_qsort_usekey, win_cmpkey);
#endif

        // Binary search of each rnd_index entry; every entry below searches
        // the ord_icu_key array, whichever comparison routine it selects.
#define TEST_BIN(testname, func)\
    TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
        TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null);
        TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len);
        TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey);
        TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp);
        TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo);
        TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null);
        TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey);
#if U_PLATFORM_HAS_WIN32_API
        TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null);
        TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len);
#endif
        // NOTE(review): unlike their qsort counterparts, these two entries are
        // registered on every platform - confirm this is intended.
        TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey);
        TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp);

        // No entry matched the requested index.
        name="";
        return NULL;
    }
    740 
    741 
    742 
    743     void prepareData(UErrorCode& status){
    744         if(U_FAILURE(status)) return;
    745         if (icu_data) return; // prepared
    746 
    747         icu_data = new CA_uchar();
    748 
    749         // Following code is borrowed from UPerfTest::getLines();
    750         const UChar*    line=NULL;
    751         int32_t         len =0;
    752         for (;;) {
    753             line = ucbuf_readline(ucharBuf,&len,&status);
    754             if(line == NULL || U_FAILURE(status)){break;}
    755 
    756             // Refer to the source code of ucbuf_readline()
    757             // 1. 'len' includs the line terminal symbols
    758             // 2. The length of the line terminal symbols is only one character
    759             // 3. The Windows CR LF line terminal symbols will be converted to CR
    760 
    761             if (len == 1) {
    762                 continue; //skip empty line
    763             } else {
    764                 icu_data->append_one(len);
    765                 memcpy(icu_data->last(), line, len * sizeof(UChar));
    766                 icu_data->last()[len -1] = NULL;
    767             }
    768         }
    769         if(U_FAILURE(status)) return;
    770 
    771         // UTF-16 -> UTF-8 conversion.
    772         UConverter   *conv = ucnv_open("utf-8", &status); // just UTF-8 for now.
    773         if (U_FAILURE(status)) return;
    774 
    775         count = icu_data->count;
    776 
    777         icu_data_all_len =  icu_data->index[count]; // includes all NULLs
    778         icu_data_all_len -= count;  // excludes all NULLs
    779         icu_data_all_len += 1;      // the terminal NULL
    780         icu_data_all = new UChar[icu_data_all_len];
    781         icu_data_all[icu_data_all_len - 1] = 0; //the terminal NULL
    782 
    783         icu_key  = new CA_uint8;
    784         win_data = new CA_win_wchar;
    785         win_key  = new CA_char;
    786         posix_data = new CA_char;
    787         posix_key = new CA_char;
    788         rnd_index = new DataIndex[count];
    789         DataIndex::win_langid = win_langid;
    790         DataIndex::col        = col;
    791 
    792 
    793         UChar * p = icu_data_all;
    794         int32_t s;
    795         int32_t t;
    796         for (int i=0; i < count; i++) {
    797             // ICU all data
    798             s = sizeof(UChar) * icu_data->lengthOf(i);
    799             memcpy(p, icu_data->dataOf(i), s);
    800             p += icu_data->lengthOf(i);
    801 
    802             // ICU data
    803 
    804             // ICU key
    805             s = ucol_getSortKey(col, icu_data->dataOf(i), -1,NULL, 0);
    806             icu_key->append_one(s);
    807             t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s);
    808             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
    809 
    810             // POSIX data
    811             s = ucnv_fromUChars(conv,NULL, 0, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
    812             if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
    813                 status = U_ZERO_ERROR;
    814             } else {
    815                 return;
    816             }
    817             posix_data->append_one(s + 1); // plus terminal NULL
    818             t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
    819             if (U_FAILURE(status)) return;
    820             if ( t != s){status = U_INVALID_FORMAT_ERROR;return;}
    821             posix_data->last()[s] = 0;
    822 
    823             // POSIX key
    824             s = strxfrm(NULL, posix_data->dataOf(i), 0);
    825             if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;}
    826             posix_key->append_one(s);
    827             t = strxfrm(posix_key->last(), posix_data->dataOf(i), s);
    828             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
    829 
    830 #if U_PLATFORM_HAS_WIN32_API
    831             // Win data
    832             s = icu_data->lengthOf(i) + 1; // plus terminal NULL
    833             win_data->append_one(s);
    834             memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s);
    835 
    836             // Win key
    837             s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), NULL,0);
    838             if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;}
    839             win_key->append_one(s);
    840             t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), (WCHAR *)(win_key->last()),s);
    841             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
    842 #endif
    843         };
    844 
    845         // append_one() will make points shifting, should not merge following code into previous iteration
    846         for (int i=0; i < count; i++) {
    847             rnd_index[i].icu_key = icu_key->dataOf(i);
    848             rnd_index[i].icu_data = icu_data->dataOf(i);
    849             rnd_index[i].icu_data_len = icu_data->lengthOf(i);
    850             rnd_index[i].posix_key = posix_key->last();
    851             rnd_index[i].posix_data = posix_data->dataOf(i);
    852             rnd_index[i].posix_data_len = posix_data->lengthOf(i);
    853 #if U_PLATFORM_HAS_WIN32_API
    854             rnd_index[i].win_key = win_key->dataOf(i);
    855             rnd_index[i].win_data = win_data->dataOf(i);
    856             rnd_index[i].win_data_len = win_data->lengthOf(i);
    857 #endif
    858         };
    859 
    860         ucnv_close(conv);
    861         qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random);
    862 
    863 #define SORT(data, func) \
    864     data = new DataIndex[count];\
    865     memcpy(data, rnd_index, count * sizeof(DataIndex));\
    866     qsort(data, count, sizeof(DataIndex), CmdQsort::func)
    867 
    868         SORT(ord_icu_data, icu_strcoll_len);
    869         SORT(ord_icu_key, icu_cmpkey);
    870         SORT(ord_posix_data, posix_strcoll_null);
    871         SORT(ord_posix_key, posix_cmpkey);
    872 #if U_PLATFORM_HAS_WIN32_API
    873         SORT(ord_win_data, win_cmp_len);
    874         SORT(ord_win_key, win_cmpkey);
    875         SORT(ord_win_wcscmp, win_wcscmp);
    876 #endif
    877         SORT(ord_icu_strcmp, icu_strcmp);
    878         SORT(ord_icu_cmpcpo, icu_cmpcpo);
    879     }
    880 };
    881 
    882 
    883 int main(int argc, const char *argv[])
    884 {
    885 
    886     UErrorCode status = U_ZERO_ERROR;
    887     CollPerfTest test(argc, argv, status);
    888 
    889     if (U_FAILURE(status)){
    890         printf("The error is %s\n", u_errorName(status));
    891         //TODO: print usage here
    892         return status;
    893     }
    894 
    895     if (test.run() == FALSE){
    896         fprintf(stderr, "FAILED: Tests could not be run please check the "
    897             "arguments.\n");
    898         return -1;
    899     }
    900     return 0;
    901 }
    902 
    903