Home | History | Annotate | Download | only in collperf
      1 /********************************************************************
      2 * COPYRIGHT:
      3 * Copyright (C) 2001-2006 IBM, Inc.   All Rights Reserved.
      4 *
      5 ********************************************************************/
      6 
      7 #include <stdio.h>
      8 #include <stdlib.h>
      9 #include <locale.h>
     10 #include <limits.h>
     11 #include <string.h>
     12 #include "unicode/uperf.h"
     13 #include "uoptions.h"
     14 #include "unicode/coll.h"
     15 #include <unicode/ucoleitr.h>
     16 
     17 
     18 
     19 /* To store an array of string<UNIT> in contiguous space.
     20 Since string<UNIT> itself is treated as an array of UNIT, this
     21 class will ease our memory management for an array of string<UNIT>.
     22 */
     23 
     24 //template<typename UNIT>
     #define COMPATCT_ARRAY(CompactArrays, UNIT) \
     struct CompactArrays { \
         /* Copy operations are declared but never defined here. */ \
         CompactArrays(const CompactArrays &); \
         CompactArrays & operator=(const CompactArrays &); \
         \
         int32_t   count; /* how many strings have been appended */ \
         int32_t * index; /* index[i] is the offset of string i inside data */ \
         UNIT    * data;  /* single buffer that holds all strings back to back */ \
         \
         /* Start empty: no data buffer, one index slot holding offset 0. */ \
         CompactArrays() : count(0), index(NULL), data(NULL) { \
             index = (int32_t *) malloc(sizeof(int32_t)); \
             index[0] = 0; \
         } \
         \
         ~CompactArrays() { \
             free(index); \
             free(data); \
         } \
         \
         /* Reserve room for one more string of theLen units; theLen counts */ \
         /* the terminating NULL.  The caller fills it in through last().    */ \
         void append_one(int32_t theLen) { \
             const int32_t slot = ++count; \
             index = (int32_t *) realloc(index, sizeof(int32_t) * (slot + 1)); \
             const int32_t used = index[slot - 1] + theLen; \
             index[slot] = used; \
             data = (UNIT *) realloc(data, sizeof(UNIT) * used); \
         } \
         \
         /* Start of the most recently appended string. */ \
         UNIT * last() { return dataOf(count - 1); } \
         \
         /* Start of string i. */ \
         UNIT * dataOf(int32_t i) { return &data[index[i]]; } \
         \
         /* Length of string i without its terminating NULL. */ \
         int32_t lengthOf(int i) { \
             const int32_t withTerminator = index[i + 1] - index[i]; \
             return withTerminator - 1; \
         } \
     };
     48 
     49 //typedef CompactArrays<UChar> CA_uchar;
     50 //typedef CompactArrays<char> CA_char;
     51 //typedef CompactArrays<uint8_t> CA_uint8;
     52 //typedef CompactArrays<WCHAR> CA_win_wchar;
     53 
     // Expand COMPATCT_ARRAY once per code-unit type, producing four concrete
     // structs: CA_uchar (UChar), CA_char (char), CA_uint8 (uint8_t) and
     // CA_win_wchar (WCHAR).
     54 COMPATCT_ARRAY(CA_uchar, UChar)
     55 COMPATCT_ARRAY(CA_char, char)
     56 COMPATCT_ARRAY(CA_uint8, uint8_t)
     57 COMPATCT_ARRAY(CA_win_wchar, WCHAR)
     58 
     59 
     60 struct DataIndex {
     61     static DWORD        win_langid;     // for qsort callback function
     62     static UCollator *  col;            // for qsort callback function
     63     uint8_t *   icu_key;
     64     UChar *     icu_data;
     65     int32_t     icu_data_len;
     66     char*       posix_key;
     67     char*       posix_data;
     68     int32_t     posix_data_len;
     69     char*       win_key;
     70     WCHAR *     win_data;
     71     int32_t     win_data_len;
     72 };
     73 DWORD DataIndex::win_langid;
     74 UCollator * DataIndex::col;
     75 
     76 
     77 
     78 class CmdKeyGen : public UPerfFunction {
     79     typedef	void (CmdKeyGen::* Func)(int32_t);
     80     enum{MAX_KEY_LENGTH = 5000};
     81     UCollator * col;
     82     DWORD       win_langid;
     83     int32_t     count;
     84     DataIndex * data;
     85     Func 	    fn;
     86 
     87     union { // to save sapce
     88         uint8_t		icu_key[MAX_KEY_LENGTH];
     89         char        posix_key[MAX_KEY_LENGTH];
     90         WCHAR		win_key[MAX_KEY_LENGTH];
     91     };
     92 public:
     93     CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataIndex * data,Func fn,int32_t)
     94         :col(col),win_langid(win_langid), count(count), data(data), fn(fn){}
     95 
     96         virtual long getOperationsPerIteration(){return count;}
     97 
     98         virtual void call(UErrorCode* status){
     99             for(int32_t i = 0; i< count; i++){
    100                 (this->*fn)(i);
    101             }
    102         }
    103 
    104         void icu_key_null(int32_t i){
    105             ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH);
    106         }
    107 
    108         void icu_key_len(int32_t i){
    109             ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key, MAX_KEY_LENGTH);
    110         }
    111 
    112         // pre-generated in CollPerfTest::prepareData(), need not to check error here
    113         void win_key_null(int32_t i){
    114             //LCMAP_SORTsk             0x00000400  // WC sort sk (normalize)
    115             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_key, MAX_KEY_LENGTH);
    116         }
    117 
    118         void win_key_len(int32_t i){
    119             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].win_data_len, win_key, MAX_KEY_LENGTH);
    120         }
    121 
    122         void posix_key_null(int32_t i){
    123             strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH);
    124         }
    125 };
    126 
    127 
    128 class CmdIter : public UPerfFunction {
    129     typedef	void (CmdIter::* Func)(UErrorCode* , int32_t );
    130     int32_t             count;
    131     CA_uchar *          data;
    132     Func                fn;
    133     UCollationElements *iter;
    134     int32_t             exec_count;
    135 public:
    136     CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data, Func fn, int32_t,int32_t)
    137         :count(count), data(data), fn(fn){
    138             exec_count = 0;
    139             UChar dummytext[] = {0, 0};
    140             iter = ucol_openElements(col, NULL, 0, &status);
    141             ucol_setText(iter, dummytext, 1, &status);
    142         }
    143         ~CmdIter(){
    144             ucol_closeElements(iter);
    145         }
    146 
    147         virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
    148 
    149         virtual void call(UErrorCode* status){
    150             exec_count = 0;
    151             for(int32_t i = 0; i< count; i++){
    152                 (this->*fn)(status, i);
    153             }
    154         }
    155 
    156         void icu_forward_null(UErrorCode* status, int32_t i){
    157             ucol_setText(iter, data->dataOf(i), -1, status);
    158             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
    159         }
    160 
    161         void icu_forward_len(UErrorCode* status, int32_t i){
    162             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
    163             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
    164         }
    165 
    166         void icu_backward_null(UErrorCode* status, int32_t i){
    167             ucol_setText(iter, data->dataOf(i), -1, status);
    168             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
    169         }
    170 
    171         void icu_backward_len(UErrorCode* status, int32_t i){
    172             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
    173             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
    174         }
    175 };
    176 
    177 class CmdIterAll : public UPerfFunction {
    178     typedef	void (CmdIterAll::* Func)(UErrorCode* status);
    179     int32_t     count;
    180     UChar *     data;
    181     Func        fn;
    182     UCollationElements *iter;
    183     int32_t     exec_count;
    184 
    185 public:
    186     enum CALL {forward_null, forward_len, backward_null, backward_len};
    187 
    188     ~CmdIterAll(){
    189         ucol_closeElements(iter);
    190     }
    191     CmdIterAll(UErrorCode & status, UCollator * col, int32_t count,  UChar * data, CALL call,int32_t,int32_t)
    192         :count(count),data(data)
    193     {
    194         exec_count = 0;
    195         if (call == forward_null || call == backward_null) {
    196             iter = ucol_openElements(col, data, -1, &status);
    197         } else {
    198             iter = ucol_openElements(col, data, count, &status);
    199         }
    200 
    201         if (call == forward_null || call == forward_len){
    202             fn = &CmdIterAll::icu_forward_all;
    203         } else {
    204             fn = &CmdIterAll::icu_backward_all;
    205         }
    206     }
    207     virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
    208 
    209     virtual void call(UErrorCode* status){
    210         (this->*fn)(status);
    211     }
    212 
    213     void icu_forward_all(UErrorCode* status){
    214         int strlen = count - 5;
    215         int count5 = 5;
    216         int strindex = 0;
    217         ucol_setOffset(iter, strindex, status);
    218         while (TRUE) {
    219             if (ucol_next(iter, status) == UCOL_NULLORDER) {
    220                 break;
    221             }
    222             exec_count++;
    223             count5 --;
    224             if (count5 == 0) {
    225                 strindex += 10;
    226                 if (strindex > strlen) {
    227                     break;
    228                 }
    229                 ucol_setOffset(iter, strindex, status);
    230                 count5 = 5;
    231             }
    232         }
    233     }
    234 
    235     void icu_backward_all(UErrorCode* status){
    236         int strlen = count;
    237         int count5 = 5;
    238         int strindex = 5;
    239         ucol_setOffset(iter, strindex, status);
    240         while (TRUE) {
    241             if (ucol_previous(iter, status) == UCOL_NULLORDER) {
    242                 break;
    243             }
    244             exec_count++;
    245             count5 --;
    246             if (count5 == 0) {
    247                 strindex += 10;
    248                 if (strindex > strlen) {
    249                     break;
    250                 }
    251                 ucol_setOffset(iter, strindex, status);
    252                 count5 = 5;
    253             }
    254         }
    255     }
    256 
    257 };
    258 
    259 struct CmdQsort : public UPerfFunction{
    260 
    261     static int q_random(const void * a, const void * b){
    262         uint8_t * key_a = ((DataIndex *)a)->icu_key;
    263         uint8_t * key_b = ((DataIndex *)b)->icu_key;
    264 
    265         int   val_a = 0;
    266         int   val_b = 0;
    267         while (*key_a != 0) {val_a += val_a*37 + *key_a++;}
    268         while (*key_b != 0) {val_b += val_b*37 + *key_b++;}
    269         return val_a - val_b;
    270     }
    271 
    272 #define QCAST() \
    273     DataIndex * da = (DataIndex *) a; \
    274     DataIndex * db = (DataIndex *) b; \
    275     ++exec_count
    276 
    277     static int icu_strcoll_null(const void *a, const void *b){
    278         QCAST();
    279         return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_EQUAL;
    280     }
    281 
    282     static int icu_strcoll_len(const void *a, const void *b){
    283         QCAST();
    284         return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_data, db->icu_data_len) - UCOL_EQUAL;
    285     }
    286 
    287     static int icu_cmpkey (const void *a, const void *b){
    288         QCAST();
    289         return strcmp((char *) da->icu_key, (char *) db->icu_key);
    290     }
    291 
    292     static int win_cmp_null(const void *a, const void *b) {
    293         QCAST();
    294         //CSTR_LESS_THAN		1
    295         //CSTR_EQUAL			2
    296         //CSTR_GREATER_THAN		3
    297         int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data, -1);
    298         if (t == 0){
    299             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    300             exit(-1);
    301         } else{
    302             return t - CSTR_EQUAL;
    303         }
    304     }
    305 
    306     static int win_cmp_len(const void *a, const void *b) {
    307         QCAST();
    308         int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len, db->win_data, db->win_data_len);
    309         if (t == 0){
    310             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    311             exit(-1);
    312         } else{
    313             return t - CSTR_EQUAL;
    314         }
    315     }
    316 
    317 #define QFUNC(name, func, data) \
    318     static int name (const void *a, const void *b){ \
    319     QCAST(); \
    320     return func(da->data, db->data); \
    321     }
    322 
    323     QFUNC(posix_strcoll_null, strcoll, posix_data)
    324         QFUNC(posix_cmpkey, strcmp, posix_key)
    325         QFUNC(win_cmpkey, strcmp, win_key)
    326         QFUNC(win_wcscmp, wcscmp, win_data)
    327         QFUNC(icu_strcmp, u_strcmp, icu_data)
    328         QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
    329 
    330 private:
    331     static int32_t exec_count; // potential muilt-thread problem
    332 
    333     typedef	int (* Func)(const void *, const void *);
    334 
    335     Func    fn;
    336     void *  base;   //Start of target array.
    337     int32_t num;    //Array size in elements.
    338     int32_t width;  //Element size in bytes.
    339 
    340     void *  backup; //copy source of base
    341 public:
    342     CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func fn, int32_t,int32_t)
    343         :backup(theBase),num(num),width(width),fn(fn){
    344             base = malloc(num * width);
    345             time_empty(100, &status); // warm memory/cache
    346         }
    347 
    348         ~CmdQsort(){
    349             free(base);
    350         }
    351 
    352         void empty_call(){
    353             exec_count = 0;
    354             memcpy(base, backup, num * width);
    355         }
    356 
    357         double time_empty(int32_t n, UErrorCode* status) {
    358             UTimer start, stop;
    359             utimer_getTime(&start);
    360             while (n-- > 0) {
    361                 empty_call();
    362             }
    363             utimer_getTime(&stop);
    364             return utimer_getDeltaSeconds(&start,&stop); // ms
    365         }
    366 
    367         virtual void call(UErrorCode* status){
    368             exec_count = 0;
    369             memcpy(base, backup, num * width);
    370             qsort(base, num, width, fn);
    371         }
    372         virtual double time(int32_t n, UErrorCode* status) {
    373             double t1 = time_empty(n,status);
    374             double t2 = UPerfFunction::time(n, status);
    375             return  t2-t1;// < 0 ? t2 : t2-t1;
    376         }
    377 
    378         virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
    379 };
    380 int32_t CmdQsort::exec_count;
    381 
    382 
    383 class CmdBinSearch : public UPerfFunction{
    384 public:
    385     typedef	int (CmdBinSearch::* Func)(int, int);
    386 
    387     UCollator * col;
    388     DWORD       win_langid;
    389     int32_t     count;
    390     DataIndex * rnd;
    391     DataIndex * ord;
    392     Func 	    fn;
    393     int32_t     exec_count;
    394 
    395     CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)
    396         :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(fn),exec_count(0){}
    397 
    398 
    399         virtual void call(UErrorCode* status){
    400             exec_count = 0;
    401             for(int32_t i = 0; i< count; i++){ // search all data
    402                 binary_search(i);
    403             }
    404         }
    405         virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
    406 
    407         void binary_search(int32_t random)	{
    408             int low   = 0;
    409             int high  = count - 1;
    410             int guess;
    411             int last_guess = -1;
    412             int r;
    413             while (TRUE) {
    414                 guess = (high + low)/2;
    415                 if (last_guess == guess) break; // nothing to search
    416 
    417                 r = (this->*fn)(random, guess);
    418                 exec_count++;
    419 
    420                 if (r == 0)
    421                     return;	// found, search end.
    422                 if (r < 0) {
    423                     high = guess;
    424                 } else {
    425                     low  = guess;
    426                 }
    427                 last_guess = guess;
    428             }
    429         }
    430 
    431         int icu_strcoll_null(int32_t i, int32_t j){
    432             return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1);
    433         }
    434 
    435         int icu_strcoll_len(int32_t i, int32_t j){
    436             return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j].icu_data, ord[j].icu_data_len);
    437         }
    438 
    439         int icu_cmpkey(int32_t i, int32_t j) {
    440             return strcmp( (char *) rnd[i].icu_key, (char *) ord[j].icu_key );
    441         }
    442 
    443         int win_cmp_null(int32_t i, int32_t j) {
    444             int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].win_data, -1);
    445             if (t == 0){
    446                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    447                 exit(-1);
    448             } else{
    449                 return t - CSTR_EQUAL;
    450             }
    451         }
    452 
    453         int win_cmp_len(int32_t i, int32_t j) {
    454             int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_data_len, ord[j].win_data, ord[j].win_data_len);
    455             if (t == 0){
    456                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
    457                 exit(-1);
    458             } else{
    459                 return t - CSTR_EQUAL;
    460             }
    461         }
    462 
    463 #define BFUNC(name, func, data) \
    464     int name(int32_t i, int32_t j) { \
    465     return func(rnd[i].data, ord[j].data); \
    466     }
    467 
    468         BFUNC(posix_strcoll_null, strcoll, posix_data)
    469             BFUNC(posix_cmpkey, strcmp, posix_key)
    470             BFUNC(win_cmpkey, strcmp, win_key)
    471             BFUNC(win_wcscmp, wcscmp, win_data)
    472             BFUNC(icu_strcmp, u_strcmp, icu_data)
    473             BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
    474 };
    475 
    476 class CollPerfTest : public UPerfTest {
    477 public:
    478     UCollator *     col;
    479     DWORD           win_langid;
    480 
    481     UChar * icu_data_all;
    482     int32_t icu_data_all_len;
    483 
    484     int32_t         count;
    485     CA_uchar *      icu_data;
    486     CA_uint8 *      icu_key;
    487     CA_char *       posix_data;
    488     CA_char *       posix_key;
    489     CA_win_wchar *  win_data;
    490     CA_char *       win_key;
    491 
    492     DataIndex * rnd_index; // random by icu key
    493     DataIndex * ord_win_data;
    494     DataIndex * ord_win_key;
    495     DataIndex * ord_posix_data;
    496     DataIndex * ord_posix_key;
    497     DataIndex * ord_icu_data;
    498     DataIndex * ord_icu_key;
    499     DataIndex * ord_win_wcscmp;
    500     DataIndex * ord_icu_strcmp;
    501     DataIndex * ord_icu_cmpcpo;
    502 
    503     virtual ~CollPerfTest(){
    504         ucol_close(col);
    505         delete [] icu_data_all;
    506         delete icu_data;
    507         delete icu_key;
    508         delete posix_data;
    509         delete posix_key;
    510         delete win_data;
    511         delete win_key;
    512         delete[] rnd_index;
    513         delete[] ord_win_data;
    514         delete[] ord_win_key;
    515         delete[] ord_posix_data;
    516         delete[] ord_posix_key;
    517         delete[] ord_icu_data;
    518         delete[] ord_icu_key;
    519         delete[] ord_win_wcscmp;
    520         delete[] ord_icu_strcmp;
    521         delete[] ord_icu_cmpcpo;
    522     }
    523 
    524     CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest(argc, argv, status){
    525         col = NULL;
    526         icu_data_all = NULL;
    527         icu_data = NULL;
    528         icu_key = NULL;
    529         posix_data = NULL;
    530         posix_key = NULL;
    531         win_data =NULL;
    532         win_key = NULL;
    533 
    534         rnd_index = NULL;
    535         ord_win_data= NULL;
    536         ord_win_key= NULL;
    537         ord_posix_data= NULL;
    538         ord_posix_key= NULL;
    539         ord_icu_data= NULL;
    540         ord_icu_key= NULL;
    541         ord_win_wcscmp = NULL;
    542         ord_icu_strcmp = NULL;
    543         ord_icu_cmpcpo = NULL;
    544 
    545         if (U_FAILURE(status)){
    546             return;
    547         }
    548 
    549         // Parse additional arguments
    550 
    551         UOption options[] = {
    552             UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG),        // Windows Language ID number.
    553                 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG),      // --rulefile <filename>
    554                 // Collation related arguments. All are optional.
    555                 // To simplify parsing, two choice arguments are disigned as NO_ARG.
    556                 // The default value is UPPER word in the comment
    557                 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG),          // --french <on | OFF>
    558                 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG),       // --alternate <NON_IGNORE | shifted>
    559                 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefirst <lower | upper | OFF>
    560                 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG),       // --caselevel <on | OFF>
    561                 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG),          // --normal <on | OFF>
    562                 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG),  // --strength <1-5>
    563         };
    564         int32_t opt_len = (sizeof(options)/sizeof(options[0]));
    565         enum {i, r,f,a,c,l,n,s};   // The buffer between the option items' order and their references
    566 
    567         _remainingArgc = u_parseArgs(_remainingArgc, (char**)argv, opt_len, options);
    568 
    569         if (_remainingArgc < 0){
    570             status = U_ILLEGAL_ARGUMENT_ERROR;
    571             return;
    572         }
    573 
    574         if (locale == NULL){
    575             locale = "en_US";   // set default locale
    576         }
    577 
    578         //#ifdef U_WINDOWS
    579         if (options[i].doesOccur) {
    580             char *endp;
    581             int tmp = strtol(options[i].value, &endp, 0);
    582             if (endp == options[i].value) {
    583                 status = U_ILLEGAL_ARGUMENT_ERROR;
    584                 return;
    585             }
    586             win_langid = MAKELCID(tmp, SORT_DEFAULT);
    587         } else {
    588             win_langid = uloc_getLCID(locale);
    589         }
    590         //#endif
    591 
    592         //  Set up an ICU collator
    593         if (options[r].doesOccur) {
    594             // TODO: implement it
    595         } else {
    596             col = ucol_open(locale, &status);
    597             if (U_FAILURE(status)) {
    598                 return;
    599             }
    600         }
    601 
    602         if (options[f].doesOccur) {
    603             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
    604         } else {
    605             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
    606         }
    607 
    608         if (options[a].doesOccur) {
    609             ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    610         }
    611 
    612         if (options[c].doesOccur) { // strcmp() has i18n encoding problem
    613             if (strcmp("lower", options[c].value) == 0){
    614                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
    615             } else if (strcmp("upper", options[c].value) == 0) {
    616                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
    617             } else {
    618                 status = U_ILLEGAL_ARGUMENT_ERROR;
    619                 return;
    620             }
    621         }
    622 
    623         if (options[l].doesOccur){
    624             ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status);
    625         }
    626 
    627         if (options[n].doesOccur){
    628             ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    629         }
    630 
    631         if (options[s].doesOccur) {
    632             char *endp;
    633             int tmp = strtol(options[l].value, &endp, 0);
    634             if (endp == options[l].value) {
    635                 status = U_ILLEGAL_ARGUMENT_ERROR;
    636                 return;
    637             }
    638             switch (tmp) {
    639             case 1:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &status);		break;
    640             case 2:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &status);		break;
    641             case 3:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status);		break;
    642             case 4:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status);	break;
    643             case 5:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status);		break;
    644             default: status = U_ILLEGAL_ARGUMENT_ERROR;					return;
    645             }
    646         }
    647         prepareData(status);
    648     }
    649 
    // Positional test registration: avoids spelling out an explicit 'id' as
    // TESTCASE(id,test)-style macros require.  Expects `temp`, `index`, `name`
    // and `exec` to be in scope.  When `temp` matches `index` it sets `name`
    // and returns from the enclosing function: NULL if `exec` is false or the
    // constructor reported a failure, the new object otherwise.  If there is
    // no match it increments `temp`.
    #define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
    if (temp == index) { \
        name = #testname; \
        if (!exec) { \
            return NULL; \
        } \
        UErrorCode status = U_ZERO_ERROR; \
        UPerfFunction * t = new classname(status, arg1, arg2, arg3, arg4, arg5, arg6); \
        if (U_FAILURE(status)) { \
            delete t; \
            t = NULL; \
        } \
        return t; \
    } \
    temp++

    669 
    // Map a test index to a test name and, when `exec` is set, to a freshly
    // constructed benchmark object.
    //   index : zero-based position of the requested test in the list below.
    //   exec  : when false only `name` is filled in and NULL is returned.
    //   name  : receives the test name, or "" when index is past the last test.
    //   par   : not used in this function.
    // Returns the new UPerfFunction, or NULL (exec false, constructor
    // reported a failure, or index out of range).
    // The position of each TEST_* line defines its index, so the order of the
    // lines below is significant.
    670     virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
    671         int temp = 0;
    672 
            // Sort-key generation over rnd_index.
    673 #define TEST_KEYGEN(testname, func)\
    674     TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
    675         TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null);
    676         TEST_KEYGEN(TestIcu_KeyGen_len,  icu_key_len);
    677         TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null);
    678         TEST_KEYGEN(TestWin_KeyGen_null, win_key_null);
    679         TEST_KEYGEN(TestWin_KeyGen_len, win_key_len);
    680 
            // Collation-element iteration, one string at a time (icu_data).
    681 #define TEST_ITER(testname, func)\
    682     TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
    683         TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null);
    684         TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len);
    685         TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null);
    686         TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len);
    687 
            // Collation-element iteration over the concatenated text (icu_data_all).
    688 #define TEST_ITER_ALL(testname, func)\
    689     TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
    690         TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null);
    691         TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len);
    692         TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null);
    693         TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len);
    694 
            // qsort of a copy of rnd_index with each comparator.
    695 #define TEST_QSORT(testname, func)\
    696     TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
    697         TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null);
    698         TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len);
    699         TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey);
    700         TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null);
    701         TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey);
    702         TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null);
    703         TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len);
    704         TEST_QSORT(TestWin_qsort_usekey, win_cmpkey);
    705 
            // Binary search of every rnd_index record in ord_icu_key.
            // NOTE(review): every comparator, including the POSIX and Windows
            // ones, searches ord_icu_key rather than one of the other ord_*
            // members -- confirm this is intended.
    706 #define TEST_BIN(testname, func)\
    707     TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
    708         TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null);
    709         TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len);
    710         TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey);
    711         TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp);
    712         TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo);
    713         TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null);
    714         TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey);
    715         TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null);
    716         TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len);
    717         TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey);
    718         TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp);
    719 
            // No test matched: index is past the end of the list.
    720         name="";
    721         return NULL;
    722     }
    723 
    724 
    725 
    726     void prepareData(UErrorCode& status){
    727         if(U_FAILURE(status)) return;
    728         if (icu_data) return; // prepared
    729 
    730         icu_data = new CA_uchar();
    731 
    732         // Following code is borrowed from UPerfTest::getLines();
    733         const UChar*    line=NULL;
    734         int32_t         len =0;
    735         for (;;) {
    736             line = ucbuf_readline(ucharBuf,&len,&status);
    737             if(line == NULL || U_FAILURE(status)){break;}
    738 
    739             // Refer to the source code of ucbuf_readline()
    740             // 1. 'len' includs the line terminal symbols
    741             // 2. The length of the line terminal symbols is only one character
    742             // 3. The Windows CR LF line terminal symbols will be converted to CR
    743 
    744             if (len == 1) {
    745                 continue; //skip empty line
    746             } else {
    747                 icu_data->append_one(len);
    748                 memcpy(icu_data->last(), line, len * sizeof(UChar));
    749                 icu_data->last()[len -1] = NULL;
    750             }
    751         }
    752         if(U_FAILURE(status)) return;
    753 
    754         // UTF-16 -> UTF-8 conversion.
    755         UConverter   *conv = ucnv_open("utf-8", &status); // just UTF-8 for now.
    756         if (U_FAILURE(status)) return;
    757 
    758         count = icu_data->count;
    759 
    760         icu_data_all_len =  icu_data->index[count]; // includes all NULLs
    761         icu_data_all_len -= count;  // excludes all NULLs
    762         icu_data_all_len += 1;      // the terminal NULL
    763         icu_data_all = new UChar[icu_data_all_len];
    764         icu_data_all[icu_data_all_len - 1] = 0; //the terminal NULL
    765 
    766         icu_key  = new CA_uint8;
    767         win_data = new CA_win_wchar;
    768         win_key  = new CA_char;
    769         posix_data = new CA_char;
    770         posix_key = new CA_char;
    771         rnd_index = new DataIndex[count];
    772         DataIndex::win_langid = win_langid;
    773         DataIndex::col        = col;
    774 
    775 
    776         UChar * p = icu_data_all;
    777         int32_t s;
    778         int32_t t;
    779         for (int i=0; i < count; i++) {
    780             // ICU all data
    781             s = sizeof(UChar) * icu_data->lengthOf(i);
    782             memcpy(p, icu_data->dataOf(i), s);
    783             p += icu_data->lengthOf(i);
    784 
    785             // ICU data
    786 
    787             // ICU key
    788             s = ucol_getSortKey(col, icu_data->dataOf(i), -1,NULL, 0);
    789             icu_key->append_one(s);
    790             t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s);
    791             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
    792 
    793             // POSIX data
    794             s = ucnv_fromUChars(conv,NULL, 0, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
    795             if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
    796                 status = U_ZERO_ERROR;
    797             } else {
    798                 return;
    799             }
    800             posix_data->append_one(s + 1); // plus terminal NULL
    801             t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
    802             if (U_FAILURE(status)) return;
    803             if ( t != s){status = U_INVALID_FORMAT_ERROR;return;}
    804             posix_data->last()[s] = 0;
    805 
    806             // POSIX key
    807             s = strxfrm(NULL, posix_data->dataOf(i), 0);
    808             if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;}
    809             posix_key->append_one(s);
    810             t = strxfrm(posix_key->last(), posix_data->dataOf(i), s);
    811             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
    812 
    813             // Win data
    814             s = icu_data->lengthOf(i) + 1; // plus terminal NULL
    815             win_data->append_one(s);
    816             memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s);
    817 
    818             // Win key
    819             s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), NULL,0);
    820             if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;}
    821             win_key->append_one(s);
    822             t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), (WCHAR *)(win_key->last()),s);
    823             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
    824 
    825         };
    826 
    827         // append_one() will make points shifting, should not merge following code into previous iteration
    828         for (int i=0; i < count; i++) {
    829             rnd_index[i].icu_key = icu_key->dataOf(i);
    830             rnd_index[i].icu_data = icu_data->dataOf(i);
    831             rnd_index[i].icu_data_len = icu_data->lengthOf(i);
    832             rnd_index[i].posix_key = posix_key->last();
    833             rnd_index[i].posix_data = posix_data->dataOf(i);
    834             rnd_index[i].posix_data_len = posix_data->lengthOf(i);
    835             rnd_index[i].win_key = win_key->dataOf(i);
    836             rnd_index[i].win_data = win_data->dataOf(i);
    837             rnd_index[i].win_data_len = win_data->lengthOf(i);
    838         };
    839 
    840         ucnv_close(conv);
    841         qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random);
    842 
    843 #define SORT(data, func) \
    844     data = new DataIndex[count];\
    845     memcpy(data, rnd_index, count * sizeof(DataIndex));\
    846     qsort(data, count, sizeof(DataIndex), CmdQsort::func)
    847 
    848         SORT(ord_icu_data, icu_strcoll_len);
    849         SORT(ord_icu_key, icu_cmpkey);
    850         SORT(ord_posix_data, posix_strcoll_null);
    851         SORT(ord_posix_key, posix_cmpkey);
    852         SORT(ord_win_data, win_cmp_len);
    853         SORT(ord_win_key, win_cmpkey);
    854         SORT(ord_win_wcscmp, win_wcscmp);
    855         SORT(ord_icu_strcmp, icu_strcmp);
    856         SORT(ord_icu_cmpcpo, icu_cmpcpo);
    857     }
    858 };
    859 
    860 
    861 int main(int argc, const char *argv[])
    862 {
    863 
    864     UErrorCode status = U_ZERO_ERROR;
    865     CollPerfTest test(argc, argv, status);
    866 
    867     if (U_FAILURE(status)){
    868         printf("The error is %s\n", u_errorName(status));
    869         //TODO: print usage here
    870         return status;
    871     }
    872 
    873     if (test.run() == FALSE){
    874         fprintf(stderr, "FAILED: Tests could not be run please check the "
    875             "arguments.\n");
    876         return -1;
    877     }
    878     return 0;
    879 }
    880 
    881