Home | History | Annotate | Download | only in perf
      1 /**
      2 *******************************************************************************
      3 * Copyright (C) 2002-2007, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 */
      7 
      8 package com.ibm.icu.dev.test.perf;
      9 
     10 import com.ibm.icu.text.*;
     11 import java.util.*;
     12 import java.io.*;
     13 import com.ibm.icu.impl.LocaleUtility;
     14 
     15 public class CollationPerformanceTest {
     16     static final String usageString =
     17         "usage:  collperf options...\n"
     18         + "-help                      Display this message.\n"
     19         + "-file file_name            utf-16 format file of names.\n"
     20         + "-locale name               ICU locale to use.  Default is en_US\n"
     21         + "-rules file_name           Collation rules file (overrides locale)\n"
     22         //+ "-langid 0x1234             Windows Language ID number.  Default to value for -locale option\n"
     23         //+ "                              see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
     24         //+ "-win                       Run test using Windows native services.  (ICU is default)\n"
     25         //+ "-unix                      Run test using Unix strxfrm, strcoll services.\n"
     26         //+ "-uselen                    Use API with string lengths.  Default is null-terminated strings\n"
     27         + "-usekeys                   Run tests using sortkeys rather than strcoll\n"
     28         + "-strcmp                    Run tests using u_strcmp rather than strcoll\n"
     29         + "-strcmpCPO                 Run tests using u_strcmpCodePointOrder rather than strcoll\n"
     30         + "-loop nnnn                 Loopcount for test.  Adjust for reasonable total running time.\n"
     31         + "-iloop n                   Inner Loop Count.  Default = 1.  Number of calls to function\n"
     32         + "                               under test at each call point.  For measuring test overhead.\n"
     33         + "-terse                     Terse numbers-only output.  Intended for use by scripts.\n"
     34         + "-french                    French accent ordering\n"
     35         + "-frenchoff                 No French accent ordering (for use with French locales.)\n"
     36         + "-norm                      Normalizing mode on\n"
     37         + "-shifted                   Shifted mode\n"
     38         + "-lower                     Lower case first\n"
     39         + "-upper                     Upper case first\n"
     40         + "-case                      Enable separate case level\n"
     41         + "-level n                   Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n"
     42         + "-keyhist                   Produce a table sort key size vs. string length\n"
     43         + "-binsearch                 Binary Search timing test\n"
     44         + "-keygen                    Sort Key Generation timing test\n"
     45         + "-qsort                     Quicksort timing test\n"
     46         + "-iter                      Iteration Performance Test\n"
     47         + "-dump                      Display strings, sort keys and CEs.\n"
     48         + "-java                      Run test using java.text.Collator.\n";
     49 
     50     //enum {FLAG, NUM, STRING} type;
     51     static StringBuffer temp_opt_fName      = new StringBuffer("");
     52     static StringBuffer temp_opt_locale     = new StringBuffer("en_US");
     53     //static StringBuffer temp_opt_langid     = new StringBuffer("0");         // Defaults to value corresponding to opt_locale.
     54     static StringBuffer temp_opt_rules      = new StringBuffer("");
     55     static StringBuffer temp_opt_help       = new StringBuffer("");
     56     static StringBuffer temp_opt_loopCount  = new StringBuffer("1");
     57     static StringBuffer temp_opt_iLoopCount = new StringBuffer("1");
     58     static StringBuffer temp_opt_terse      = new StringBuffer("false");
     59     static StringBuffer temp_opt_qsort      = new StringBuffer("");
     60     static StringBuffer temp_opt_binsearch  = new StringBuffer("");
     61     static StringBuffer temp_opt_icu        = new StringBuffer("true");
     62     //static StringBuffer opt_win        = new StringBuffer("");      // Run with Windows native functions.
     63     //static StringBuffer opt_unix       = new StringBuffer("");      // Run with UNIX strcoll, strxfrm functions.
     64     //static StringBuffer opt_uselen     = new StringBuffer("");
     65     static StringBuffer temp_opt_usekeys    = new StringBuffer("");
     66     static StringBuffer temp_opt_strcmp     = new StringBuffer("");
     67     static StringBuffer temp_opt_strcmpCPO  = new StringBuffer("");
     68     static StringBuffer temp_opt_norm       = new StringBuffer("");
     69     static StringBuffer temp_opt_keygen     = new StringBuffer("");
     70     static StringBuffer temp_opt_french     = new StringBuffer("");
     71     static StringBuffer temp_opt_frenchoff  = new StringBuffer("");
     72     static StringBuffer temp_opt_shifted    = new StringBuffer("");
     73     static StringBuffer temp_opt_lower      = new StringBuffer("");
     74     static StringBuffer temp_opt_upper      = new StringBuffer("");
     75     static StringBuffer temp_opt_case       = new StringBuffer("");
     76     static StringBuffer temp_opt_level      = new StringBuffer("0");
     77     static StringBuffer temp_opt_keyhist    = new StringBuffer("");
     78     static StringBuffer temp_opt_itertest   = new StringBuffer("");
     79     static StringBuffer temp_opt_dump       = new StringBuffer("");
     80     static StringBuffer temp_opt_java       = new StringBuffer("");
     81 
     82 
     83     static String   opt_fName      = "";
     84     static String   opt_locale     = "en_US";
     85     //static int      opt_langid     = 0;         // Defaults to value corresponding to opt_locale.
     86     static String   opt_rules      = "";
     87     static boolean  opt_help       = false;
     88     static int      opt_loopCount  = 1;
     89     static int      opt_iLoopCount = 1;
     90     static boolean  opt_terse      = false;
     91     static boolean  opt_qsort      = false;
     92     static boolean  opt_binsearch  = false;
     93     static boolean  opt_icu        = true;
     94     //static boolean  opt_win        = false;      // Run with Windows native functions.
     95     //static boolean  opt_unix       = false;      // Run with UNIX strcoll, strxfrm functions.
     96     //static boolean  opt_uselen     = false;
     97     static boolean  opt_usekeys    = false;
     98     static boolean  opt_strcmp     = false;
     99     static boolean  opt_strcmpCPO  = false;
    100     static boolean  opt_norm       = false;
    101     static boolean  opt_keygen     = false;
    102     static boolean  opt_french     = false;
    103     static boolean  opt_frenchoff  = false;
    104     static boolean  opt_shifted    = false;
    105     static boolean  opt_lower      = false;
    106     static boolean  opt_upper      = false;
    107     static boolean  opt_case       = false;
    108     static int      opt_level      = 0;
    109     static boolean  opt_keyhist    = false;
    110     static boolean  opt_itertest   = false;
    111     static boolean  opt_dump       = false;
    112     static boolean  opt_java       = false;
    113 
    114     static OptionSpec[] options = {
    115         new OptionSpec("-file", 2, temp_opt_fName),
    116         new OptionSpec("-locale", 2, temp_opt_locale),
    117         //new OptionSpec("-langid", 1, temp_opt_langid),
    118         new OptionSpec("-rules", 2, temp_opt_rules),
    119         new OptionSpec("-qsort", 0, temp_opt_qsort),
    120         new OptionSpec("-binsearch", 0, temp_opt_binsearch),
    121         new OptionSpec("-iter", 0, temp_opt_itertest),
    122         //new OptionSpec("-win", 0, temp_opt_win),
    123         //new OptionSpec("-unix", 0, temp_opt_unix),
    124         //new OptionSpec("-uselen", 0, temp_opt_uselen),
    125         new OptionSpec("-usekeys", 0, temp_opt_usekeys),
    126         new OptionSpec("-strcmp", 0, temp_opt_strcmp),
    127         new OptionSpec("-strcmpCPO", 0, temp_opt_strcmpCPO),
    128         new OptionSpec("-norm", 0, temp_opt_norm),
    129         new OptionSpec("-french", 0, temp_opt_french),
    130         new OptionSpec("-frenchoff", 0, temp_opt_frenchoff),
    131         new OptionSpec("-shifted", 0, temp_opt_shifted),
    132         new OptionSpec("-lower", 0, temp_opt_lower),
    133         new OptionSpec("-upper", 0, temp_opt_upper),
    134         new OptionSpec("-case", 0, temp_opt_case),
    135         new OptionSpec("-level", 1, temp_opt_level),
    136         new OptionSpec("-keyhist", 0, temp_opt_keyhist),
    137         new OptionSpec("-keygen", 0, temp_opt_keygen),
    138         new OptionSpec("-loop", 1, temp_opt_loopCount),
    139         new OptionSpec("-iloop", 1, temp_opt_iLoopCount),
    140         new OptionSpec("-terse", 0, temp_opt_terse),
    141         new OptionSpec("-dump", 0, temp_opt_dump),
    142         new OptionSpec("-help", 0, temp_opt_help),
    143         new OptionSpec("-?", 0, temp_opt_help),
    144         new OptionSpec("-java", 0, temp_opt_java),
    145     };
    146 
    147     static java.text.Collator javaCol = null;
    148     static com.ibm.icu.text.Collator icuCol = null;
    149     static NumberFormat nf = null;
    150     static NumberFormat percent = null;
    151     ArrayList list = null;
    152     String[] tests = null;
    153     int globalCount = 0;
    154 
    155     public static void main(String[] args) {
    156         CollationPerformanceTest collPerf = new CollationPerformanceTest();
    157         if ( !CollationPerformanceTest.processOptions(args) || opt_help || opt_fName.length()==0) {
    158             System.out.println(usageString);
    159             System.exit(1);
    160         }
    161 
    162         nf = NumberFormat.getInstance();
    163         nf.setMaximumFractionDigits(2);
    164         percent = NumberFormat.getPercentInstance();
    165 
    166         collPerf.setOptions();
    167         collPerf.readDataLines();
    168 
    169         if (opt_dump) {
    170             collPerf.doDump();
    171         }
    172 
    173         if (opt_qsort) {
    174             collPerf.doQSort();
    175         }
    176 
    177         if (opt_binsearch) {
    178             collPerf.doBinarySearch();
    179         }
    180 
    181         if (opt_keygen) {
    182             collPerf.doKeyGen();
    183         }
    184 
    185         if (opt_keyhist) {
    186             collPerf.doKeyHist();
    187         }
    188 
    189         if (opt_itertest) {
    190             collPerf.doIterTest();
    191         }
    192 
    193     }
    194 
    195     //Dump file lines, CEs, Sort Keys if requested
    196     void doDump() {
    197         for(int i = 0; i < list.size(); i++) {
    198             //print the line
    199             String line = com.ibm.icu.impl.Utility.escape((String)list.get(i));
    200             System.out.println(line);
    201 
    202             System.out.print("  CEs:  ");
    203             CollationElementIterator CEiter = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationElementIterator(line);
    204             int ce;
    205             int j = 0;
    206             for(;;) {
    207                 ce = CEiter.next();
    208                 if (ce == CollationElementIterator.NULLORDER) {
    209                     break;
    210                 }
    211                 //System.out.print();
    212                 String outStr = Integer.toHexString(ce);
    213                 for (int len = 0; len < 8 - outStr.length(); len++) {
    214                     outStr ='0' + outStr;
    215                 }
    216                 System.out.print(outStr + "  ");
    217                 if(++j >8) {
    218                     System.out.print("\n        ");
    219                     j = 0;
    220                 }
    221             }
    222 
    223             System.out.print("\n   ICU Sort Key: ");
    224             CollationKey ck = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationKey(line);
    225             byte[] cks = ck.toByteArray();
    226             j = 0;
    227             for(int k = 0; k < cks.length; k++) {
    228                 String outStr = Integer.toHexString(cks[k]);
    229                 switch (outStr.length()) {
    230                 case 1:     outStr = '0' + outStr;
    231                             break;
    232                 case 8:     outStr = outStr.substring(6);
    233                             break;
    234                 }
    235                 System.out.print(outStr);
    236                 System.out.print("  ");
    237                 j++;
    238                 if(j > 0 && j % 20 == 0) {
    239                     System.out.print("\n                 ");
    240                 }
    241             }
    242             System.out.println("\n");
    243         }
    244     }
    245 
    246     /**---------------------------------------------------------------------------------------
    247      *
    248      *   doQSort()    The quick sort timing test.
    249      *
    250      *---------------------------------------------------------------------------------------
    251      */
    252     void doQSort() {
    253         callGC();
    254         //String[] sortTests = (String[]) tests.clone();
    255         //Adjust loop count to compensate for file size. QSort should be nlog(n)
    256         double dLoopCount = opt_loopCount * 3000 / ((Math.log(tests.length) / Math.log(10)* tests.length));
    257 
    258         if(opt_usekeys) {
    259             dLoopCount *= 5;
    260         }
    261 
    262         int adj_loopCount = (int)dLoopCount;
    263         if(adj_loopCount < 1) {
    264             adj_loopCount = 1;
    265         }
    266 
    267         globalCount = 0;
    268         long startTime = 0;
    269         long endTime = 0;
    270         if (opt_icu && opt_usekeys) {
    271             startTime = System.currentTimeMillis();
    272             qSortImpl_icu_usekeys(tests, 0, tests.length -1, icuCol);
    273             endTime = System.currentTimeMillis();
    274         }
    275         if (opt_icu && !opt_usekeys){
    276             startTime = System.currentTimeMillis();
    277             qSortImpl_nokeys(tests, 0, tests.length -1, icuCol);
    278             endTime = System.currentTimeMillis();
    279         }
    280         if (opt_java && opt_usekeys) {
    281             startTime = System.currentTimeMillis();
    282             qSortImpl_java_usekeys(tests, 0, tests.length -1, javaCol);
    283             endTime = System.currentTimeMillis();
    284         }
    285         if (opt_java && !opt_usekeys){
    286             startTime = System.currentTimeMillis();
    287             qSortImpl_nokeys(tests, 0, tests.length -1, javaCol);
    288             endTime = System.currentTimeMillis();
    289         }
    290         long elapsedTime = endTime - startTime;
    291         int ns = (int)(1000000 * elapsedTime / (globalCount + 0.0));
    292         if (!opt_terse) {
    293             System.out.println("qsort:  total # of string compares = " + globalCount);
    294             System.out.println("qsort:  time per compare = " + ns);
    295         } else {
    296             System.out.println(ns);
    297         }
    298     }
    299 
    300     /**---------------------------------------------------------------------------------------
    301      *
    302      *    doBinarySearch()    Binary Search timing test.  Each name from the list
    303      *                        is looked up in the full sorted list of names.
    304      *
    305      *---------------------------------------------------------------------------------------
    306      */
    307     void doBinarySearch() {
    308         callGC();
    309         int gCount = 0;
    310         int loops = 0;
    311         double dLoopCount = opt_loopCount * 3000 / (Math.log(tests.length) / Math.log(10)* tests.length);
    312         long startTime = 0;
    313         long elapsedTime = 0;
    314 
    315         if(opt_usekeys) {
    316             dLoopCount *= 5;
    317         }
    318         int adj_loopCount = (int)dLoopCount;
    319         if(adj_loopCount < 1) {
    320             adj_loopCount = 1;
    321         }
    322 
    323         //int opt2 = 0;
    324 
    325         for(;;) {   //not really a loop, just allows "break" to work, to simplify
    326                     //inadvertantly running more than one test through here
    327             if(opt_strcmp) {
    328                 int r = 0;
    329                 startTime = System.currentTimeMillis();
    330                 for(loops = 0; loops < adj_loopCount; loops++) {
    331                     for (int j = 0; j < tests.length; j++) {
    332                         int hi = tests.length-1;
    333                         int lo = 0;
    334                         int guess = -1;
    335                         for(;;) {
    336                             int newGuess = (hi + lo) / 2;
    337                             if(newGuess == guess){
    338                                 break;
    339                             }
    340                             guess = newGuess;
    341                             r = tests[j].compareTo(tests[guess]);
    342                             gCount++;
    343                             if(r == 0) {
    344                                 break;
    345                             }
    346                             if (r < 0) {
    347                                 hi = guess;
    348                             } else {
    349                                 lo = guess;
    350                             }
    351                         }
    352                     }
    353                 }
    354                 elapsedTime = System.currentTimeMillis() - startTime;
    355                 break;
    356             }
    357 
    358             if (opt_strcmpCPO) {
    359                 int r = 0;
    360                 startTime = System.currentTimeMillis();
    361                 for(loops = 0; loops < adj_loopCount; loops++) {
    362                     for (int j = 0; j < tests.length; j++) {
    363                         int hi = tests.length-1;
    364                         int lo = 0;
    365                         int guess = -1;
    366                         for(;;) {
    367                             int newGuess = (hi + lo) / 2;
    368                             if(newGuess == guess){
    369                                 break;
    370                             }
    371                             guess = newGuess;
    372                             r = com.ibm.icu.text.Normalizer.compare(tests[j], tests[guess], Normalizer.COMPARE_CODE_POINT_ORDER);
    373                             gCount++;
    374                             if(r == 0) {
    375                                 break;
    376                             }
    377                             if (r < 0) {
    378                                 hi = guess;
    379                             } else {
    380                                 lo = guess;
    381                             }
    382                         }
    383                     }
    384                 }
    385                 elapsedTime = System.currentTimeMillis() - startTime;
    386                 break;
    387             }
    388 
    389             if (opt_icu) {
    390 
    391                 int r = 0;
    392                 startTime = System.currentTimeMillis();
    393                 for (loops = 0; loops < adj_loopCount; loops++) {
    394                     for (int j = 0; j < tests.length; j++) {
    395                         int hi = tests.length - 1;
    396                         int lo = 0;
    397                         int guess = -1;
    398                         for (;;) {
    399                             int newGuess = (hi + lo) / 2;
    400                             if (newGuess == guess) {
    401                                 break;
    402                             }
    403                             guess = newGuess;
    404                             if (opt_usekeys) {
    405                                 com.ibm.icu.text.CollationKey sortKey1 = icuCol.getCollationKey(tests[j]);
    406                                 com.ibm.icu.text.CollationKey sortKey2 = icuCol.getCollationKey(tests[guess]);
    407                                 r = sortKey1.compareTo(sortKey2);
    408                                 gCount ++;
    409                             } else {
    410                                 r = icuCol.compare(tests[j], tests[guess]);
    411                                 gCount++;
    412                             }
    413                             if (r == 0) {
    414                                 break;
    415                             }
    416                             if (r < 0) {
    417                                 hi = guess;
    418                             } else {
    419                                 lo = guess;
    420                             }
    421                         }
    422                     }
    423                 }
    424                 elapsedTime = System.currentTimeMillis() - startTime;
    425                 break;
    426             }
    427             if (opt_java) {
    428 
    429                 int r = 0;
    430                 startTime = System.currentTimeMillis();
    431                 for (loops = 0; loops < adj_loopCount; loops++) {
    432                     for (int j = 0; j < tests.length; j++) {
    433                         int hi = tests.length - 1;
    434                         int lo = 0;
    435                         int guess = -1;
    436                         for (;;) {
    437                             int newGuess = (hi + lo) / 2;
    438                             if (newGuess == guess) {
    439                                 break;
    440                             }
    441                             guess = newGuess;
    442                             if (opt_usekeys) {
    443                                 java.text.CollationKey sortKey1 = javaCol.getCollationKey(tests[j]);
    444                                 java.text.CollationKey sortKey2 = javaCol.getCollationKey(tests[guess]);
    445                                 r = sortKey1.compareTo(sortKey2);
    446                                 gCount ++;
    447                             } else {
    448                                 r = javaCol.compare(tests[j], tests[guess]);
    449                                 gCount++;
    450                             }
    451                             if (r == 0) {
    452                                 break;
    453                             }
    454                             if (r < 0) {
    455                                 hi = guess;
    456                             } else {
    457                                 lo = guess;
    458                             }
    459                         }
    460                     }
    461                 }
    462                 elapsedTime = System.currentTimeMillis() - startTime;
    463                 break;
    464             }
    465             break;
    466         }
    467         int ns = (int)((float)(1000000) * (float)elapsedTime / (float)gCount);
    468         if (!opt_terse) {
    469             System.out.println("binary search:  total # of string compares = " + gCount);
    470             System.out.println("binary search:  compares per loop = " + gCount / loops);
    471             System.out.println("binary search:  time per compare = " + ns);
    472         } else {
    473             System.out.println(ns);
    474         }
    475     }
    476 
    477     /**---------------------------------------------------------------------------------------
    478      *
    479      *   doKeyGen()     Key Generation Timing Test
    480      *
    481      *---------------------------------------------------------------------------------------
    482      */
    483     void doKeyGen() {
    484         callGC();
    485 
    486         // Adjust loop count to compensate for file size.   Should be order n
    487         double dLoopCount = opt_loopCount * (1000.0 /  (double)list.size());
    488         int adj_loopCount = (int)dLoopCount;
    489         if (adj_loopCount < 1) adj_loopCount = 1;
    490 
    491         long startTime = 0;
    492         long totalKeyLen = 0;
    493         long totalChars = 0;
    494         if (opt_java) {
    495             startTime = System.currentTimeMillis();
    496             for (int loops=0; loops<adj_loopCount; loops++) {
    497                 for (int line=0; line < tests.length; line++) {
    498                     for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
    499                         totalChars += tests[line].length();
    500                         byte[] sortKey = javaCol.getCollationKey(tests[line]).toByteArray();
    501                         totalKeyLen += sortKey.length;
    502                     }
    503                 }
    504             }
    505         } else {
    506             startTime = System.currentTimeMillis();
    507             for (int loops=0; loops<adj_loopCount; loops++) {
    508                 for (int line=0; line < tests.length; line++) {
    509                     for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
    510                         totalChars += tests[line].length();
    511                         byte[] sortKey = icuCol.getCollationKey(tests[line]).toByteArray();
    512                         totalKeyLen += sortKey.length;
    513                     }
    514                 }
    515             }
    516         }
    517 
    518         long elapsedTime = System.currentTimeMillis() - startTime;
    519         long ns = (long)(1000000 * elapsedTime / (adj_loopCount * tests.length + 0.0));
    520         if (!opt_terse) {
    521             System.out.println("Sort Key Generation:  total # of keys =" + adj_loopCount * tests.length);
    522             System.out.println("Sort Key Generation:  time per key = " + ns + " ns");
    523             System.out.println("Key Length / character = " + nf.format(totalKeyLen / (totalChars + 0.0)));
    524         }
    525         else {
    526             System.out.print(ns + ",  ");
    527             System.out.println(nf.format(totalKeyLen / (totalChars + 0.0)) + ", ");
    528         }
    529     }
    530 
    531     /**---------------------------------------------------------------------------------------
    532      *
    533      *    doKeyHist()       Output a table of data for average sort key size vs. string length.
    534      *
    535      *---------------------------------------------------------------------------------------
    536      */
    537     void doKeyHist() {
    538         callGC();
    539         int     maxLen = 0;
    540 
    541         // Find the maximum string length
    542         for (int i = 0; i < tests.length; i++) {
    543             if (tests[i].length() > maxLen) maxLen = tests[i].length();
    544         }
    545 
    546         int[] accumulatedLen  = new int[maxLen + 1];
    547         int[] numKeysOfSize   = new int[maxLen + 1];
    548 
    549         // Fill the arrays...
    550         for (int i = 0; i < tests.length; i++) {
    551             int len = tests[i].length();
    552             accumulatedLen[len] += icuCol.getCollationKey(tests[i]).toByteArray().length;
    553             numKeysOfSize[len]  += 1;
    554         }
    555 
    556         // And write out averages
    557         System.out.println("String Length,  Avg Key Length,  Avg Key Len per char");
    558         for (int i = 1; i <= maxLen; i++) {
    559             if (numKeysOfSize[i] > 0) {
    560                 System.out.println(i + ", " + nf.format(accumulatedLen[i] / (numKeysOfSize[i]+ 0.0)) + ", "
    561                     + nf.format(accumulatedLen[i] / (numKeysOfSize[i] * i + 0.0)));
    562             }
    563         }
    564 
    565     }
    566 
    567     void doForwardIterTest() {
    568         callGC();
    569         System.out.print("\n\nPerforming forward iteration performance test with ");
    570         System.out.println("performance test on strings from file -----------");
    571 
    572         CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator("");
    573 
    574         int gCount = 0;
    575         int count = 0;
    576         long startTime = System.currentTimeMillis();
    577         while (count < opt_loopCount) {
    578             int linecount = 0;
    579             while (linecount < tests.length) {
    580                 String str = tests[linecount];
    581                 iter.setText(str);
    582                 while (iter.next() != CollationElementIterator.NULLORDER) {
    583                     gCount++;
    584                 }
    585                 linecount ++;
    586             }
    587             count ++;
    588         }
    589 
    590         long elapsedTime = System.currentTimeMillis() - startTime;
    591         System.out.println("elapsedTime " + elapsedTime + " ms");
    592 
    593         // empty loop recalculation
    594         count = 0;
    595         startTime = System.currentTimeMillis();
    596         while (count < opt_loopCount) {
    597             int linecount = 0;
    598             while (linecount < tests.length) {
    599                 String str = tests[linecount];
    600                 iter.setText(str);
    601                 linecount ++;
    602             }
    603             count ++;
    604         }
    605         elapsedTime -= (System.currentTimeMillis() - startTime);
    606         System.out.println("elapsedTime " + elapsedTime + " ms");
    607 
    608         int ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
    609         System.out.println("Total number of strings compared " + tests.length
    610                             + "in " + opt_loopCount + " loops");
    611         System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns);
    612         System.out.println("performance test on skipped-5 concatenated strings from file -----------");
    613 
    614         String totalStr = "";
    615         int    strlen = 0;
    616         // appending all the strings
    617         int linecount = 0;
    618         while (linecount < tests.length) {
    619             totalStr += tests[linecount];
    620             strlen += tests[linecount].length();
    621             linecount ++;
    622         }
    623         System.out.println("Total size of strings " + strlen);
    624 
    625         gCount = 0;
    626         count  = 0;
    627         iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr);
    628         strlen -= 5; // any left over characters are not iterated,
    629                      // this is to ensure the backwards and forwards iterators
    630                      // gets the same position
    631         int strindex = 0;
    632         startTime = System.currentTimeMillis();
    633         while (count < opt_loopCount) {
    634             int count5 = 5;
    635             strindex = 0;
    636             iter.setOffset(strindex);
    637             while (true) {
    638                 if (iter.next() == CollationElementIterator.NULLORDER) {
    639                     break;
    640                 }
    641                 gCount++;
    642                 count5 --;
    643                 if (count5 == 0) {
    644                     strindex += 10;
    645                     if (strindex > strlen) {
    646                         break;
    647                     }
    648                     iter.setOffset(strindex);
    649                     count5 = 5;
    650                 }
    651             }
    652             count ++;
    653         }
    654 
    655         elapsedTime = System.currentTimeMillis() - startTime;
    656         System.out.println("elapsedTime " + elapsedTime);
    657 
    658         // empty loop recalculation
    659         int tempgCount = 0;
    660         count = 0;
    661         startTime = System.currentTimeMillis();
    662         while (count < opt_loopCount) {
    663             int count5 = 5;
    664             strindex = 0;
    665             iter.setOffset(strindex);
    666             while (true) {
    667                 tempgCount ++;
    668                 count5 --;
    669                 if (count5 == 0) {
    670                     strindex += 10;
    671                     if (strindex > strlen) {
    672                         break;
    673                     }
    674                     iter.setOffset(strindex);
    675                     count5 = 5;
    676                 }
    677             }
    678             count ++;
    679         }
    680         elapsedTime -= (System.currentTimeMillis() - startTime);
    681         System.out.println("elapsedTime " + elapsedTime);
    682 
    683         System.out.println("gCount " + gCount);
    684         ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
    685         System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns);
    686     }
    687 
    688     void doBackwardIterTest() {
    689         System.out.print("\n\nPerforming backward iteration performance test with ");
    690         System.out.println("performance test on strings from file -----------\n");
    691 
    692         CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator("");
    693 
    694         int gCount = 0;
    695         int count = 0;
    696         long startTime = System.currentTimeMillis();
    697         while (count < opt_loopCount) {
    698             int linecount = 0;
    699             while (linecount < tests.length) {
    700                 String str = tests[linecount];
    701                 iter.setText(str);
    702                 while (iter.previous() != CollationElementIterator.NULLORDER) {
    703                     gCount++;
    704                 }
    705                 linecount ++;
    706             }
    707             count ++;
    708         }
    709         long elapsedTime = System.currentTimeMillis() - startTime;
    710         System.out.println("elapsedTime " + elapsedTime + " ms");
    711 
    712         // empty loop recalculation
    713         count = 0;
    714         startTime = System.currentTimeMillis();
    715         while (count < opt_loopCount) {
    716             int linecount = 0;
    717             while (linecount < tests.length) {
    718                 String str = tests[linecount];
    719                 iter.setText(str);
    720                 linecount ++;
    721             }
    722             count ++;
    723         }
    724         elapsedTime -= (System.currentTimeMillis() - startTime);
    725         System.out.println("elapsedTime " + elapsedTime + " ms");
    726 
    727         int ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
    728         System.out.println("Total number of strings compared " + tests.length
    729                             + "in " + opt_loopCount + " loops");
    730         System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns);
    731         System.out.println("performance test on skipped-5 concatenated strings from file -----------");
    732 
    733         String totalStr = "";
    734         int    strlen = 0;
    735         // appending all the strings
    736         int linecount = 0;
    737         while (linecount < tests.length) {
    738             totalStr += tests[linecount];
    739             strlen += tests[linecount].length();
    740             linecount ++;
    741         }
    742         System.out.println("Total size of strings " + strlen);
    743 
    744         gCount = 0;
    745         count  = 0;
    746 
    747         iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr);
    748         int strindex = 0;
    749         startTime = System.currentTimeMillis();
    750         while (count < opt_loopCount) {
    751             int count5 = 5;
    752             strindex = 5;
    753             iter.setOffset(strindex);
    754             while (true) {
    755                 if (iter.previous() == CollationElementIterator.NULLORDER) {
    756                     break;
    757                 }
    758                  gCount ++;
    759                  count5 --;
    760                  if (count5 == 0) {
    761                      strindex += 10;
    762                      if (strindex > strlen) {
    763                         break;
    764                      }
    765                      iter.setOffset(strindex);
    766                      count5 = 5;
    767                  }
    768             }
    769             count ++;
    770         }
    771 
    772         elapsedTime = System.currentTimeMillis() - startTime;
    773         System.out.println("elapsedTime " + elapsedTime);
    774 
    775         // empty loop recalculation
    776         count = 0;
    777         int tempgCount = 0;
    778         startTime = System.currentTimeMillis();
    779         while (count < opt_loopCount) {
    780             int count5 = 5;
    781             strindex = 5;
    782             iter.setOffset(strindex);
    783             while (true) {
    784                  tempgCount ++;
    785                  count5 --;
    786                  if (count5 == 0) {
    787                      strindex += 10;
    788                      if (strindex > strlen) {
    789                         break;
    790                      }
    791                      iter.setOffset(strindex);
    792                      count5 = 5;
    793                  }
    794             }
    795             count ++;
    796         }
    797         elapsedTime -= (System.currentTimeMillis() - startTime);
    798         System.out.println("elapsedTime " + elapsedTime);
    799 
    800         System.out.println("gCount " + gCount);
    801         ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
    802         System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns);
    803     }
    804 
    805 
    806     /**---------------------------------------------------------------------------------------
    807      *
    808      *    doIterTest()       Iteration test
    809      *
    810      *---------------------------------------------------------------------------------------
    811      */
    812     void doIterTest() {
    813         doForwardIterTest();
    814         doBackwardIterTest();
    815     }
    816 
    817     void setOptions() {
    818 
    819         if (opt_java) {
    820             opt_icu = false;
    821         }
    822 
    823         if (opt_rules.length() != 0) {
    824             try {
    825                 icuCol = new com.ibm.icu.text.RuleBasedCollator(getCollationRules(opt_rules));
    826             } catch (Exception e) {
    827                 System.out.println("Cannot open rules:" + e.getMessage());
    828                 System.exit(1);
    829             }
    830         } else {
    831             icuCol = com.ibm.icu.text.Collator.getInstance(
    832                                 LocaleUtility.getLocaleFromName(opt_locale));
    833         }
    834 
    835         javaCol = java.text.Collator.getInstance(
    836                                 LocaleUtility.getLocaleFromName(opt_locale));
    837 
    838         if (opt_norm) {
    839             javaCol.setDecomposition(java.text.Collator.CANONICAL_DECOMPOSITION);
    840             icuCol.setDecomposition(com.ibm.icu.text.Collator.CANONICAL_DECOMPOSITION);
    841         }
    842 
    843         if (opt_french && opt_frenchoff) {
    844             System.err.println("Error: specified both -french and -frenchoff options.");
    845         }
    846 
    847         if (opt_french) {
    848             ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(true);
    849         }
    850         if (opt_frenchoff) {
    851             ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(false);
    852         }
    853 
    854         if (opt_lower) {
    855             ((com.ibm.icu.text.RuleBasedCollator)icuCol).setLowerCaseFirst(true);
    856         }
    857 
    858         if (opt_upper) {
    859             ((com.ibm.icu.text.RuleBasedCollator)icuCol).setUpperCaseFirst(true);
    860         }
    861 
    862         if (opt_shifted) {
    863             ((com.ibm.icu.text.RuleBasedCollator)icuCol).setAlternateHandlingShifted(true);
    864         }
    865 
    866         if (opt_level != 0) {
    867             switch (opt_level) {
    868                 case 1 :
    869                         javaCol.setStrength(java.text.Collator.PRIMARY);
    870                         icuCol.setStrength(com.ibm.icu.text.Collator.PRIMARY);
    871                         break;
    872                 case 2 :
    873                         javaCol.setStrength(java.text.Collator.SECONDARY);
    874                         icuCol.setStrength(com.ibm.icu.text.Collator.SECONDARY);
    875                         break;
    876                 case 3 :
    877                         javaCol.setStrength(java.text.Collator.TERTIARY);
    878                         icuCol.setStrength(com.ibm.icu.text.Collator.TERTIARY);
    879                         break;
    880                 case 4 :
    881                         icuCol.setStrength(com.ibm.icu.text.Collator.QUATERNARY);
    882                         break;
    883                 case 5 :
    884                         javaCol.setStrength(java.text.Collator.IDENTICAL);
    885                         icuCol.setStrength(com.ibm.icu.text.Collator.IDENTICAL);
    886                         break;
    887                 default:
    888                     System.err.println("-level param must be between 1 and 5\n");
    889                     System.exit(1);
    890             }
    891         }
    892         // load classes at least once before starting
    893         javaCol.compare("a", "b");
    894         icuCol.compare("a", "b");
    895     }
    896 
    897     static boolean processOptions(String[] args) {
    898         int argNum;
    899         for (argNum =0; argNum < args.length; argNum++) {
    900             for (int i = 0; i < options.length; i++) {
    901                 if (args[argNum].equalsIgnoreCase(options[i].name)) {
    902                     switch (options[i].type) {
    903                         case 0:
    904                                 options[i].value.delete(0, options[i].value.capacity()).append("true");
    905                                 break;
    906                         case 1:
    907                                 argNum++;
    908                                 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) {
    909                                     System.err.println("value expected for"+ options[i].name +"option.\n");
    910                                     return false;
    911                                 }
    912                                 try {
    913                                    /* int value =*/ Integer.parseInt(args[argNum]);
    914                                     options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]);
    915                                 } catch (NumberFormatException e) {
    916                                     System.err.println("Expected: a number value");
    917                                     return false;
    918                                 }
    919                                 break;
    920                         case 2:
    921                                 argNum++;
    922                                 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) {
    923                                     System.err.println("value expected for"+ options[i].name +"option.\n");
    924                                     return false;
    925                                 }
    926                                 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]);
    927                                 break;
    928                         default:
    929                                 System.err.println("Option type error: {FLAG=0, NUM=1, STRING=2}");
    930                                 return false;
    931                     }
    932                 }
    933             }
    934         }
    935 
    936         opt_fName      = temp_opt_fName.toString();
    937         opt_locale     = temp_opt_locale.toString();
    938         opt_rules      = temp_opt_rules.toString();
    939         if (temp_opt_help.toString().equalsIgnoreCase("true")) {
    940             opt_help = true;
    941         }
    942         opt_loopCount  = Integer.parseInt(temp_opt_loopCount.toString());
    943         opt_iLoopCount = Integer.parseInt(temp_opt_iLoopCount.toString());
    944         if (temp_opt_terse.toString().equalsIgnoreCase("true")) {
    945             opt_terse = true;
    946         }
    947         if (temp_opt_qsort.toString().equalsIgnoreCase("true")) {
    948             opt_qsort = true;
    949         }
    950         if (temp_opt_binsearch.toString().equalsIgnoreCase("true")) {
    951             opt_binsearch = true;
    952         }
    953         if (temp_opt_icu.toString().equalsIgnoreCase("true")) {
    954             opt_icu = true;
    955         }
    956         if (temp_opt_usekeys.toString().equalsIgnoreCase("true")) {
    957             opt_usekeys = true;
    958         }
    959         if (temp_opt_strcmp.toString().equalsIgnoreCase("true")) {
    960             opt_strcmp = true;
    961         }
    962         if (temp_opt_strcmpCPO.toString().equalsIgnoreCase("true")) {
    963             opt_strcmpCPO = true;
    964         }
    965         if (temp_opt_keygen.toString().equalsIgnoreCase("true")) {
    966             opt_keygen = true;
    967         }
    968         if (temp_opt_norm.toString().equalsIgnoreCase("true")) {
    969             opt_norm = true;
    970         }
    971         if (temp_opt_french.toString().equalsIgnoreCase("true")) {
    972             opt_french = true;
    973         }
    974         if (temp_opt_frenchoff.toString().equalsIgnoreCase("true")) {
    975             opt_frenchoff = true;
    976         }
    977         if (temp_opt_shifted.toString().equalsIgnoreCase("true")) {
    978             opt_shifted = true;
    979         }
    980         if (temp_opt_lower.toString().equalsIgnoreCase("true")) {
    981             opt_lower = true;
    982         }
    983         if (temp_opt_upper.toString().equalsIgnoreCase("true")) {
    984             opt_upper = true;
    985         }
    986         if (temp_opt_case.toString().equalsIgnoreCase("true")) {
    987             opt_case = true;
    988         }
    989         opt_level      = Integer.parseInt(temp_opt_level.toString());
    990         if (temp_opt_keyhist.toString().equalsIgnoreCase("true")) {
    991             opt_keyhist = true;
    992         }
    993         if (temp_opt_itertest.toString().equalsIgnoreCase("true")) {
    994             opt_itertest = true;
    995         }
    996         if (temp_opt_dump.toString().equalsIgnoreCase("true")) {
    997             opt_dump = true;
    998         }
    999         if (temp_opt_java.toString().equalsIgnoreCase("true")) {
   1000             opt_java = true;
   1001         }
   1002 
   1003         return true;
   1004     }
   1005 
   1006     /**
   1007      * Invoke the runtime's garbage collection procedure repeatedly
   1008      * until the amount of free memory stabilizes to within 10%.
   1009      */
   1010     private void callGC() {
   1011         // From "Java Platform Performance".  This is the procedure
   1012         // recommended by Javasoft.
   1013         try {
   1014             System.gc();
   1015             Thread.sleep(100);
   1016             System.runFinalization();
   1017             Thread.sleep(100);
   1018 
   1019             System.gc();
   1020             Thread.sleep(100);
   1021             System.runFinalization();
   1022             Thread.sleep(100);
   1023         } catch (InterruptedException e) {}
   1024     }
   1025 
   1026     //private boolean needCRLF = false;
   1027 
   1028     public int DOTMASK = 0x7FF;
   1029 
   1030     void dot(int i) {
   1031         if ((i % DOTMASK) == 0) {
   1032             //needCRLF = true;
   1033             // I do not know why print the dot here
   1034             //System.out.print('.');
   1035         }
   1036     }
   1037 
   1038     String readDataLine(BufferedReader br) throws Exception {
   1039         String originalLine = "";
   1040         String line = "";
   1041 
   1042         try {
   1043             line = originalLine = br.readLine();
   1044             if (line == null) return null;
   1045             if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1);
   1046             int commentPos = line.indexOf('#');
   1047             if (commentPos >= 0) line = line.substring(0, commentPos);
   1048             line = line.trim();
   1049         } catch (Exception e) {
   1050             throw new Exception("Line \"{0}\",  \"{1}\"" + originalLine + " "
   1051                                 + line + " " + e.toString());
   1052         }
   1053         return line;
   1054     }
   1055 
   1056     void readDataLines() {
   1057         // Read in  the input file.
   1058         //   File assumed to be utf-16.
   1059         //   Lines go onto heap buffers.  Global index array to line starts is created.
   1060         //   Lines themselves are null terminated.
   1061         //
   1062         FileInputStream fis = null;
   1063         InputStreamReader isr = null;
   1064         BufferedReader br = null;
   1065         try {
   1066             fis = new FileInputStream(opt_fName);
   1067             isr = new InputStreamReader(fis, "UTF-8");
   1068             br= new BufferedReader(isr, 32*1024);
   1069         } catch (Exception e) {
   1070             System.err.println("Error: File access exception: " + e.getMessage() + "!");
   1071             System.exit(2);
   1072         }
   1073 
   1074         int counter = 0;
   1075 
   1076         list = new ArrayList();
   1077         while (true) {
   1078             String line = null;
   1079             try {
   1080                 line = readDataLine(br);
   1081             } catch (Exception e) {
   1082                 System.err.println("Read File Error" + e.getMessage() + "!");
   1083                 System.exit(1);
   1084             }
   1085 
   1086             if (line == null) break;
   1087             if (line.length() == 0) continue;
   1088             dot(counter++);
   1089             list.add(line);
   1090         }
   1091         if (!opt_terse) {
   1092             System.out.println("Read " + counter + " lines in file");
   1093         }
   1094 
   1095         int size = list.size();
   1096         tests = new String [size];
   1097 
   1098         for (int i = 0; i < size; ++i) {
   1099             tests[i] = (String) list.get(i);
   1100         }
   1101     }
   1102 
   1103     /**
   1104      * Get the Collator Rules
   1105      * The Rule File format:
   1106      * 1. leading and trailing whitespaces will be omitted
   1107      * 2. lines with the leading character '#' will be treated as comments
   1108      * 3. File encoding is ISO-8859-1
   1109      */
   1110     String getCollationRules(String ruleFileName) {
   1111         FileInputStream fis = null;
   1112         InputStreamReader isr = null;
   1113         BufferedReader br = null;
   1114         try {
   1115             fis = new FileInputStream(opt_rules);
   1116             isr = new InputStreamReader(fis,"ISO-8859-1");
   1117             br= new BufferedReader(isr);
   1118         } catch (Exception e) {
   1119             System.err.println("Error: File access exception: " + e.getMessage() + "!");
   1120             System.exit(2);
   1121         }
   1122         String rules = "";
   1123         String line = "";
   1124         while (true) {
   1125             try {
   1126                 line = br.readLine();
   1127             } catch (IOException e) {
   1128                 System.err.println("Read File Error" + e.getMessage() + "!");
   1129                 System.exit(1);
   1130             }
   1131             if (line == null) {
   1132                 break;
   1133             }
   1134             int commentPos = line.indexOf('#');
   1135             if (commentPos >= 0) line = line.substring(0, commentPos);
   1136             line = line.trim();
   1137             rules = rules + line;
   1138         }
   1139         return rules;
   1140     }
   1141 
   1142     //Implementing qsort
   1143     void qSortImpl_java_usekeys(String src[], int fromIndex, int toIndex, java.text.Collator c) {
   1144         int low = fromIndex;
   1145         int high = toIndex;
   1146         String middle = "";
   1147         if (high > low) {
   1148             middle = src[ (low + high) / 2 ];
   1149             while(low <= high) {
   1150                 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) {
   1151                     ++low;
   1152                 }
   1153                 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) {
   1154                     --high;
   1155                 }
   1156                 if(low <= high) {
   1157                     String swap = src[low];
   1158                     src[low] = src[high];
   1159                     src[high] = swap;
   1160                     ++low;
   1161                     --high;
   1162                 }
   1163             }
   1164             if(fromIndex < high) {
   1165                 qSortImpl_java_usekeys(src, fromIndex, high, c);
   1166             }
   1167 
   1168             if(low < toIndex) {
   1169                 qSortImpl_java_usekeys(src, low, toIndex, c);
   1170             }
   1171         }
   1172     }
   1173 
   1174     void qSortImpl_icu_usekeys(String src[], int fromIndex, int toIndex, com.ibm.icu.text.Collator c) {
   1175         int low = fromIndex;
   1176         int high = toIndex;
   1177         String middle = "";
   1178         if (high > low) {
   1179             middle = src[ (low + high) / 2 ];
   1180             while(low <= high) {
   1181                 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) {
   1182                     ++low;
   1183                 }
   1184                 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) {
   1185                     --high;
   1186                 }
   1187                 if(low <= high) {
   1188                     String swap = src[low];
   1189                     src[low] = src[high];
   1190                     src[high] = swap;
   1191                     ++low;
   1192                     --high;
   1193                 }
   1194             }
   1195             if(fromIndex < high) {
   1196                 qSortImpl_icu_usekeys(src, fromIndex, high, c);
   1197             }
   1198 
   1199             if(low < toIndex) {
   1200                 qSortImpl_icu_usekeys(src, low, toIndex, c);
   1201             }
   1202         }
   1203     }
   1204 
   1205     void qSortImpl_nokeys(String src[], int fromIndex, int toIndex, Comparator c) {
   1206         int low = fromIndex;
   1207         int high = toIndex;
   1208         String middle = "";
   1209         if (high > low) {
   1210             middle = src[ (low + high) / 2 ];
   1211             while(low <= high) {
   1212                 while((low < toIndex) && (compare(src[low], middle, c) < 0)) {
   1213                     ++low;
   1214                 }
   1215                 while((high > fromIndex) && (compare(src[high], middle, c) > 0)) {
   1216                     --high;
   1217                 }
   1218                 if(low <= high) {
   1219                     String swap = src[low];
   1220                     src[low] = src[high];
   1221                     src[high] = swap;
   1222                     ++low;
   1223                     --high;
   1224                 }
   1225             }
   1226             if(fromIndex < high) {
   1227                 qSortImpl_nokeys(src, fromIndex, high, c);
   1228             }
   1229 
   1230             if(low < toIndex) {
   1231                 qSortImpl_nokeys(src, low, toIndex, c);
   1232             }
   1233         }
   1234     }
   1235 
   1236     int compare(String source, String target, Comparator c) {
   1237         globalCount++;
   1238         return c.compare(source, target);
   1239     }
   1240 
   1241     int compare(java.text.CollationKey source, java.text.CollationKey target) {
   1242         globalCount++;
   1243         return source.compareTo(target);
   1244     }
   1245 
   1246     int compare(com.ibm.icu.text.CollationKey source, com.ibm.icu.text.CollationKey target) {
   1247         globalCount++;
   1248         return source.compareTo(target);
   1249     }
   1250 
   1251     //Class for command line option
   1252     static class OptionSpec {
   1253         String name;
   1254         int type;
   1255         StringBuffer value;
   1256         public OptionSpec(String name, int type, StringBuffer value) {
   1257             this.name = name;
   1258             this.type = type;
   1259             this.value = value;
   1260         }
   1261     }
   1262 }