1 /** 2 ******************************************************************************* 3 * Copyright (C) 2002-2007, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 8 package com.ibm.icu.dev.test.perf; 9 10 import com.ibm.icu.text.*; 11 import java.util.*; 12 import java.io.*; 13 import com.ibm.icu.impl.LocaleUtility; 14 15 public class CollationPerformanceTest { 16 static final String usageString = 17 "usage: collperf options...\n" 18 + "-help Display this message.\n" 19 + "-file file_name utf-16 format file of names.\n" 20 + "-locale name ICU locale to use. Default is en_US\n" 21 + "-rules file_name Collation rules file (overrides locale)\n" 22 //+ "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n" 23 //+ " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n" 24 //+ "-win Run test using Windows native services. (ICU is default)\n" 25 //+ "-unix Run test using Unix strxfrm, strcoll services.\n" 26 //+ "-uselen Use API with string lengths. Default is null-terminated strings\n" 27 + "-usekeys Run tests using sortkeys rather than strcoll\n" 28 + "-strcmp Run tests using u_strcmp rather than strcoll\n" 29 + "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n" 30 + "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n" 31 + "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n" 32 + " under test at each call point. For measuring test overhead.\n" 33 + "-terse Terse numbers-only output. Intended for use by scripts.\n" 34 + "-french French accent ordering\n" 35 + "-frenchoff No French accent ordering (for use with French locales.)\n" 36 + "-norm Normalizing mode on\n" 37 + "-shifted Shifted mode\n" 38 + "-lower Lower case first\n" 39 + "-upper Upper case first\n" 40 + "-case Enable separate case level\n" 41 + "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n" 42 + "-keyhist Produce a table sort key size vs. string length\n" 43 + "-binsearch Binary Search timing test\n" 44 + "-keygen Sort Key Generation timing test\n" 45 + "-qsort Quicksort timing test\n" 46 + "-iter Iteration Performance Test\n" 47 + "-dump Display strings, sort keys and CEs.\n" 48 + "-java Run test using java.text.Collator.\n"; 49 50 //enum {FLAG, NUM, STRING} type; 51 static StringBuffer temp_opt_fName = new StringBuffer(""); 52 static StringBuffer temp_opt_locale = new StringBuffer("en_US"); 53 //static StringBuffer temp_opt_langid = new StringBuffer("0"); // Defaults to value corresponding to opt_locale. 54 static StringBuffer temp_opt_rules = new StringBuffer(""); 55 static StringBuffer temp_opt_help = new StringBuffer(""); 56 static StringBuffer temp_opt_loopCount = new StringBuffer("1"); 57 static StringBuffer temp_opt_iLoopCount = new StringBuffer("1"); 58 static StringBuffer temp_opt_terse = new StringBuffer("false"); 59 static StringBuffer temp_opt_qsort = new StringBuffer(""); 60 static StringBuffer temp_opt_binsearch = new StringBuffer(""); 61 static StringBuffer temp_opt_icu = new StringBuffer("true"); 62 //static StringBuffer opt_win = new StringBuffer(""); // Run with Windows native functions. 63 //static StringBuffer opt_unix = new StringBuffer(""); // Run with UNIX strcoll, strxfrm functions. 64 //static StringBuffer opt_uselen = new StringBuffer(""); 65 static StringBuffer temp_opt_usekeys = new StringBuffer(""); 66 static StringBuffer temp_opt_strcmp = new StringBuffer(""); 67 static StringBuffer temp_opt_strcmpCPO = new StringBuffer(""); 68 static StringBuffer temp_opt_norm = new StringBuffer(""); 69 static StringBuffer temp_opt_keygen = new StringBuffer(""); 70 static StringBuffer temp_opt_french = new StringBuffer(""); 71 static StringBuffer temp_opt_frenchoff = new StringBuffer(""); 72 static StringBuffer temp_opt_shifted = new StringBuffer(""); 73 static StringBuffer temp_opt_lower = new StringBuffer(""); 74 static StringBuffer temp_opt_upper = new StringBuffer(""); 75 static StringBuffer temp_opt_case = new StringBuffer(""); 76 static StringBuffer temp_opt_level = new StringBuffer("0"); 77 static StringBuffer temp_opt_keyhist = new StringBuffer(""); 78 static StringBuffer temp_opt_itertest = new StringBuffer(""); 79 static StringBuffer temp_opt_dump = new StringBuffer(""); 80 static StringBuffer temp_opt_java = new StringBuffer(""); 81 82 83 static String opt_fName = ""; 84 static String opt_locale = "en_US"; 85 //static int opt_langid = 0; // Defaults to value corresponding to opt_locale. 86 static String opt_rules = ""; 87 static boolean opt_help = false; 88 static int opt_loopCount = 1; 89 static int opt_iLoopCount = 1; 90 static boolean opt_terse = false; 91 static boolean opt_qsort = false; 92 static boolean opt_binsearch = false; 93 static boolean opt_icu = true; 94 //static boolean opt_win = false; // Run with Windows native functions. 95 //static boolean opt_unix = false; // Run with UNIX strcoll, strxfrm functions. 96 //static boolean opt_uselen = false; 97 static boolean opt_usekeys = false; 98 static boolean opt_strcmp = false; 99 static boolean opt_strcmpCPO = false; 100 static boolean opt_norm = false; 101 static boolean opt_keygen = false; 102 static boolean opt_french = false; 103 static boolean opt_frenchoff = false; 104 static boolean opt_shifted = false; 105 static boolean opt_lower = false; 106 static boolean opt_upper = false; 107 static boolean opt_case = false; 108 static int opt_level = 0; 109 static boolean opt_keyhist = false; 110 static boolean opt_itertest = false; 111 static boolean opt_dump = false; 112 static boolean opt_java = false; 113 114 static OptionSpec[] options = { 115 new OptionSpec("-file", 2, temp_opt_fName), 116 new OptionSpec("-locale", 2, temp_opt_locale), 117 //new OptionSpec("-langid", 1, temp_opt_langid), 118 new OptionSpec("-rules", 2, temp_opt_rules), 119 new OptionSpec("-qsort", 0, temp_opt_qsort), 120 new OptionSpec("-binsearch", 0, temp_opt_binsearch), 121 new OptionSpec("-iter", 0, temp_opt_itertest), 122 //new OptionSpec("-win", 0, temp_opt_win), 123 //new OptionSpec("-unix", 0, temp_opt_unix), 124 //new OptionSpec("-uselen", 0, temp_opt_uselen), 125 new OptionSpec("-usekeys", 0, temp_opt_usekeys), 126 new OptionSpec("-strcmp", 0, temp_opt_strcmp), 127 new OptionSpec("-strcmpCPO", 0, temp_opt_strcmpCPO), 128 new OptionSpec("-norm", 0, temp_opt_norm), 129 new OptionSpec("-french", 0, temp_opt_french), 130 new OptionSpec("-frenchoff", 0, temp_opt_frenchoff), 131 new OptionSpec("-shifted", 0, temp_opt_shifted), 132 new OptionSpec("-lower", 0, temp_opt_lower), 133 new OptionSpec("-upper", 0, temp_opt_upper), 134 new OptionSpec("-case", 0, temp_opt_case), 135 new OptionSpec("-level", 1, temp_opt_level), 136 new OptionSpec("-keyhist", 0, temp_opt_keyhist), 137 new OptionSpec("-keygen", 0, temp_opt_keygen), 138 new OptionSpec("-loop", 1, temp_opt_loopCount), 139 new OptionSpec("-iloop", 1, temp_opt_iLoopCount), 140 new OptionSpec("-terse", 0, temp_opt_terse), 141 new OptionSpec("-dump", 0, temp_opt_dump), 142 new OptionSpec("-help", 0, temp_opt_help), 143 new OptionSpec("-?", 0, temp_opt_help), 144 new OptionSpec("-java", 0, temp_opt_java), 145 }; 146 147 static java.text.Collator javaCol = null; 148 static com.ibm.icu.text.Collator icuCol = null; 149 static NumberFormat nf = null; 150 static NumberFormat percent = null; 151 ArrayList list = null; 152 String[] tests = null; 153 int globalCount = 0; 154 155 public static void main(String[] args) { 156 CollationPerformanceTest collPerf = new CollationPerformanceTest(); 157 if ( !CollationPerformanceTest.processOptions(args) || opt_help || opt_fName.length()==0) { 158 System.out.println(usageString); 159 System.exit(1); 160 } 161 162 nf = NumberFormat.getInstance(); 163 nf.setMaximumFractionDigits(2); 164 percent = NumberFormat.getPercentInstance(); 165 166 collPerf.setOptions(); 167 collPerf.readDataLines(); 168 169 if (opt_dump) { 170 collPerf.doDump(); 171 } 172 173 if (opt_qsort) { 174 collPerf.doQSort(); 175 } 176 177 if (opt_binsearch) { 178 collPerf.doBinarySearch(); 179 } 180 181 if (opt_keygen) { 182 collPerf.doKeyGen(); 183 } 184 185 if (opt_keyhist) { 186 collPerf.doKeyHist(); 187 } 188 189 if (opt_itertest) { 190 collPerf.doIterTest(); 191 } 192 193 } 194 195 //Dump file lines, CEs, Sort Keys if requested 196 void doDump() { 197 for(int i = 0; i < list.size(); i++) { 198 //print the line 199 String line = com.ibm.icu.impl.Utility.escape((String)list.get(i)); 200 System.out.println(line); 201 202 System.out.print(" CEs: "); 203 CollationElementIterator CEiter = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationElementIterator(line); 204 int ce; 205 int j = 0; 206 for(;;) { 207 ce = CEiter.next(); 208 if (ce == CollationElementIterator.NULLORDER) { 209 break; 210 } 211 //System.out.print(); 212 String outStr = Integer.toHexString(ce); 213 for (int len = 0; len < 8 - outStr.length(); len++) { 214 outStr ='0' + outStr; 215 } 216 System.out.print(outStr + " "); 217 if(++j >8) { 218 System.out.print("\n "); 219 j = 0; 220 } 221 } 222 223 System.out.print("\n ICU Sort Key: "); 224 CollationKey ck = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationKey(line); 225 byte[] cks = ck.toByteArray(); 226 j = 0; 227 for(int k = 0; k < cks.length; k++) { 228 String outStr = Integer.toHexString(cks[k]); 229 switch (outStr.length()) { 230 case 1: outStr = '0' + outStr; 231 break; 232 case 8: outStr = outStr.substring(6); 233 break; 234 } 235 System.out.print(outStr); 236 System.out.print(" "); 237 j++; 238 if(j > 0 && j % 20 == 0) { 239 System.out.print("\n "); 240 } 241 } 242 System.out.println("\n"); 243 } 244 } 245 246 /**--------------------------------------------------------------------------------------- 247 * 248 * doQSort() The quick sort timing test. 249 * 250 *--------------------------------------------------------------------------------------- 251 */ 252 void doQSort() { 253 callGC(); 254 //String[] sortTests = (String[]) tests.clone(); 255 //Adjust loop count to compensate for file size. QSort should be nlog(n) 256 double dLoopCount = opt_loopCount * 3000 / ((Math.log(tests.length) / Math.log(10)* tests.length)); 257 258 if(opt_usekeys) { 259 dLoopCount *= 5; 260 } 261 262 int adj_loopCount = (int)dLoopCount; 263 if(adj_loopCount < 1) { 264 adj_loopCount = 1; 265 } 266 267 globalCount = 0; 268 long startTime = 0; 269 long endTime = 0; 270 if (opt_icu && opt_usekeys) { 271 startTime = System.currentTimeMillis(); 272 qSortImpl_icu_usekeys(tests, 0, tests.length -1, icuCol); 273 endTime = System.currentTimeMillis(); 274 } 275 if (opt_icu && !opt_usekeys){ 276 startTime = System.currentTimeMillis(); 277 qSortImpl_nokeys(tests, 0, tests.length -1, icuCol); 278 endTime = System.currentTimeMillis(); 279 } 280 if (opt_java && opt_usekeys) { 281 startTime = System.currentTimeMillis(); 282 qSortImpl_java_usekeys(tests, 0, tests.length -1, javaCol); 283 endTime = System.currentTimeMillis(); 284 } 285 if (opt_java && !opt_usekeys){ 286 startTime = System.currentTimeMillis(); 287 qSortImpl_nokeys(tests, 0, tests.length -1, javaCol); 288 endTime = System.currentTimeMillis(); 289 } 290 long elapsedTime = endTime - startTime; 291 int ns = (int)(1000000 * elapsedTime / (globalCount + 0.0)); 292 if (!opt_terse) { 293 System.out.println("qsort: total # of string compares = " + globalCount); 294 System.out.println("qsort: time per compare = " + ns); 295 } else { 296 System.out.println(ns); 297 } 298 } 299 300 /**--------------------------------------------------------------------------------------- 301 * 302 * doBinarySearch() Binary Search timing test. Each name from the list 303 * is looked up in the full sorted list of names. 304 * 305 *--------------------------------------------------------------------------------------- 306 */ 307 void doBinarySearch() { 308 callGC(); 309 int gCount = 0; 310 int loops = 0; 311 double dLoopCount = opt_loopCount * 3000 / (Math.log(tests.length) / Math.log(10)* tests.length); 312 long startTime = 0; 313 long elapsedTime = 0; 314 315 if(opt_usekeys) { 316 dLoopCount *= 5; 317 } 318 int adj_loopCount = (int)dLoopCount; 319 if(adj_loopCount < 1) { 320 adj_loopCount = 1; 321 } 322 323 //int opt2 = 0; 324 325 for(;;) { //not really a loop, just allows "break" to work, to simplify 326 //inadvertantly running more than one test through here 327 if(opt_strcmp) { 328 int r = 0; 329 startTime = System.currentTimeMillis(); 330 for(loops = 0; loops < adj_loopCount; loops++) { 331 for (int j = 0; j < tests.length; j++) { 332 int hi = tests.length-1; 333 int lo = 0; 334 int guess = -1; 335 for(;;) { 336 int newGuess = (hi + lo) / 2; 337 if(newGuess == guess){ 338 break; 339 } 340 guess = newGuess; 341 r = tests[j].compareTo(tests[guess]); 342 gCount++; 343 if(r == 0) { 344 break; 345 } 346 if (r < 0) { 347 hi = guess; 348 } else { 349 lo = guess; 350 } 351 } 352 } 353 } 354 elapsedTime = System.currentTimeMillis() - startTime; 355 break; 356 } 357 358 if (opt_strcmpCPO) { 359 int r = 0; 360 startTime = System.currentTimeMillis(); 361 for(loops = 0; loops < adj_loopCount; loops++) { 362 for (int j = 0; j < tests.length; j++) { 363 int hi = tests.length-1; 364 int lo = 0; 365 int guess = -1; 366 for(;;) { 367 int newGuess = (hi + lo) / 2; 368 if(newGuess == guess){ 369 break; 370 } 371 guess = newGuess; 372 r = com.ibm.icu.text.Normalizer.compare(tests[j], tests[guess], Normalizer.COMPARE_CODE_POINT_ORDER); 373 gCount++; 374 if(r == 0) { 375 break; 376 } 377 if (r < 0) { 378 hi = guess; 379 } else { 380 lo = guess; 381 } 382 } 383 } 384 } 385 elapsedTime = System.currentTimeMillis() - startTime; 386 break; 387 } 388 389 if (opt_icu) { 390 391 int r = 0; 392 startTime = System.currentTimeMillis(); 393 for (loops = 0; loops < adj_loopCount; loops++) { 394 for (int j = 0; j < tests.length; j++) { 395 int hi = tests.length - 1; 396 int lo = 0; 397 int guess = -1; 398 for (;;) { 399 int newGuess = (hi + lo) / 2; 400 if (newGuess == guess) { 401 break; 402 } 403 guess = newGuess; 404 if (opt_usekeys) { 405 com.ibm.icu.text.CollationKey sortKey1 = icuCol.getCollationKey(tests[j]); 406 com.ibm.icu.text.CollationKey sortKey2 = icuCol.getCollationKey(tests[guess]); 407 r = sortKey1.compareTo(sortKey2); 408 gCount ++; 409 } else { 410 r = icuCol.compare(tests[j], tests[guess]); 411 gCount++; 412 } 413 if (r == 0) { 414 break; 415 } 416 if (r < 0) { 417 hi = guess; 418 } else { 419 lo = guess; 420 } 421 } 422 } 423 } 424 elapsedTime = System.currentTimeMillis() - startTime; 425 break; 426 } 427 if (opt_java) { 428 429 int r = 0; 430 startTime = System.currentTimeMillis(); 431 for (loops = 0; loops < adj_loopCount; loops++) { 432 for (int j = 0; j < tests.length; j++) { 433 int hi = tests.length - 1; 434 int lo = 0; 435 int guess = -1; 436 for (;;) { 437 int newGuess = (hi + lo) / 2; 438 if (newGuess == guess) { 439 break; 440 } 441 guess = newGuess; 442 if (opt_usekeys) { 443 java.text.CollationKey sortKey1 = javaCol.getCollationKey(tests[j]); 444 java.text.CollationKey sortKey2 = javaCol.getCollationKey(tests[guess]); 445 r = sortKey1.compareTo(sortKey2); 446 gCount ++; 447 } else { 448 r = javaCol.compare(tests[j], tests[guess]); 449 gCount++; 450 } 451 if (r == 0) { 452 break; 453 } 454 if (r < 0) { 455 hi = guess; 456 } else { 457 lo = guess; 458 } 459 } 460 } 461 } 462 elapsedTime = System.currentTimeMillis() - startTime; 463 break; 464 } 465 break; 466 } 467 int ns = (int)((float)(1000000) * (float)elapsedTime / (float)gCount); 468 if (!opt_terse) { 469 System.out.println("binary search: total # of string compares = " + gCount); 470 System.out.println("binary search: compares per loop = " + gCount / loops); 471 System.out.println("binary search: time per compare = " + ns); 472 } else { 473 System.out.println(ns); 474 } 475 } 476 477 /**--------------------------------------------------------------------------------------- 478 * 479 * doKeyGen() Key Generation Timing Test 480 * 481 *--------------------------------------------------------------------------------------- 482 */ 483 void doKeyGen() { 484 callGC(); 485 486 // Adjust loop count to compensate for file size. Should be order n 487 double dLoopCount = opt_loopCount * (1000.0 / (double)list.size()); 488 int adj_loopCount = (int)dLoopCount; 489 if (adj_loopCount < 1) adj_loopCount = 1; 490 491 long startTime = 0; 492 long totalKeyLen = 0; 493 long totalChars = 0; 494 if (opt_java) { 495 startTime = System.currentTimeMillis(); 496 for (int loops=0; loops<adj_loopCount; loops++) { 497 for (int line=0; line < tests.length; line++) { 498 for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 499 totalChars += tests[line].length(); 500 byte[] sortKey = javaCol.getCollationKey(tests[line]).toByteArray(); 501 totalKeyLen += sortKey.length; 502 } 503 } 504 } 505 } else { 506 startTime = System.currentTimeMillis(); 507 for (int loops=0; loops<adj_loopCount; loops++) { 508 for (int line=0; line < tests.length; line++) { 509 for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 510 totalChars += tests[line].length(); 511 byte[] sortKey = icuCol.getCollationKey(tests[line]).toByteArray(); 512 totalKeyLen += sortKey.length; 513 } 514 } 515 } 516 } 517 518 long elapsedTime = System.currentTimeMillis() - startTime; 519 long ns = (long)(1000000 * elapsedTime / (adj_loopCount * tests.length + 0.0)); 520 if (!opt_terse) { 521 System.out.println("Sort Key Generation: total # of keys =" + adj_loopCount * tests.length); 522 System.out.println("Sort Key Generation: time per key = " + ns + " ns"); 523 System.out.println("Key Length / character = " + nf.format(totalKeyLen / (totalChars + 0.0))); 524 } 525 else { 526 System.out.print(ns + ", "); 527 System.out.println(nf.format(totalKeyLen / (totalChars + 0.0)) + ", "); 528 } 529 } 530 531 /**--------------------------------------------------------------------------------------- 532 * 533 * doKeyHist() Output a table of data for average sort key size vs. string length. 534 * 535 *--------------------------------------------------------------------------------------- 536 */ 537 void doKeyHist() { 538 callGC(); 539 int maxLen = 0; 540 541 // Find the maximum string length 542 for (int i = 0; i < tests.length; i++) { 543 if (tests[i].length() > maxLen) maxLen = tests[i].length(); 544 } 545 546 int[] accumulatedLen = new int[maxLen + 1]; 547 int[] numKeysOfSize = new int[maxLen + 1]; 548 549 // Fill the arrays... 550 for (int i = 0; i < tests.length; i++) { 551 int len = tests[i].length(); 552 accumulatedLen[len] += icuCol.getCollationKey(tests[i]).toByteArray().length; 553 numKeysOfSize[len] += 1; 554 } 555 556 // And write out averages 557 System.out.println("String Length, Avg Key Length, Avg Key Len per char"); 558 for (int i = 1; i <= maxLen; i++) { 559 if (numKeysOfSize[i] > 0) { 560 System.out.println(i + ", " + nf.format(accumulatedLen[i] / (numKeysOfSize[i]+ 0.0)) + ", " 561 + nf.format(accumulatedLen[i] / (numKeysOfSize[i] * i + 0.0))); 562 } 563 } 564 565 } 566 567 void doForwardIterTest() { 568 callGC(); 569 System.out.print("\n\nPerforming forward iteration performance test with "); 570 System.out.println("performance test on strings from file -----------"); 571 572 CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(""); 573 574 int gCount = 0; 575 int count = 0; 576 long startTime = System.currentTimeMillis(); 577 while (count < opt_loopCount) { 578 int linecount = 0; 579 while (linecount < tests.length) { 580 String str = tests[linecount]; 581 iter.setText(str); 582 while (iter.next() != CollationElementIterator.NULLORDER) { 583 gCount++; 584 } 585 linecount ++; 586 } 587 count ++; 588 } 589 590 long elapsedTime = System.currentTimeMillis() - startTime; 591 System.out.println("elapsedTime " + elapsedTime + " ms"); 592 593 // empty loop recalculation 594 count = 0; 595 startTime = System.currentTimeMillis(); 596 while (count < opt_loopCount) { 597 int linecount = 0; 598 while (linecount < tests.length) { 599 String str = tests[linecount]; 600 iter.setText(str); 601 linecount ++; 602 } 603 count ++; 604 } 605 elapsedTime -= (System.currentTimeMillis() - startTime); 606 System.out.println("elapsedTime " + elapsedTime + " ms"); 607 608 int ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 609 System.out.println("Total number of strings compared " + tests.length 610 + "in " + opt_loopCount + " loops"); 611 System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns); 612 System.out.println("performance test on skipped-5 concatenated strings from file -----------"); 613 614 String totalStr = ""; 615 int strlen = 0; 616 // appending all the strings 617 int linecount = 0; 618 while (linecount < tests.length) { 619 totalStr += tests[linecount]; 620 strlen += tests[linecount].length(); 621 linecount ++; 622 } 623 System.out.println("Total size of strings " + strlen); 624 625 gCount = 0; 626 count = 0; 627 iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr); 628 strlen -= 5; // any left over characters are not iterated, 629 // this is to ensure the backwards and forwards iterators 630 // gets the same position 631 int strindex = 0; 632 startTime = System.currentTimeMillis(); 633 while (count < opt_loopCount) { 634 int count5 = 5; 635 strindex = 0; 636 iter.setOffset(strindex); 637 while (true) { 638 if (iter.next() == CollationElementIterator.NULLORDER) { 639 break; 640 } 641 gCount++; 642 count5 --; 643 if (count5 == 0) { 644 strindex += 10; 645 if (strindex > strlen) { 646 break; 647 } 648 iter.setOffset(strindex); 649 count5 = 5; 650 } 651 } 652 count ++; 653 } 654 655 elapsedTime = System.currentTimeMillis() - startTime; 656 System.out.println("elapsedTime " + elapsedTime); 657 658 // empty loop recalculation 659 int tempgCount = 0; 660 count = 0; 661 startTime = System.currentTimeMillis(); 662 while (count < opt_loopCount) { 663 int count5 = 5; 664 strindex = 0; 665 iter.setOffset(strindex); 666 while (true) { 667 tempgCount ++; 668 count5 --; 669 if (count5 == 0) { 670 strindex += 10; 671 if (strindex > strlen) { 672 break; 673 } 674 iter.setOffset(strindex); 675 count5 = 5; 676 } 677 } 678 count ++; 679 } 680 elapsedTime -= (System.currentTimeMillis() - startTime); 681 System.out.println("elapsedTime " + elapsedTime); 682 683 System.out.println("gCount " + gCount); 684 ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 685 System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns); 686 } 687 688 void doBackwardIterTest() { 689 System.out.print("\n\nPerforming backward iteration performance test with "); 690 System.out.println("performance test on strings from file -----------\n"); 691 692 CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(""); 693 694 int gCount = 0; 695 int count = 0; 696 long startTime = System.currentTimeMillis(); 697 while (count < opt_loopCount) { 698 int linecount = 0; 699 while (linecount < tests.length) { 700 String str = tests[linecount]; 701 iter.setText(str); 702 while (iter.previous() != CollationElementIterator.NULLORDER) { 703 gCount++; 704 } 705 linecount ++; 706 } 707 count ++; 708 } 709 long elapsedTime = System.currentTimeMillis() - startTime; 710 System.out.println("elapsedTime " + elapsedTime + " ms"); 711 712 // empty loop recalculation 713 count = 0; 714 startTime = System.currentTimeMillis(); 715 while (count < opt_loopCount) { 716 int linecount = 0; 717 while (linecount < tests.length) { 718 String str = tests[linecount]; 719 iter.setText(str); 720 linecount ++; 721 } 722 count ++; 723 } 724 elapsedTime -= (System.currentTimeMillis() - startTime); 725 System.out.println("elapsedTime " + elapsedTime + " ms"); 726 727 int ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 728 System.out.println("Total number of strings compared " + tests.length 729 + "in " + opt_loopCount + " loops"); 730 System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns); 731 System.out.println("performance test on skipped-5 concatenated strings from file -----------"); 732 733 String totalStr = ""; 734 int strlen = 0; 735 // appending all the strings 736 int linecount = 0; 737 while (linecount < tests.length) { 738 totalStr += tests[linecount]; 739 strlen += tests[linecount].length(); 740 linecount ++; 741 } 742 System.out.println("Total size of strings " + strlen); 743 744 gCount = 0; 745 count = 0; 746 747 iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr); 748 int strindex = 0; 749 startTime = System.currentTimeMillis(); 750 while (count < opt_loopCount) { 751 int count5 = 5; 752 strindex = 5; 753 iter.setOffset(strindex); 754 while (true) { 755 if (iter.previous() == CollationElementIterator.NULLORDER) { 756 break; 757 } 758 gCount ++; 759 count5 --; 760 if (count5 == 0) { 761 strindex += 10; 762 if (strindex > strlen) { 763 break; 764 } 765 iter.setOffset(strindex); 766 count5 = 5; 767 } 768 } 769 count ++; 770 } 771 772 elapsedTime = System.currentTimeMillis() - startTime; 773 System.out.println("elapsedTime " + elapsedTime); 774 775 // empty loop recalculation 776 count = 0; 777 int tempgCount = 0; 778 startTime = System.currentTimeMillis(); 779 while (count < opt_loopCount) { 780 int count5 = 5; 781 strindex = 5; 782 iter.setOffset(strindex); 783 while (true) { 784 tempgCount ++; 785 count5 --; 786 if (count5 == 0) { 787 strindex += 10; 788 if (strindex > strlen) { 789 break; 790 } 791 iter.setOffset(strindex); 792 count5 = 5; 793 } 794 } 795 count ++; 796 } 797 elapsedTime -= (System.currentTimeMillis() - startTime); 798 System.out.println("elapsedTime " + elapsedTime); 799 800 System.out.println("gCount " + gCount); 801 ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 802 System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns); 803 } 804 805 806 /**--------------------------------------------------------------------------------------- 807 * 808 * doIterTest() Iteration test 809 * 810 *--------------------------------------------------------------------------------------- 811 */ 812 void doIterTest() { 813 doForwardIterTest(); 814 doBackwardIterTest(); 815 } 816 817 void setOptions() { 818 819 if (opt_java) { 820 opt_icu = false; 821 } 822 823 if (opt_rules.length() != 0) { 824 try { 825 icuCol = new com.ibm.icu.text.RuleBasedCollator(getCollationRules(opt_rules)); 826 } catch (Exception e) { 827 System.out.println("Cannot open rules:" + e.getMessage()); 828 System.exit(1); 829 } 830 } else { 831 icuCol = com.ibm.icu.text.Collator.getInstance( 832 LocaleUtility.getLocaleFromName(opt_locale)); 833 } 834 835 javaCol = java.text.Collator.getInstance( 836 LocaleUtility.getLocaleFromName(opt_locale)); 837 838 if (opt_norm) { 839 javaCol.setDecomposition(java.text.Collator.CANONICAL_DECOMPOSITION); 840 icuCol.setDecomposition(com.ibm.icu.text.Collator.CANONICAL_DECOMPOSITION); 841 } 842 843 if (opt_french && opt_frenchoff) { 844 System.err.println("Error: specified both -french and -frenchoff options."); 845 } 846 847 if (opt_french) { 848 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(true); 849 } 850 if (opt_frenchoff) { 851 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(false); 852 } 853 854 if (opt_lower) { 855 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setLowerCaseFirst(true); 856 } 857 858 if (opt_upper) { 859 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setUpperCaseFirst(true); 860 } 861 862 if (opt_shifted) { 863 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setAlternateHandlingShifted(true); 864 } 865 866 if (opt_level != 0) { 867 switch (opt_level) { 868 case 1 : 869 javaCol.setStrength(java.text.Collator.PRIMARY); 870 icuCol.setStrength(com.ibm.icu.text.Collator.PRIMARY); 871 break; 872 case 2 : 873 javaCol.setStrength(java.text.Collator.SECONDARY); 874 icuCol.setStrength(com.ibm.icu.text.Collator.SECONDARY); 875 break; 876 case 3 : 877 javaCol.setStrength(java.text.Collator.TERTIARY); 878 icuCol.setStrength(com.ibm.icu.text.Collator.TERTIARY); 879 break; 880 case 4 : 881 icuCol.setStrength(com.ibm.icu.text.Collator.QUATERNARY); 882 break; 883 case 5 : 884 javaCol.setStrength(java.text.Collator.IDENTICAL); 885 icuCol.setStrength(com.ibm.icu.text.Collator.IDENTICAL); 886 break; 887 default: 888 System.err.println("-level param must be between 1 and 5\n"); 889 System.exit(1); 890 } 891 } 892 // load classes at least once before starting 893 javaCol.compare("a", "b"); 894 icuCol.compare("a", "b"); 895 } 896 897 static boolean processOptions(String[] args) { 898 int argNum; 899 for (argNum =0; argNum < args.length; argNum++) { 900 for (int i = 0; i < options.length; i++) { 901 if (args[argNum].equalsIgnoreCase(options[i].name)) { 902 switch (options[i].type) { 903 case 0: 904 options[i].value.delete(0, options[i].value.capacity()).append("true"); 905 break; 906 case 1: 907 argNum++; 908 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) { 909 System.err.println("value expected for"+ options[i].name +"option.\n"); 910 return false; 911 } 912 try { 913 /* int value =*/ Integer.parseInt(args[argNum]); 914 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]); 915 } catch (NumberFormatException e) { 916 System.err.println("Expected: a number value"); 917 return false; 918 } 919 break; 920 case 2: 921 argNum++; 922 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) { 923 System.err.println("value expected for"+ options[i].name +"option.\n"); 924 return false; 925 } 926 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]); 927 break; 928 default: 929 System.err.println("Option type error: {FLAG=0, NUM=1, STRING=2}"); 930 return false; 931 } 932 } 933 } 934 } 935 936 opt_fName = temp_opt_fName.toString(); 937 opt_locale = temp_opt_locale.toString(); 938 opt_rules = temp_opt_rules.toString(); 939 if (temp_opt_help.toString().equalsIgnoreCase("true")) { 940 opt_help = true; 941 } 942 opt_loopCount = Integer.parseInt(temp_opt_loopCount.toString()); 943 opt_iLoopCount = Integer.parseInt(temp_opt_iLoopCount.toString()); 944 if (temp_opt_terse.toString().equalsIgnoreCase("true")) { 945 opt_terse = true; 946 } 947 if (temp_opt_qsort.toString().equalsIgnoreCase("true")) { 948 opt_qsort = true; 949 } 950 if (temp_opt_binsearch.toString().equalsIgnoreCase("true")) { 951 opt_binsearch = true; 952 } 953 if (temp_opt_icu.toString().equalsIgnoreCase("true")) { 954 opt_icu = true; 955 } 956 if (temp_opt_usekeys.toString().equalsIgnoreCase("true")) { 957 opt_usekeys = true; 958 } 959 if (temp_opt_strcmp.toString().equalsIgnoreCase("true")) { 960 opt_strcmp = true; 961 } 962 if (temp_opt_strcmpCPO.toString().equalsIgnoreCase("true")) { 963 opt_strcmpCPO = true; 964 } 965 if (temp_opt_keygen.toString().equalsIgnoreCase("true")) { 966 opt_keygen = true; 967 } 968 if (temp_opt_norm.toString().equalsIgnoreCase("true")) { 969 opt_norm = true; 970 } 971 if (temp_opt_french.toString().equalsIgnoreCase("true")) { 972 opt_french = true; 973 } 974 if (temp_opt_frenchoff.toString().equalsIgnoreCase("true")) { 975 opt_frenchoff = true; 976 } 977 if (temp_opt_shifted.toString().equalsIgnoreCase("true")) { 978 opt_shifted = true; 979 } 980 if (temp_opt_lower.toString().equalsIgnoreCase("true")) { 981 opt_lower = true; 982 } 983 if (temp_opt_upper.toString().equalsIgnoreCase("true")) { 984 opt_upper = true; 985 } 986 if (temp_opt_case.toString().equalsIgnoreCase("true")) { 987 opt_case = true; 988 } 989 opt_level = Integer.parseInt(temp_opt_level.toString()); 990 if (temp_opt_keyhist.toString().equalsIgnoreCase("true")) { 991 opt_keyhist = true; 992 } 993 if (temp_opt_itertest.toString().equalsIgnoreCase("true")) { 994 opt_itertest = true; 995 } 996 if (temp_opt_dump.toString().equalsIgnoreCase("true")) { 997 opt_dump = true; 998 } 999 if (temp_opt_java.toString().equalsIgnoreCase("true")) { 1000 opt_java = true; 1001 } 1002 1003 return true; 1004 } 1005 1006 /** 1007 * Invoke the runtime's garbage collection procedure repeatedly 1008 * until the amount of free memory stabilizes to within 10%. 1009 */ 1010 private void callGC() { 1011 // From "Java Platform Performance". This is the procedure 1012 // recommended by Javasoft. 1013 try { 1014 System.gc(); 1015 Thread.sleep(100); 1016 System.runFinalization(); 1017 Thread.sleep(100); 1018 1019 System.gc(); 1020 Thread.sleep(100); 1021 System.runFinalization(); 1022 Thread.sleep(100); 1023 } catch (InterruptedException e) {} 1024 } 1025 1026 //private boolean needCRLF = false; 1027 1028 public int DOTMASK = 0x7FF; 1029 1030 void dot(int i) { 1031 if ((i % DOTMASK) == 0) { 1032 //needCRLF = true; 1033 // I do not know why print the dot here 1034 //System.out.print('.'); 1035 } 1036 } 1037 1038 String readDataLine(BufferedReader br) throws Exception { 1039 String originalLine = ""; 1040 String line = ""; 1041 1042 try { 1043 line = originalLine = br.readLine(); 1044 if (line == null) return null; 1045 if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1); 1046 int commentPos = line.indexOf('#'); 1047 if (commentPos >= 0) line = line.substring(0, commentPos); 1048 line = line.trim(); 1049 } catch (Exception e) { 1050 throw new Exception("Line \"{0}\", \"{1}\"" + originalLine + " " 1051 + line + " " + e.toString()); 1052 } 1053 return line; 1054 } 1055 1056 void readDataLines() { 1057 // Read in the input file. 1058 // File assumed to be utf-16. 1059 // Lines go onto heap buffers. Global index array to line starts is created. 1060 // Lines themselves are null terminated. 1061 // 1062 FileInputStream fis = null; 1063 InputStreamReader isr = null; 1064 BufferedReader br = null; 1065 try { 1066 fis = new FileInputStream(opt_fName); 1067 isr = new InputStreamReader(fis, "UTF-8"); 1068 br= new BufferedReader(isr, 32*1024); 1069 } catch (Exception e) { 1070 System.err.println("Error: File access exception: " + e.getMessage() + "!"); 1071 System.exit(2); 1072 } 1073 1074 int counter = 0; 1075 1076 list = new ArrayList(); 1077 while (true) { 1078 String line = null; 1079 try { 1080 line = readDataLine(br); 1081 } catch (Exception e) { 1082 System.err.println("Read File Error" + e.getMessage() + "!"); 1083 System.exit(1); 1084 } 1085 1086 if (line == null) break; 1087 if (line.length() == 0) continue; 1088 dot(counter++); 1089 list.add(line); 1090 } 1091 if (!opt_terse) { 1092 System.out.println("Read " + counter + " lines in file"); 1093 } 1094 1095 int size = list.size(); 1096 tests = new String [size]; 1097 1098 for (int i = 0; i < size; ++i) { 1099 tests[i] = (String) list.get(i); 1100 } 1101 } 1102 1103 /** 1104 * Get the Collator Rules 1105 * The Rule File format: 1106 * 1. leading and trailing whitespaces will be omitted 1107 * 2. lines with the leading character '#' will be treated as comments 1108 * 3. File encoding is ISO-8859-1 1109 */ 1110 String getCollationRules(String ruleFileName) { 1111 FileInputStream fis = null; 1112 InputStreamReader isr = null; 1113 BufferedReader br = null; 1114 try { 1115 fis = new FileInputStream(opt_rules); 1116 isr = new InputStreamReader(fis,"ISO-8859-1"); 1117 br= new BufferedReader(isr); 1118 } catch (Exception e) { 1119 System.err.println("Error: File access exception: " + e.getMessage() + "!"); 1120 System.exit(2); 1121 } 1122 String rules = ""; 1123 String line = ""; 1124 while (true) { 1125 try { 1126 line = br.readLine(); 1127 } catch (IOException e) { 1128 System.err.println("Read File Error" + e.getMessage() + "!"); 1129 System.exit(1); 1130 } 1131 if (line == null) { 1132 break; 1133 } 1134 int commentPos = line.indexOf('#'); 1135 if (commentPos >= 0) line = line.substring(0, commentPos); 1136 line = line.trim(); 1137 rules = rules + line; 1138 } 1139 return rules; 1140 } 1141 1142 //Implementing qsort 1143 void qSortImpl_java_usekeys(String src[], int fromIndex, int toIndex, java.text.Collator c) { 1144 int low = fromIndex; 1145 int high = toIndex; 1146 String middle = ""; 1147 if (high > low) { 1148 middle = src[ (low + high) / 2 ]; 1149 while(low <= high) { 1150 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) { 1151 ++low; 1152 } 1153 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) { 1154 --high; 1155 } 1156 if(low <= high) { 1157 String swap = src[low]; 1158 src[low] = src[high]; 1159 src[high] = swap; 1160 ++low; 1161 --high; 1162 } 1163 } 1164 if(fromIndex < high) { 1165 qSortImpl_java_usekeys(src, fromIndex, high, c); 1166 } 1167 1168 if(low < toIndex) { 1169 qSortImpl_java_usekeys(src, low, toIndex, c); 1170 } 1171 } 1172 } 1173 1174 void qSortImpl_icu_usekeys(String src[], int fromIndex, int toIndex, com.ibm.icu.text.Collator c) { 1175 int low = fromIndex; 1176 int high = toIndex; 1177 String middle = ""; 1178 if (high > low) { 1179 middle = src[ (low + high) / 2 ]; 1180 while(low <= high) { 1181 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) { 1182 ++low; 1183 } 1184 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) { 1185 --high; 1186 } 1187 if(low <= high) { 1188 String swap = src[low]; 1189 src[low] = src[high]; 1190 src[high] = swap; 1191 ++low; 1192 --high; 1193 } 1194 } 1195 if(fromIndex < high) { 1196 qSortImpl_icu_usekeys(src, fromIndex, high, c); 1197 } 1198 1199 if(low < toIndex) { 1200 qSortImpl_icu_usekeys(src, low, toIndex, c); 1201 } 1202 } 1203 } 1204 1205 void qSortImpl_nokeys(String src[], int fromIndex, int toIndex, Comparator c) { 1206 int low = fromIndex; 1207 int high = toIndex; 1208 String middle = ""; 1209 if (high > low) { 1210 middle = src[ (low + high) / 2 ]; 1211 while(low <= high) { 1212 while((low < toIndex) && (compare(src[low], middle, c) < 0)) { 1213 ++low; 1214 } 1215 while((high > fromIndex) && (compare(src[high], middle, c) > 0)) { 1216 --high; 1217 } 1218 if(low <= high) { 1219 String swap = src[low]; 1220 src[low] = src[high]; 1221 src[high] = swap; 1222 ++low; 1223 --high; 1224 } 1225 } 1226 if(fromIndex < high) { 1227 qSortImpl_nokeys(src, fromIndex, high, c); 1228 } 1229 1230 if(low < toIndex) { 1231 qSortImpl_nokeys(src, low, toIndex, c); 1232 } 1233 } 1234 } 1235 1236 int compare(String source, String target, Comparator c) { 1237 globalCount++; 1238 return c.compare(source, target); 1239 } 1240 1241 int compare(java.text.CollationKey source, java.text.CollationKey target) { 1242 globalCount++; 1243 return source.compareTo(target); 1244 } 1245 1246 int compare(com.ibm.icu.text.CollationKey source, com.ibm.icu.text.CollationKey target) { 1247 globalCount++; 1248 return source.compareTo(target); 1249 } 1250 1251 //Class for command line option 1252 static class OptionSpec { 1253 String name; 1254 int type; 1255 StringBuffer value; 1256 public OptionSpec(String name, int type, StringBuffer value) { 1257 this.name = name; 1258 this.type = type; 1259 this.value = value; 1260 } 1261 } 1262 }