1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2002-2007, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.dev.test.perf; 11 12 import com.ibm.icu.text.*; 13 import java.util.*; 14 import java.io.*; 15 import com.ibm.icu.impl.LocaleUtility; 16 17 public class CollationPerformanceTest { 18 static final String usageString = 19 "usage: collperf options...\n" 20 + "-help Display this message.\n" 21 + "-file file_name utf-16 format file of names.\n" 22 + "-locale name ICU locale to use. Default is en_US\n" 23 + "-rules file_name Collation rules file (overrides locale)\n" 24 //+ "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n" 25 //+ " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n" 26 //+ "-win Run test using Windows native services. (ICU is default)\n" 27 //+ "-unix Run test using Unix strxfrm, strcoll services.\n" 28 //+ "-uselen Use API with string lengths. Default is null-terminated strings\n" 29 + "-usekeys Run tests using sortkeys rather than strcoll\n" 30 + "-strcmp Run tests using u_strcmp rather than strcoll\n" 31 + "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n" 32 + "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n" 33 + "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n" 34 + " under test at each call point. For measuring test overhead.\n" 35 + "-terse Terse numbers-only output. Intended for use by scripts.\n" 36 + "-french French accent ordering\n" 37 + "-frenchoff No French accent ordering (for use with French locales.)\n" 38 + "-norm Normalizing mode on\n" 39 + "-shifted Shifted mode\n" 40 + "-lower Lower case first\n" 41 + "-upper Upper case first\n" 42 + "-case Enable separate case level\n" 43 + "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n" 44 + "-keyhist Produce a table sort key size vs. string length\n" 45 + "-binsearch Binary Search timing test\n" 46 + "-keygen Sort Key Generation timing test\n" 47 + "-qsort Quicksort timing test\n" 48 + "-iter Iteration Performance Test\n" 49 + "-dump Display strings, sort keys and CEs.\n" 50 + "-java Run test using java.text.Collator.\n"; 51 52 //enum {FLAG, NUM, STRING} type; 53 static StringBuffer temp_opt_fName = new StringBuffer(""); 54 static StringBuffer temp_opt_locale = new StringBuffer("en_US"); 55 //static StringBuffer temp_opt_langid = new StringBuffer("0"); // Defaults to value corresponding to opt_locale. 56 static StringBuffer temp_opt_rules = new StringBuffer(""); 57 static StringBuffer temp_opt_help = new StringBuffer(""); 58 static StringBuffer temp_opt_loopCount = new StringBuffer("1"); 59 static StringBuffer temp_opt_iLoopCount = new StringBuffer("1"); 60 static StringBuffer temp_opt_terse = new StringBuffer("false"); 61 static StringBuffer temp_opt_qsort = new StringBuffer(""); 62 static StringBuffer temp_opt_binsearch = new StringBuffer(""); 63 static StringBuffer temp_opt_icu = new StringBuffer("true"); 64 //static StringBuffer opt_win = new StringBuffer(""); // Run with Windows native functions. 65 //static StringBuffer opt_unix = new StringBuffer(""); // Run with UNIX strcoll, strxfrm functions. 66 //static StringBuffer opt_uselen = new StringBuffer(""); 67 static StringBuffer temp_opt_usekeys = new StringBuffer(""); 68 static StringBuffer temp_opt_strcmp = new StringBuffer(""); 69 static StringBuffer temp_opt_strcmpCPO = new StringBuffer(""); 70 static StringBuffer temp_opt_norm = new StringBuffer(""); 71 static StringBuffer temp_opt_keygen = new StringBuffer(""); 72 static StringBuffer temp_opt_french = new StringBuffer(""); 73 static StringBuffer temp_opt_frenchoff = new StringBuffer(""); 74 static StringBuffer temp_opt_shifted = new StringBuffer(""); 75 static StringBuffer temp_opt_lower = new StringBuffer(""); 76 static StringBuffer temp_opt_upper = new StringBuffer(""); 77 static StringBuffer temp_opt_case = new StringBuffer(""); 78 static StringBuffer temp_opt_level = new StringBuffer("0"); 79 static StringBuffer temp_opt_keyhist = new StringBuffer(""); 80 static StringBuffer temp_opt_itertest = new StringBuffer(""); 81 static StringBuffer temp_opt_dump = new StringBuffer(""); 82 static StringBuffer temp_opt_java = new StringBuffer(""); 83 84 85 static String opt_fName = ""; 86 static String opt_locale = "en_US"; 87 //static int opt_langid = 0; // Defaults to value corresponding to opt_locale. 88 static String opt_rules = ""; 89 static boolean opt_help = false; 90 static int opt_loopCount = 1; 91 static int opt_iLoopCount = 1; 92 static boolean opt_terse = false; 93 static boolean opt_qsort = false; 94 static boolean opt_binsearch = false; 95 static boolean opt_icu = true; 96 //static boolean opt_win = false; // Run with Windows native functions. 97 //static boolean opt_unix = false; // Run with UNIX strcoll, strxfrm functions. 98 //static boolean opt_uselen = false; 99 static boolean opt_usekeys = false; 100 static boolean opt_strcmp = false; 101 static boolean opt_strcmpCPO = false; 102 static boolean opt_norm = false; 103 static boolean opt_keygen = false; 104 static boolean opt_french = false; 105 static boolean opt_frenchoff = false; 106 static boolean opt_shifted = false; 107 static boolean opt_lower = false; 108 static boolean opt_upper = false; 109 static boolean opt_case = false; 110 static int opt_level = 0; 111 static boolean opt_keyhist = false; 112 static boolean opt_itertest = false; 113 static boolean opt_dump = false; 114 static boolean opt_java = false; 115 116 static OptionSpec[] options = { 117 new OptionSpec("-file", 2, temp_opt_fName), 118 new OptionSpec("-locale", 2, temp_opt_locale), 119 //new OptionSpec("-langid", 1, temp_opt_langid), 120 new OptionSpec("-rules", 2, temp_opt_rules), 121 new OptionSpec("-qsort", 0, temp_opt_qsort), 122 new OptionSpec("-binsearch", 0, temp_opt_binsearch), 123 new OptionSpec("-iter", 0, temp_opt_itertest), 124 //new OptionSpec("-win", 0, temp_opt_win), 125 //new OptionSpec("-unix", 0, temp_opt_unix), 126 //new OptionSpec("-uselen", 0, temp_opt_uselen), 127 new OptionSpec("-usekeys", 0, temp_opt_usekeys), 128 new OptionSpec("-strcmp", 0, temp_opt_strcmp), 129 new OptionSpec("-strcmpCPO", 0, temp_opt_strcmpCPO), 130 new OptionSpec("-norm", 0, temp_opt_norm), 131 new OptionSpec("-french", 0, temp_opt_french), 132 new OptionSpec("-frenchoff", 0, temp_opt_frenchoff), 133 new OptionSpec("-shifted", 0, temp_opt_shifted), 134 new OptionSpec("-lower", 0, temp_opt_lower), 135 new OptionSpec("-upper", 0, temp_opt_upper), 136 new OptionSpec("-case", 0, temp_opt_case), 137 new OptionSpec("-level", 1, temp_opt_level), 138 new OptionSpec("-keyhist", 0, temp_opt_keyhist), 139 new OptionSpec("-keygen", 0, temp_opt_keygen), 140 new OptionSpec("-loop", 1, temp_opt_loopCount), 141 new OptionSpec("-iloop", 1, temp_opt_iLoopCount), 142 new OptionSpec("-terse", 0, temp_opt_terse), 143 new OptionSpec("-dump", 0, temp_opt_dump), 144 new OptionSpec("-help", 0, temp_opt_help), 145 new OptionSpec("-?", 0, temp_opt_help), 146 new OptionSpec("-java", 0, temp_opt_java), 147 }; 148 149 static java.text.Collator javaCol = null; 150 static com.ibm.icu.text.Collator icuCol = null; 151 static NumberFormat nf = null; 152 static NumberFormat percent = null; 153 ArrayList list = null; 154 String[] tests = null; 155 int globalCount = 0; 156 157 public static void main(String[] args) { 158 CollationPerformanceTest collPerf = new CollationPerformanceTest(); 159 if ( !CollationPerformanceTest.processOptions(args) || opt_help || opt_fName.length()==0) { 160 System.out.println(usageString); 161 System.exit(1); 162 } 163 164 nf = NumberFormat.getInstance(); 165 nf.setMaximumFractionDigits(2); 166 percent = NumberFormat.getPercentInstance(); 167 168 collPerf.setOptions(); 169 collPerf.readDataLines(); 170 171 if (opt_dump) { 172 collPerf.doDump(); 173 } 174 175 if (opt_qsort) { 176 collPerf.doQSort(); 177 } 178 179 if (opt_binsearch) { 180 collPerf.doBinarySearch(); 181 } 182 183 if (opt_keygen) { 184 collPerf.doKeyGen(); 185 } 186 187 if (opt_keyhist) { 188 collPerf.doKeyHist(); 189 } 190 191 if (opt_itertest) { 192 collPerf.doIterTest(); 193 } 194 195 } 196 197 //Dump file lines, CEs, Sort Keys if requested 198 void doDump() { 199 for(int i = 0; i < list.size(); i++) { 200 //print the line 201 String line = com.ibm.icu.impl.Utility.escape((String)list.get(i)); 202 System.out.println(line); 203 204 System.out.print(" CEs: "); 205 CollationElementIterator CEiter = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationElementIterator(line); 206 int ce; 207 int j = 0; 208 for(;;) { 209 ce = CEiter.next(); 210 if (ce == CollationElementIterator.NULLORDER) { 211 break; 212 } 213 //System.out.print(); 214 String outStr = Integer.toHexString(ce); 215 for (int len = 0; len < 8 - outStr.length(); len++) { 216 outStr ='0' + outStr; 217 } 218 System.out.print(outStr + " "); 219 if(++j >8) { 220 System.out.print("\n "); 221 j = 0; 222 } 223 } 224 225 System.out.print("\n ICU Sort Key: "); 226 CollationKey ck = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationKey(line); 227 byte[] cks = ck.toByteArray(); 228 j = 0; 229 for(int k = 0; k < cks.length; k++) { 230 String outStr = Integer.toHexString(cks[k]); 231 switch (outStr.length()) { 232 case 1: outStr = '0' + outStr; 233 break; 234 case 8: outStr = outStr.substring(6); 235 break; 236 } 237 System.out.print(outStr); 238 System.out.print(" "); 239 j++; 240 if(j > 0 && j % 20 == 0) { 241 System.out.print("\n "); 242 } 243 } 244 System.out.println("\n"); 245 } 246 } 247 248 /**--------------------------------------------------------------------------------------- 249 * 250 * doQSort() The quick sort timing test. 251 * 252 *--------------------------------------------------------------------------------------- 253 */ 254 void doQSort() { 255 callGC(); 256 //String[] sortTests = (String[]) tests.clone(); 257 //Adjust loop count to compensate for file size. QSort should be nlog(n) 258 double dLoopCount = opt_loopCount * 3000 / ((Math.log(tests.length) / Math.log(10)* tests.length)); 259 260 if(opt_usekeys) { 261 dLoopCount *= 5; 262 } 263 264 int adj_loopCount = (int)dLoopCount; 265 if(adj_loopCount < 1) { 266 adj_loopCount = 1; 267 } 268 269 globalCount = 0; 270 long startTime = 0; 271 long endTime = 0; 272 if (opt_icu && opt_usekeys) { 273 startTime = System.currentTimeMillis(); 274 qSortImpl_icu_usekeys(tests, 0, tests.length -1, icuCol); 275 endTime = System.currentTimeMillis(); 276 } 277 if (opt_icu && !opt_usekeys){ 278 startTime = System.currentTimeMillis(); 279 qSortImpl_nokeys(tests, 0, tests.length -1, icuCol); 280 endTime = System.currentTimeMillis(); 281 } 282 if (opt_java && opt_usekeys) { 283 startTime = System.currentTimeMillis(); 284 qSortImpl_java_usekeys(tests, 0, tests.length -1, javaCol); 285 endTime = System.currentTimeMillis(); 286 } 287 if (opt_java && !opt_usekeys){ 288 startTime = System.currentTimeMillis(); 289 qSortImpl_nokeys(tests, 0, tests.length -1, javaCol); 290 endTime = System.currentTimeMillis(); 291 } 292 long elapsedTime = endTime - startTime; 293 int ns = (int)(1000000 * elapsedTime / (globalCount + 0.0)); 294 if (!opt_terse) { 295 System.out.println("qsort: total # of string compares = " + globalCount); 296 System.out.println("qsort: time per compare = " + ns); 297 } else { 298 System.out.println(ns); 299 } 300 } 301 302 /**--------------------------------------------------------------------------------------- 303 * 304 * doBinarySearch() Binary Search timing test. Each name from the list 305 * is looked up in the full sorted list of names. 306 * 307 *--------------------------------------------------------------------------------------- 308 */ 309 void doBinarySearch() { 310 callGC(); 311 int gCount = 0; 312 int loops = 0; 313 double dLoopCount = opt_loopCount * 3000 / (Math.log(tests.length) / Math.log(10)* tests.length); 314 long startTime = 0; 315 long elapsedTime = 0; 316 317 if(opt_usekeys) { 318 dLoopCount *= 5; 319 } 320 int adj_loopCount = (int)dLoopCount; 321 if(adj_loopCount < 1) { 322 adj_loopCount = 1; 323 } 324 325 //int opt2 = 0; 326 327 for(;;) { //not really a loop, just allows "break" to work, to simplify 328 //inadvertantly running more than one test through here 329 if(opt_strcmp) { 330 int r = 0; 331 startTime = System.currentTimeMillis(); 332 for(loops = 0; loops < adj_loopCount; loops++) { 333 for (int j = 0; j < tests.length; j++) { 334 int hi = tests.length-1; 335 int lo = 0; 336 int guess = -1; 337 for(;;) { 338 int newGuess = (hi + lo) / 2; 339 if(newGuess == guess){ 340 break; 341 } 342 guess = newGuess; 343 r = tests[j].compareTo(tests[guess]); 344 gCount++; 345 if(r == 0) { 346 break; 347 } 348 if (r < 0) { 349 hi = guess; 350 } else { 351 lo = guess; 352 } 353 } 354 } 355 } 356 elapsedTime = System.currentTimeMillis() - startTime; 357 break; 358 } 359 360 if (opt_strcmpCPO) { 361 int r = 0; 362 startTime = System.currentTimeMillis(); 363 for(loops = 0; loops < adj_loopCount; loops++) { 364 for (int j = 0; j < tests.length; j++) { 365 int hi = tests.length-1; 366 int lo = 0; 367 int guess = -1; 368 for(;;) { 369 int newGuess = (hi + lo) / 2; 370 if(newGuess == guess){ 371 break; 372 } 373 guess = newGuess; 374 r = com.ibm.icu.text.Normalizer.compare(tests[j], tests[guess], Normalizer.COMPARE_CODE_POINT_ORDER); 375 gCount++; 376 if(r == 0) { 377 break; 378 } 379 if (r < 0) { 380 hi = guess; 381 } else { 382 lo = guess; 383 } 384 } 385 } 386 } 387 elapsedTime = System.currentTimeMillis() - startTime; 388 break; 389 } 390 391 if (opt_icu) { 392 393 int r = 0; 394 startTime = System.currentTimeMillis(); 395 for (loops = 0; loops < adj_loopCount; loops++) { 396 for (int j = 0; j < tests.length; j++) { 397 int hi = tests.length - 1; 398 int lo = 0; 399 int guess = -1; 400 for (;;) { 401 int newGuess = (hi + lo) / 2; 402 if (newGuess == guess) { 403 break; 404 } 405 guess = newGuess; 406 if (opt_usekeys) { 407 com.ibm.icu.text.CollationKey sortKey1 = icuCol.getCollationKey(tests[j]); 408 com.ibm.icu.text.CollationKey sortKey2 = icuCol.getCollationKey(tests[guess]); 409 r = sortKey1.compareTo(sortKey2); 410 gCount ++; 411 } else { 412 r = icuCol.compare(tests[j], tests[guess]); 413 gCount++; 414 } 415 if (r == 0) { 416 break; 417 } 418 if (r < 0) { 419 hi = guess; 420 } else { 421 lo = guess; 422 } 423 } 424 } 425 } 426 elapsedTime = System.currentTimeMillis() - startTime; 427 break; 428 } 429 if (opt_java) { 430 431 int r = 0; 432 startTime = System.currentTimeMillis(); 433 for (loops = 0; loops < adj_loopCount; loops++) { 434 for (int j = 0; j < tests.length; j++) { 435 int hi = tests.length - 1; 436 int lo = 0; 437 int guess = -1; 438 for (;;) { 439 int newGuess = (hi + lo) / 2; 440 if (newGuess == guess) { 441 break; 442 } 443 guess = newGuess; 444 if (opt_usekeys) { 445 java.text.CollationKey sortKey1 = javaCol.getCollationKey(tests[j]); 446 java.text.CollationKey sortKey2 = javaCol.getCollationKey(tests[guess]); 447 r = sortKey1.compareTo(sortKey2); 448 gCount ++; 449 } else { 450 r = javaCol.compare(tests[j], tests[guess]); 451 gCount++; 452 } 453 if (r == 0) { 454 break; 455 } 456 if (r < 0) { 457 hi = guess; 458 } else { 459 lo = guess; 460 } 461 } 462 } 463 } 464 elapsedTime = System.currentTimeMillis() - startTime; 465 break; 466 } 467 break; 468 } 469 int ns = (int)((float)(1000000) * (float)elapsedTime / (float)gCount); 470 if (!opt_terse) { 471 System.out.println("binary search: total # of string compares = " + gCount); 472 System.out.println("binary search: compares per loop = " + gCount / loops); 473 System.out.println("binary search: time per compare = " + ns); 474 } else { 475 System.out.println(ns); 476 } 477 } 478 479 /**--------------------------------------------------------------------------------------- 480 * 481 * doKeyGen() Key Generation Timing Test 482 * 483 *--------------------------------------------------------------------------------------- 484 */ 485 void doKeyGen() { 486 callGC(); 487 488 // Adjust loop count to compensate for file size. Should be order n 489 double dLoopCount = opt_loopCount * (1000.0 / (double)list.size()); 490 int adj_loopCount = (int)dLoopCount; 491 if (adj_loopCount < 1) adj_loopCount = 1; 492 493 long startTime = 0; 494 long totalKeyLen = 0; 495 long totalChars = 0; 496 if (opt_java) { 497 startTime = System.currentTimeMillis(); 498 for (int loops=0; loops<adj_loopCount; loops++) { 499 for (int line=0; line < tests.length; line++) { 500 for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 501 totalChars += tests[line].length(); 502 byte[] sortKey = javaCol.getCollationKey(tests[line]).toByteArray(); 503 totalKeyLen += sortKey.length; 504 } 505 } 506 } 507 } else { 508 startTime = System.currentTimeMillis(); 509 for (int loops=0; loops<adj_loopCount; loops++) { 510 for (int line=0; line < tests.length; line++) { 511 for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 512 totalChars += tests[line].length(); 513 byte[] sortKey = icuCol.getCollationKey(tests[line]).toByteArray(); 514 totalKeyLen += sortKey.length; 515 } 516 } 517 } 518 } 519 520 long elapsedTime = System.currentTimeMillis() - startTime; 521 long ns = (long)(1000000 * elapsedTime / (adj_loopCount * tests.length + 0.0)); 522 if (!opt_terse) { 523 System.out.println("Sort Key Generation: total # of keys =" + adj_loopCount * tests.length); 524 System.out.println("Sort Key Generation: time per key = " + ns + " ns"); 525 System.out.println("Key Length / character = " + nf.format(totalKeyLen / (totalChars + 0.0))); 526 } 527 else { 528 System.out.print(ns + ", "); 529 System.out.println(nf.format(totalKeyLen / (totalChars + 0.0)) + ", "); 530 } 531 } 532 533 /**--------------------------------------------------------------------------------------- 534 * 535 * doKeyHist() Output a table of data for average sort key size vs. string length. 536 * 537 *--------------------------------------------------------------------------------------- 538 */ 539 void doKeyHist() { 540 callGC(); 541 int maxLen = 0; 542 543 // Find the maximum string length 544 for (int i = 0; i < tests.length; i++) { 545 if (tests[i].length() > maxLen) maxLen = tests[i].length(); 546 } 547 548 int[] accumulatedLen = new int[maxLen + 1]; 549 int[] numKeysOfSize = new int[maxLen + 1]; 550 551 // Fill the arrays... 552 for (int i = 0; i < tests.length; i++) { 553 int len = tests[i].length(); 554 accumulatedLen[len] += icuCol.getCollationKey(tests[i]).toByteArray().length; 555 numKeysOfSize[len] += 1; 556 } 557 558 // And write out averages 559 System.out.println("String Length, Avg Key Length, Avg Key Len per char"); 560 for (int i = 1; i <= maxLen; i++) { 561 if (numKeysOfSize[i] > 0) { 562 System.out.println(i + ", " + nf.format(accumulatedLen[i] / (numKeysOfSize[i]+ 0.0)) + ", " 563 + nf.format(accumulatedLen[i] / (numKeysOfSize[i] * i + 0.0))); 564 } 565 } 566 567 } 568 569 void doForwardIterTest() { 570 callGC(); 571 System.out.print("\n\nPerforming forward iteration performance test with "); 572 System.out.println("performance test on strings from file -----------"); 573 574 CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(""); 575 576 int gCount = 0; 577 int count = 0; 578 long startTime = System.currentTimeMillis(); 579 while (count < opt_loopCount) { 580 int linecount = 0; 581 while (linecount < tests.length) { 582 String str = tests[linecount]; 583 iter.setText(str); 584 while (iter.next() != CollationElementIterator.NULLORDER) { 585 gCount++; 586 } 587 linecount ++; 588 } 589 count ++; 590 } 591 592 long elapsedTime = System.currentTimeMillis() - startTime; 593 System.out.println("elapsedTime " + elapsedTime + " ms"); 594 595 // empty loop recalculation 596 count = 0; 597 startTime = System.currentTimeMillis(); 598 while (count < opt_loopCount) { 599 int linecount = 0; 600 while (linecount < tests.length) { 601 String str = tests[linecount]; 602 iter.setText(str); 603 linecount ++; 604 } 605 count ++; 606 } 607 elapsedTime -= (System.currentTimeMillis() - startTime); 608 System.out.println("elapsedTime " + elapsedTime + " ms"); 609 610 int ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 611 System.out.println("Total number of strings compared " + tests.length 612 + "in " + opt_loopCount + " loops"); 613 System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns); 614 System.out.println("performance test on skipped-5 concatenated strings from file -----------"); 615 616 String totalStr = ""; 617 int strlen = 0; 618 // appending all the strings 619 int linecount = 0; 620 while (linecount < tests.length) { 621 totalStr += tests[linecount]; 622 strlen += tests[linecount].length(); 623 linecount ++; 624 } 625 System.out.println("Total size of strings " + strlen); 626 627 gCount = 0; 628 count = 0; 629 iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr); 630 strlen -= 5; // any left over characters are not iterated, 631 // this is to ensure the backwards and forwards iterators 632 // gets the same position 633 int strindex = 0; 634 startTime = System.currentTimeMillis(); 635 while (count < opt_loopCount) { 636 int count5 = 5; 637 strindex = 0; 638 iter.setOffset(strindex); 639 while (true) { 640 if (iter.next() == CollationElementIterator.NULLORDER) { 641 break; 642 } 643 gCount++; 644 count5 --; 645 if (count5 == 0) { 646 strindex += 10; 647 if (strindex > strlen) { 648 break; 649 } 650 iter.setOffset(strindex); 651 count5 = 5; 652 } 653 } 654 count ++; 655 } 656 657 elapsedTime = System.currentTimeMillis() - startTime; 658 System.out.println("elapsedTime " + elapsedTime); 659 660 // empty loop recalculation 661 int tempgCount = 0; 662 count = 0; 663 startTime = System.currentTimeMillis(); 664 while (count < opt_loopCount) { 665 int count5 = 5; 666 strindex = 0; 667 iter.setOffset(strindex); 668 while (true) { 669 tempgCount ++; 670 count5 --; 671 if (count5 == 0) { 672 strindex += 10; 673 if (strindex > strlen) { 674 break; 675 } 676 iter.setOffset(strindex); 677 count5 = 5; 678 } 679 } 680 count ++; 681 } 682 elapsedTime -= (System.currentTimeMillis() - startTime); 683 System.out.println("elapsedTime " + elapsedTime); 684 685 System.out.println("gCount " + gCount); 686 ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 687 System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns); 688 } 689 690 void doBackwardIterTest() { 691 System.out.print("\n\nPerforming backward iteration performance test with "); 692 System.out.println("performance test on strings from file -----------\n"); 693 694 CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(""); 695 696 int gCount = 0; 697 int count = 0; 698 long startTime = System.currentTimeMillis(); 699 while (count < opt_loopCount) { 700 int linecount = 0; 701 while (linecount < tests.length) { 702 String str = tests[linecount]; 703 iter.setText(str); 704 while (iter.previous() != CollationElementIterator.NULLORDER) { 705 gCount++; 706 } 707 linecount ++; 708 } 709 count ++; 710 } 711 long elapsedTime = System.currentTimeMillis() - startTime; 712 System.out.println("elapsedTime " + elapsedTime + " ms"); 713 714 // empty loop recalculation 715 count = 0; 716 startTime = System.currentTimeMillis(); 717 while (count < opt_loopCount) { 718 int linecount = 0; 719 while (linecount < tests.length) { 720 String str = tests[linecount]; 721 iter.setText(str); 722 linecount ++; 723 } 724 count ++; 725 } 726 elapsedTime -= (System.currentTimeMillis() - startTime); 727 System.out.println("elapsedTime " + elapsedTime + " ms"); 728 729 int ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 730 System.out.println("Total number of strings compared " + tests.length 731 + "in " + opt_loopCount + " loops"); 732 System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns); 733 System.out.println("performance test on skipped-5 concatenated strings from file -----------"); 734 735 String totalStr = ""; 736 int strlen = 0; 737 // appending all the strings 738 int linecount = 0; 739 while (linecount < tests.length) { 740 totalStr += tests[linecount]; 741 strlen += tests[linecount].length(); 742 linecount ++; 743 } 744 System.out.println("Total size of strings " + strlen); 745 746 gCount = 0; 747 count = 0; 748 749 iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr); 750 int strindex = 0; 751 startTime = System.currentTimeMillis(); 752 while (count < opt_loopCount) { 753 int count5 = 5; 754 strindex = 5; 755 iter.setOffset(strindex); 756 while (true) { 757 if (iter.previous() == CollationElementIterator.NULLORDER) { 758 break; 759 } 760 gCount ++; 761 count5 --; 762 if (count5 == 0) { 763 strindex += 10; 764 if (strindex > strlen) { 765 break; 766 } 767 iter.setOffset(strindex); 768 count5 = 5; 769 } 770 } 771 count ++; 772 } 773 774 elapsedTime = System.currentTimeMillis() - startTime; 775 System.out.println("elapsedTime " + elapsedTime); 776 777 // empty loop recalculation 778 count = 0; 779 int tempgCount = 0; 780 startTime = System.currentTimeMillis(); 781 while (count < opt_loopCount) { 782 int count5 = 5; 783 strindex = 5; 784 iter.setOffset(strindex); 785 while (true) { 786 tempgCount ++; 787 count5 --; 788 if (count5 == 0) { 789 strindex += 10; 790 if (strindex > strlen) { 791 break; 792 } 793 iter.setOffset(strindex); 794 count5 = 5; 795 } 796 } 797 count ++; 798 } 799 elapsedTime -= (System.currentTimeMillis() - startTime); 800 System.out.println("elapsedTime " + elapsedTime); 801 802 System.out.println("gCount " + gCount); 803 ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 804 System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns); 805 } 806 807 808 /**--------------------------------------------------------------------------------------- 809 * 810 * doIterTest() Iteration test 811 * 812 *--------------------------------------------------------------------------------------- 813 */ 814 void doIterTest() { 815 doForwardIterTest(); 816 doBackwardIterTest(); 817 } 818 819 void setOptions() { 820 821 if (opt_java) { 822 opt_icu = false; 823 } 824 825 if (opt_rules.length() != 0) { 826 try { 827 icuCol = new com.ibm.icu.text.RuleBasedCollator(getCollationRules(opt_rules)); 828 } catch (Exception e) { 829 System.out.println("Cannot open rules:" + e.getMessage()); 830 System.exit(1); 831 } 832 } else { 833 icuCol = com.ibm.icu.text.Collator.getInstance( 834 LocaleUtility.getLocaleFromName(opt_locale)); 835 } 836 837 javaCol = java.text.Collator.getInstance( 838 LocaleUtility.getLocaleFromName(opt_locale)); 839 840 if (opt_norm) { 841 javaCol.setDecomposition(java.text.Collator.CANONICAL_DECOMPOSITION); 842 icuCol.setDecomposition(com.ibm.icu.text.Collator.CANONICAL_DECOMPOSITION); 843 } 844 845 if (opt_french && opt_frenchoff) { 846 System.err.println("Error: specified both -french and -frenchoff options."); 847 } 848 849 if (opt_french) { 850 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(true); 851 } 852 if (opt_frenchoff) { 853 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(false); 854 } 855 856 if (opt_lower) { 857 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setLowerCaseFirst(true); 858 } 859 860 if (opt_upper) { 861 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setUpperCaseFirst(true); 862 } 863 864 if (opt_shifted) { 865 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setAlternateHandlingShifted(true); 866 } 867 868 if (opt_level != 0) { 869 switch (opt_level) { 870 case 1 : 871 javaCol.setStrength(java.text.Collator.PRIMARY); 872 icuCol.setStrength(com.ibm.icu.text.Collator.PRIMARY); 873 break; 874 case 2 : 875 javaCol.setStrength(java.text.Collator.SECONDARY); 876 icuCol.setStrength(com.ibm.icu.text.Collator.SECONDARY); 877 break; 878 case 3 : 879 javaCol.setStrength(java.text.Collator.TERTIARY); 880 icuCol.setStrength(com.ibm.icu.text.Collator.TERTIARY); 881 break; 882 case 4 : 883 icuCol.setStrength(com.ibm.icu.text.Collator.QUATERNARY); 884 break; 885 case 5 : 886 javaCol.setStrength(java.text.Collator.IDENTICAL); 887 icuCol.setStrength(com.ibm.icu.text.Collator.IDENTICAL); 888 break; 889 default: 890 System.err.println("-level param must be between 1 and 5\n"); 891 System.exit(1); 892 } 893 } 894 // load classes at least once before starting 895 javaCol.compare("a", "b"); 896 icuCol.compare("a", "b"); 897 } 898 899 static boolean processOptions(String[] args) { 900 int argNum; 901 for (argNum =0; argNum < args.length; argNum++) { 902 for (int i = 0; i < options.length; i++) { 903 if (args[argNum].equalsIgnoreCase(options[i].name)) { 904 switch (options[i].type) { 905 case 0: 906 options[i].value.delete(0, options[i].value.capacity()).append("true"); 907 break; 908 case 1: 909 argNum++; 910 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) { 911 System.err.println("value expected for"+ options[i].name +"option.\n"); 912 return false; 913 } 914 try { 915 /* int value =*/ Integer.parseInt(args[argNum]); 916 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]); 917 } catch (NumberFormatException e) { 918 System.err.println("Expected: a number value"); 919 return false; 920 } 921 break; 922 case 2: 923 argNum++; 924 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) { 925 System.err.println("value expected for"+ options[i].name +"option.\n"); 926 return false; 927 } 928 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]); 929 break; 930 default: 931 System.err.println("Option type error: {FLAG=0, NUM=1, STRING=2}"); 932 return false; 933 } 934 } 935 } 936 } 937 938 opt_fName = temp_opt_fName.toString(); 939 opt_locale = temp_opt_locale.toString(); 940 opt_rules = temp_opt_rules.toString(); 941 if (temp_opt_help.toString().equalsIgnoreCase("true")) { 942 opt_help = true; 943 } 944 opt_loopCount = Integer.parseInt(temp_opt_loopCount.toString()); 945 opt_iLoopCount = Integer.parseInt(temp_opt_iLoopCount.toString()); 946 if (temp_opt_terse.toString().equalsIgnoreCase("true")) { 947 opt_terse = true; 948 } 949 if (temp_opt_qsort.toString().equalsIgnoreCase("true")) { 950 opt_qsort = true; 951 } 952 if (temp_opt_binsearch.toString().equalsIgnoreCase("true")) { 953 opt_binsearch = true; 954 } 955 if (temp_opt_icu.toString().equalsIgnoreCase("true")) { 956 opt_icu = true; 957 } 958 if (temp_opt_usekeys.toString().equalsIgnoreCase("true")) { 959 opt_usekeys = true; 960 } 961 if (temp_opt_strcmp.toString().equalsIgnoreCase("true")) { 962 opt_strcmp = true; 963 } 964 if (temp_opt_strcmpCPO.toString().equalsIgnoreCase("true")) { 965 opt_strcmpCPO = true; 966 } 967 if (temp_opt_keygen.toString().equalsIgnoreCase("true")) { 968 opt_keygen = true; 969 } 970 if (temp_opt_norm.toString().equalsIgnoreCase("true")) { 971 opt_norm = true; 972 } 973 if (temp_opt_french.toString().equalsIgnoreCase("true")) { 974 opt_french = true; 975 } 976 if (temp_opt_frenchoff.toString().equalsIgnoreCase("true")) { 977 opt_frenchoff = true; 978 } 979 if (temp_opt_shifted.toString().equalsIgnoreCase("true")) { 980 opt_shifted = true; 981 } 982 if (temp_opt_lower.toString().equalsIgnoreCase("true")) { 983 opt_lower = true; 984 } 985 if (temp_opt_upper.toString().equalsIgnoreCase("true")) { 986 opt_upper = true; 987 } 988 if (temp_opt_case.toString().equalsIgnoreCase("true")) { 989 opt_case = true; 990 } 991 opt_level = Integer.parseInt(temp_opt_level.toString()); 992 if (temp_opt_keyhist.toString().equalsIgnoreCase("true")) { 993 opt_keyhist = true; 994 } 995 if (temp_opt_itertest.toString().equalsIgnoreCase("true")) { 996 opt_itertest = true; 997 } 998 if (temp_opt_dump.toString().equalsIgnoreCase("true")) { 999 opt_dump = true; 1000 } 1001 if (temp_opt_java.toString().equalsIgnoreCase("true")) { 1002 opt_java = true; 1003 } 1004 1005 return true; 1006 } 1007 1008 /** 1009 * Invoke the runtime's garbage collection procedure repeatedly 1010 * until the amount of free memory stabilizes to within 10%. 1011 */ 1012 private void callGC() { 1013 // From "Java Platform Performance". This is the procedure 1014 // recommended by Javasoft. 1015 try { 1016 System.gc(); 1017 Thread.sleep(100); 1018 System.runFinalization(); 1019 Thread.sleep(100); 1020 1021 System.gc(); 1022 Thread.sleep(100); 1023 System.runFinalization(); 1024 Thread.sleep(100); 1025 } catch (InterruptedException e) {} 1026 } 1027 1028 //private boolean needCRLF = false; 1029 1030 public int DOTMASK = 0x7FF; 1031 1032 void dot(int i) { 1033 if ((i % DOTMASK) == 0) { 1034 //needCRLF = true; 1035 // I do not know why print the dot here 1036 //System.out.print('.'); 1037 } 1038 } 1039 1040 String readDataLine(BufferedReader br) throws Exception { 1041 String originalLine = ""; 1042 String line = ""; 1043 1044 try { 1045 line = originalLine = br.readLine(); 1046 if (line == null) return null; 1047 if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1); 1048 int commentPos = line.indexOf('#'); 1049 if (commentPos >= 0) line = line.substring(0, commentPos); 1050 line = line.trim(); 1051 } catch (Exception e) { 1052 throw new Exception("Line \"{0}\", \"{1}\"" + originalLine + " " 1053 + line + " " + e.toString()); 1054 } 1055 return line; 1056 } 1057 1058 void readDataLines() { 1059 // Read in the input file. 1060 // File assumed to be utf-16. 1061 // Lines go onto heap buffers. Global index array to line starts is created. 1062 // Lines themselves are null terminated. 1063 // 1064 FileInputStream fis = null; 1065 InputStreamReader isr = null; 1066 BufferedReader br = null; 1067 try { 1068 fis = new FileInputStream(opt_fName); 1069 isr = new InputStreamReader(fis, "UTF-8"); 1070 br= new BufferedReader(isr, 32*1024); 1071 } catch (Exception e) { 1072 System.err.println("Error: File access exception: " + e.getMessage() + "!"); 1073 System.exit(2); 1074 } 1075 1076 int counter = 0; 1077 1078 list = new ArrayList(); 1079 while (true) { 1080 String line = null; 1081 try { 1082 line = readDataLine(br); 1083 } catch (Exception e) { 1084 System.err.println("Read File Error" + e.getMessage() + "!"); 1085 System.exit(1); 1086 } 1087 1088 if (line == null) break; 1089 if (line.length() == 0) continue; 1090 dot(counter++); 1091 list.add(line); 1092 } 1093 if (!opt_terse) { 1094 System.out.println("Read " + counter + " lines in file"); 1095 } 1096 1097 int size = list.size(); 1098 tests = new String [size]; 1099 1100 for (int i = 0; i < size; ++i) { 1101 tests[i] = (String) list.get(i); 1102 } 1103 } 1104 1105 /** 1106 * Get the Collator Rules 1107 * The Rule File format: 1108 * 1. leading and trailing whitespaces will be omitted 1109 * 2. lines with the leading character '#' will be treated as comments 1110 * 3. File encoding is ISO-8859-1 1111 */ 1112 String getCollationRules(String ruleFileName) { 1113 FileInputStream fis = null; 1114 InputStreamReader isr = null; 1115 BufferedReader br = null; 1116 try { 1117 fis = new FileInputStream(opt_rules); 1118 isr = new InputStreamReader(fis,"ISO-8859-1"); 1119 br= new BufferedReader(isr); 1120 } catch (Exception e) { 1121 System.err.println("Error: File access exception: " + e.getMessage() + "!"); 1122 System.exit(2); 1123 } 1124 String rules = ""; 1125 String line = ""; 1126 while (true) { 1127 try { 1128 line = br.readLine(); 1129 } catch (IOException e) { 1130 System.err.println("Read File Error" + e.getMessage() + "!"); 1131 System.exit(1); 1132 } 1133 if (line == null) { 1134 break; 1135 } 1136 int commentPos = line.indexOf('#'); 1137 if (commentPos >= 0) line = line.substring(0, commentPos); 1138 line = line.trim(); 1139 rules = rules + line; 1140 } 1141 return rules; 1142 } 1143 1144 //Implementing qsort 1145 void qSortImpl_java_usekeys(String src[], int fromIndex, int toIndex, java.text.Collator c) { 1146 int low = fromIndex; 1147 int high = toIndex; 1148 String middle = ""; 1149 if (high > low) { 1150 middle = src[ (low + high) / 2 ]; 1151 while(low <= high) { 1152 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) { 1153 ++low; 1154 } 1155 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) { 1156 --high; 1157 } 1158 if(low <= high) { 1159 String swap = src[low]; 1160 src[low] = src[high]; 1161 src[high] = swap; 1162 ++low; 1163 --high; 1164 } 1165 } 1166 if(fromIndex < high) { 1167 qSortImpl_java_usekeys(src, fromIndex, high, c); 1168 } 1169 1170 if(low < toIndex) { 1171 qSortImpl_java_usekeys(src, low, toIndex, c); 1172 } 1173 } 1174 } 1175 1176 void qSortImpl_icu_usekeys(String src[], int fromIndex, int toIndex, com.ibm.icu.text.Collator c) { 1177 int low = fromIndex; 1178 int high = toIndex; 1179 String middle = ""; 1180 if (high > low) { 1181 middle = src[ (low + high) / 2 ]; 1182 while(low <= high) { 1183 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) { 1184 ++low; 1185 } 1186 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) { 1187 --high; 1188 } 1189 if(low <= high) { 1190 String swap = src[low]; 1191 src[low] = src[high]; 1192 src[high] = swap; 1193 ++low; 1194 --high; 1195 } 1196 } 1197 if(fromIndex < high) { 1198 qSortImpl_icu_usekeys(src, fromIndex, high, c); 1199 } 1200 1201 if(low < toIndex) { 1202 qSortImpl_icu_usekeys(src, low, toIndex, c); 1203 } 1204 } 1205 } 1206 1207 void qSortImpl_nokeys(String src[], int fromIndex, int toIndex, Comparator c) { 1208 int low = fromIndex; 1209 int high = toIndex; 1210 String middle = ""; 1211 if (high > low) { 1212 middle = src[ (low + high) / 2 ]; 1213 while(low <= high) { 1214 while((low < toIndex) && (compare(src[low], middle, c) < 0)) { 1215 ++low; 1216 } 1217 while((high > fromIndex) && (compare(src[high], middle, c) > 0)) { 1218 --high; 1219 } 1220 if(low <= high) { 1221 String swap = src[low]; 1222 src[low] = src[high]; 1223 src[high] = swap; 1224 ++low; 1225 --high; 1226 } 1227 } 1228 if(fromIndex < high) { 1229 qSortImpl_nokeys(src, fromIndex, high, c); 1230 } 1231 1232 if(low < toIndex) { 1233 qSortImpl_nokeys(src, low, toIndex, c); 1234 } 1235 } 1236 } 1237 1238 int compare(String source, String target, Comparator c) { 1239 globalCount++; 1240 return c.compare(source, target); 1241 } 1242 1243 int compare(java.text.CollationKey source, java.text.CollationKey target) { 1244 globalCount++; 1245 return source.compareTo(target); 1246 } 1247 1248 int compare(com.ibm.icu.text.CollationKey source, com.ibm.icu.text.CollationKey target) { 1249 globalCount++; 1250 return source.compareTo(target); 1251 } 1252 1253 //Class for command line option 1254 static class OptionSpec { 1255 String name; 1256 int type; 1257 StringBuffer value; 1258 public OptionSpec(String name, int type, StringBuffer value) { 1259 this.name = name; 1260 this.type = type; 1261 this.value = value; 1262 } 1263 } 1264 }