Home | History | Annotate | Download | only in perf
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  **********************************************************************
      5  * Copyright (c) 2002-2008, International Business Machines           *
      6  * Corporation and others.  All Rights Reserved.                      *
      7  **********************************************************************
      8  */
      9 package com.ibm.icu.dev.test.perf;
     10 
     11 import java.io.FileInputStream;
     12 import java.util.ArrayList;
     13 
     14 public class BreakIteratorPerformanceTest extends PerfTest {
     15 
     16     String fileContents;
     17 
     18     com.ibm.icu.text.BreakIterator iSentenceIter;
     19     com.ibm.icu.text.BreakIterator iWordIter;
     20     com.ibm.icu.text.BreakIterator iLineIter;
     21     com.ibm.icu.text.BreakIterator iCharacterIter;
     22     java.text.BreakIterator jSentenceIter;
     23     java.text.BreakIterator jWordIter;
     24     java.text.BreakIterator jLineIter;
     25     java.text.BreakIterator jCharacterIter;
     26     String[] iSentences;
     27     String[] iWords;
     28     String[] iLines;
     29     String[] iCharacters;
     30     String[] jSentences;
     31     String[] jWords;
     32     String[] jLines;
     33     String[] jCharacters;
     34 
     35     public static void main(String[] args) throws Exception {
     36         new BreakIteratorPerformanceTest().run(args);
     37     }
     38 
     39     protected void setup(String[] args) {
     40         try {
     41             // read in the input file, being careful with a possible BOM
     42             FileInputStream in = new FileInputStream(fileName);
     43             BOMFreeReader reader = new BOMFreeReader(in, encoding);
     44             fileContents = new String(readToEOS(reader));
     45 
     46             // // get rid of any characters that may cause differences between ICU4J and Java BreakIterator
     47             // // fileContents = fileContents.replaceAll("[\t\f\r\n\\-/ ]+", " ");
     48             // String res = "";
     49             // StringTokenizer tokenizer = new StringTokenizer(fileContents, "\t\f\r\n-/ ");
     50             // while (tokenizer.hasMoreTokens())
     51             // res += tokenizer.nextToken() + " ";
     52             // fileContents = res.trim();
     53 
     54             // create the break iterators with respect to locale
     55             if (locale == null) {
     56                 iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance();
     57                 iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance();
     58                 iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance();
     59                 iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance();
     60 
     61                 jSentenceIter = java.text.BreakIterator.getSentenceInstance();
     62                 jWordIter = java.text.BreakIterator.getWordInstance();
     63                 jLineIter = java.text.BreakIterator.getLineInstance();
     64                 jCharacterIter = java.text.BreakIterator.getCharacterInstance();
     65             } else {
     66                 iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance(locale);
     67                 iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance(locale);
     68                 iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance(locale);
     69                 iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance(locale);
     70 
     71                 jSentenceIter = java.text.BreakIterator.getSentenceInstance(locale);
     72                 jWordIter = java.text.BreakIterator.getWordInstance(locale);
     73                 jLineIter = java.text.BreakIterator.getLineInstance(locale);
     74                 jCharacterIter = java.text.BreakIterator.getCharacterInstance(locale);
     75             }
     76 
     77             iSentences = init(iSentenceIter);
     78             iWords = init(iWordIter);
     79             iLines = init(iLineIter);
     80             iCharacters = init(iCharacterIter);
     81             jSentences = init(jSentenceIter);
     82             jWords = init(jWordIter);
     83             jLines = init(jLineIter);
     84             jCharacters = init(jCharacterIter);
     85 
     86         } catch (Exception ex) {
     87             ex.printStackTrace();
     88             throw new RuntimeException(ex.getMessage());
     89         }
     90 
     91         // we created some heavy objects, so lets try to clean up a little before running the tests
     92         gc();
     93     }
     94 
     95     private String[] init(com.ibm.icu.text.BreakIterator iter) {
     96         // set the string to iterate on
     97         iter.setText(fileContents);
     98 
     99         // produce a token list
    100         ArrayList tokenList = new ArrayList();
    101         int start = iter.first();
    102         for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next())
    103             tokenList.add(fileContents.substring(start, end));
    104 
    105         // return the token list as a string array
    106         return (String[]) tokenList.toArray(new String[0]);
    107     }
    108 
    109     private String[] init(java.text.BreakIterator iter) {
    110         // set the string to iterate on
    111         iter.setText(fileContents);
    112 
    113         // produce a token list
    114         ArrayList tokenList = new ArrayList();
    115         int start = iter.first();
    116         for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next())
    117             tokenList.add(fileContents.substring(start, end));
    118 
    119         // return the token list as a string array
    120         return (String[]) tokenList.toArray(new String[0]);
    121     }
    122 
    123     PerfTest.Function createTestICU(final com.ibm.icu.text.BreakIterator iIter, final String[] correct,
    124             final String breakType) {
    125         return new PerfTest.Function() {
    126             public void call() {
    127                 int k = 0;
    128                 int start = iIter.first();
    129                 for (int end = iIter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iIter
    130                         .next())
    131                     if (!correct[k++].equals(fileContents.substring(start, end)))
    132                         throw new RuntimeException("ICU4J BreakIterator gave the wrong answer for " + breakType + " "
    133                                 + (k - 1) + " during the performance test. Cannot continue the performance test.");
    134                 if (k != correct.length)
    135                     throw new RuntimeException("ICU4J BreakIterator gave the wrong number of " + breakType
    136                             + "s during the performance test. Cannot continue the performance test.");
    137             }
    138 
    139             public long getOperationsPerIteration() {
    140                 return fileContents.length();
    141             }
    142         };
    143     }
    144 
    145     PerfTest.Function createTestJava(final java.text.BreakIterator jIter, final String[] correct, final String breakType) {
    146         return new PerfTest.Function() {
    147             public void call() {
    148                 int k = 0;
    149                 int start = jIter.first();
    150                 for (int end = jIter.next(); end != java.text.BreakIterator.DONE; start = end, end = jIter.next())
    151                     if (!correct[k++].equals(fileContents.substring(start, end)))
    152                         throw new RuntimeException("Java BreakIterator gave the wrong answer for " + breakType + " "
    153                                 + (k - 1) + " during the performance test. Cannot continue the performance test.");
    154                 if (k != correct.length)
    155                     throw new RuntimeException("Java BreakIterator gave the wrong number of " + breakType
    156                             + "s during the performance test. Cannot continue the performance test.");
    157             }
    158 
    159             public long getOperationsPerIteration() {
    160                 return fileContents.length();
    161             }
    162         };
    163     }
    164 
    165     PerfTest.Function TestICUSentences() {
    166         return createTestICU(iSentenceIter, iSentences, "sentence");
    167     }
    168 
    169     PerfTest.Function TestICUWords() {
    170         return createTestICU(iWordIter, iWords, "word");
    171     }
    172 
    173     PerfTest.Function TestICULines() {
    174         return createTestICU(iLineIter, iLines, "line");
    175     }
    176 
    177     PerfTest.Function TestICUCharacters() {
    178         return createTestICU(iCharacterIter, iCharacters, "character");
    179     }
    180 
    181     PerfTest.Function TestJavaSentences() {
    182         return createTestJava(jSentenceIter, jSentences, "sentence");
    183     }
    184 
    185     PerfTest.Function TestJavaWords() {
    186         return createTestJava(jWordIter, jWords, "word");
    187     }
    188 
    189     PerfTest.Function TestJavaLines() {
    190         return createTestJava(jLineIter, jLines, "line");
    191     }
    192 
    193     PerfTest.Function TestJavaCharacters() {
    194         return createTestJava(jCharacterIter, jCharacters, "character");
    195     }
    196 }
    197