Home | History | Annotate | Download | only in perf
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  **********************************************************************
      5  * Copyright (c) 2002-2008, International Business Machines
      6  * Corporation and others.  All Rights Reserved.
      7  **********************************************************************
      8  */
      9 package com.ibm.icu.dev.test.perf;
     10 
     11 import java.io.BufferedReader;
     12 import java.io.FileInputStream;
     13 import java.io.IOException;
     14 import java.io.InputStream;
     15 import java.io.InputStreamReader;
     16 import java.io.PushbackInputStream;
     17 import java.io.Reader;
     18 import java.lang.reflect.Method;
     19 import java.util.ArrayList;
     20 import java.util.Arrays;
     21 import java.util.HashMap;
     22 import java.util.HashSet;
     23 import java.util.Iterator;
     24 import java.util.Locale;
     25 import java.util.Map;
     26 import java.util.Set;
     27 
     28 import com.ibm.icu.dev.tool.UOption;
     29 import com.ibm.icu.impl.LocaleUtility;
     30 
     31 /**
     32  * Base class for performance testing framework. To use, the subclass can simply
     33  * define one or more instance methods with names beginning with "test" (case
     34  * ignored). The prototype of the method is
     35  *
     36  * PerfTest.Function testTheName()
     37  *
 * The actual performance test will execute on the returned Command object
     39  * (refer to Command Pattern). To call a test from command line, the 'test'
     40  * prefix of the test method name can be ignored/removed.
     41  *
     42  * In addition, the subclass should define a main() method that calls
     43  * PerfTest.run() as defined here.
     44  *
 * If the subclass uses any command line arguments (beyond those handled
 * automatically by this class) then it should override PerfTest.setup() to
 * handle its arguments. If the subclass needs more sophisticated management
 * for controlling finding/calling test method, it can replace the default
 * implementation for PerfTest.testProvider before calling PerfTest.run().
     50  *
     51  * Example invocation: java -cp classes -verbose:gc
     52  * com.ibm.icu.dev.test.perf.UnicodeSetPerf --gc --passes 4 --iterations 100
     53  * UnicodeSetAdd [[:l:][:c:]]
     54  *
     55  * Example output: [GC 511K->192K(1984K), 0.0086170 secs] [GC 704K->353K(1984K),
     56  * 0.0059619 secs] [Full GC 618K->371K(1984K), 0.0242779 secs] [Full GC
     57  * 371K->371K(1984K), 0.0228649 secs] = testUnicodeSetAdd begin 100 =
     58  * testUnicodeSetAdd end 11977 1109044 = testUnicodeSetAdd begin 100 =
     59  * testUnicodeSetAdd end 12047 1109044 = testUnicodeSetAdd begin 100 =
     60  * testUnicodeSetAdd end 11987 1109044 = testUnicodeSetAdd begin 100 =
     61  * testUnicodeSetAdd end 11978 1109044
     62  *
     63  * The [] lines are emitted by the JVM as a result of the -verbose:gc switch.
     64  *
 * Lines beginning with '=' are emitted by PerfTest: = testUnicodeSetAdd begin
 * 100 A 'begin' statement contains the name of the setup method, which
 * determines what test function is measured, and the number of iterations that
 * will be timed. = testUnicodeSetAdd end 12047 1109044 An 'end' statement gives
     69  * the name of the setup method again, and then two integers. The first is the
     70  * total elapsed time in milliseconds, and the second is the number of events
     71  * per iteration. In this example, the time per event is 12047 / (100 * 1109044)
     72  * or 108.6 ns/event.
     73  *
     74  * Raw times are given as integer ms, because this is what the system measures.
     75  *
     76  * @author Alan Liu
     77  * @since ICU 2.4
     78  */
     79 public abstract class PerfTest {
    // Command-line options set these (parseOptions() resets them to their
    // defaults and then fills them from the parsed options):

    // True when the verbose option occurs; run() then prints labelled output.
    protected boolean verbose;
    // Value of the source-directory option, or null when it does not occur.
    protected String sourceDir;
    // Value of the "filename" option, or null when it does not occur.
    protected String fileName;

    // protected String resolvedFileName;
    // Value of the encoding option, or "" when it does not occur.
    protected String encoding;
    // Value of the "testname" option, or null when it does not occur.
    protected String testName;
    // True when the "uselen" option occurs.
    protected boolean uselen;
    // Value of "iterations"; stays -1 when "time" is used instead.
    protected int iterations;
    // Value of the mandatory "passes" option: how many timed passes run() makes per test.
    protected int passes;
    // Value of "time" in seconds; stays -1 when "iterations" is used instead.
    protected int time;
    // True when "line-mode" occurs (parseOptions() rejects it together with bulk mode).
    protected boolean line_mode;
    // True when "bulk-mode" occurs (parseOptions() rejects it together with line mode).
    protected boolean bulk_mode;
    // Locale built from the "locale" option value, or null when it does not occur.
    protected Locale locale;
    // Read from the GARBAGE_COLLECT option slot; when true parseOptions() calls gc() before returning.
    protected boolean doPriorGC;
    // Value of "threads" (default 1, must be > 0); performLoops() uses worker threads when > 1.
    protected int threads;

    // Looks up and instantiates test functions by name. The default treats
    // this object's "test"-prefixed methods as the available tests.
    protected TestCmdProvider testProvider = new TestPrefixProvider(this);
     99 
    100     static interface TestCmdProvider {
    101         /**
    102          * @return The names for all available test.
    103          */
    104         public Set getAllTestCmdNames();
    105 
    106         /**
    107          * @param name
    108          * @return Whether the given name is a test name. The implementation may
    109          *         have more sophisticated naming control here.
    110          *         TestCmdProvider.isTestCmd() != Set.contains()
    111          */
    112         public boolean isTestCmd(String name);
    113 
    114         /**
    115          * @param name
    116          * @return the test Command or null
    117          */
    118         public PerfTest.Function getTestCmd(String name);
    119     }
    120 
    121     /**
    122      * Treat all method beginning with 'test' prefix (ignoring case) for given
    123      * object as the test methods.
    124      */
    125     static class TestPrefixProvider implements TestCmdProvider {
    126         private Map theTests = null; // Map<string(no case), string(with case)>
    127         private Set orgNames = null; // shadow reference, ==theTests, for better output
    128         private Object refer;
    129 
    130         TestPrefixProvider(Object theProvider) {
    131             refer = theProvider;
    132         }
    133 
    134         public Set getAllTestCmdNames() {
    135             if (theTests == null) {
    136                 theTests = new HashMap();
    137                 orgNames = new HashSet();
    138                 Method[] methods = refer.getClass().getDeclaredMethods();
    139                 for (int i = 0; i < methods.length; i++) {
    140                     String org = methods[i].getName();
    141                     String name = org.toLowerCase(); // ignoring case
    142                     // beginning with 'test'
    143                     // Note: methods named 'test()' are ignored
    144                     if (name.length() > 4 && name.startsWith("test")) {
    145                         if (theTests.containsKey(name)) {
    146                             throw new Error(
    147                                     "Duplicate method name ignoring case: "
    148                                             + name);
    149                         }
    150                         theTests.put(name, org);
    151                         orgNames.add(org);
    152                     }
    153                 }
    154             }
    155             return orgNames; // begining with 'test', keeping case
    156         }
    157 
    158         /**
    159          * The given name will map to a method of the same name, or a method
    160          * named "test" + name. Case is ignored.
    161          */
    162         private String isTestCmd_impl(String name) {
    163             getAllTestCmdNames();
    164             String tn1 = name.toLowerCase();
    165             String tn2 = "test" + tn1;
    166             if (theTests.containsKey(tn1)) {
    167                 return tn1;
    168             } else if (theTests.containsKey(tn2)) {
    169                 return tn2;
    170             }
    171             return null;
    172         }
    173 
    174         public boolean isTestCmd(String name) {
    175             return isTestCmd_impl(name) != null;
    176         }
    177 
    178         public Function getTestCmd(String aname) {
    179             String name = (String) theTests.get(isTestCmd_impl(aname));
    180             if (name == null) {
    181                 return null;
    182             }
    183 
    184             try {
    185                 Method m = refer.getClass().getDeclaredMethod(name,
    186                         (Class[]) null);
    187                 return (Function) m.invoke(refer, new Object[] {});
    188             } catch (Exception e) {
    189                 throw new Error(
    190                         "TestPrefixProvider implementation error. Finding: "
    191                                 + name, e);
    192             }
    193         }
    194     }
    195 
    196     /**
    197      * Subclasses of PerfTest will need to create subclasses of Function that
    198      * define a call() method which contains the code to be timed. They then
    199      * call setTestFunction() in their "Test..." method to establish this as the
    200      * current test functor.
    201      */
    202     public abstract static class Function {
    203 
    204         /**
    205          * Subclasses should implement this method to do the action to be
    206          * measured if the action is thread-safe
    207          */
    208         public void call() { call(0); }
    209 
    210         /**
    211          * Subclasses should implement this method if the action is not thread-safe
    212          */
    213         public void call(int i) { call(); }
    214 
    215         /**
    216          * Subclasses may implement this method to return positive integer
    217          * indicating the number of operations in a single call to this object's
    218          * call() method. If subclasses do not override this method, the default
    219          * implementation returns 1.
    220          */
    221         public long getOperationsPerIteration() {
    222             return 1;
    223         }
    224 
    225         /**
    226          * Subclasses may implement this method to return either positive or
    227          * negative integer indicating the number of events in a single call to
    228          * this object's call() method. If subclasses do not override this
    229          * method, the default implementation returns -1, indicating that events
    230          * are not applicable to this test. e.g: Number of breaks / iterations
    231          * for break iterator
    232          */
    233         public long getEventsPerIteration() {
    234             return -1;
    235         }
    236 
    237         /**
    238          * Call call() n times in a tight loop and return the elapsed
    239          * milliseconds. If n is small and call() is fast the return result may
    240          * be zero. Small return values have limited meaningfulness, depending
    241          * on the underlying VM and OS.
    242          */
    243         public final long time(long n) {
    244             long start, stop;
    245             start = System.currentTimeMillis();
    246             while (n-- > 0) {
    247                 call();
    248             }
    249             stop = System.currentTimeMillis();
    250             return stop - start; // ms
    251         }
    252 
    253 
    254         /**
    255          * init is called each time before looping through call
    256          */
    257         public void init() {}
    258 
    259 
    260         public final int getID() {
    261             return id;
    262         }
    263 
    264         public final void setID(int id) {
    265             this.id = id;
    266         }
    267 
    268         private int id;
    269     }
    270 
    271     private class FunctionRunner implements Runnable {
    272         public FunctionRunner(Function f, long loops, int id) {
    273             this.f = f;
    274             this.loops = loops;
    275             this.id = id;
    276         }
    277 
    278         public void run() {
    279             long n = loops;
    280             while (n-- > 0)
    281                 f.call(id);
    282         }
    283 
    284         private Function f;
    285 
    286         private long loops;
    287         private int id;
    288     }
    289 
    290 
    291     /**
    292      * Exception indicating a usage error.
    293      */
    294     public static class UsageException extends Exception {
    295         /**
    296          * For serialization
    297          */
    298         private static final long serialVersionUID = -1201256240606806242L;
    299 
    300         public UsageException(String message) {
    301             super(message);
    302         }
    303 
    304         public UsageException() {
    305             super();
    306         }
    307     }
    308 
    /**
     * Constructor. The body is empty: option fields are filled in later by
     * parseOptions(), which run() calls.
     */
    protected PerfTest() {
    }
    314 
    315     /**
    316      * Framework method. Default implementation does not parse any extra
    317      * arguments. Subclasses may override this to parse extra arguments.
    318      * Subclass implementations should NOT call the base class implementation.
    319      */
    320     protected void setup(String[] args) {
    321         if (args.length > 0) {
    322             throw new RuntimeException("Extra arguments received");
    323         }
    324     }
    325 
    326     /**
    327      * These must be kept in sync with getOptions().
    328      */
    329     static final int HELP1 = 0;
    330     static final int HELP2 = 1;
    331     static final int VERBOSE = 2;
    332     static final int SOURCEDIR = 3;
    333     static final int ENCODING = 4;
    334     static final int USELEN = 5;
    335     static final int FILE_NAME = 6;
    336     static final int PASSES = 7;
    337     static final int ITERATIONS = 8;
    338     static final int TIME = 9;
    339     static final int LINE_MODE = 10;
    340     static final int BULK_MODE = 11;
    341     static final int LOCALE = 12;
    342     static final int TEST_NAME = 13;
    343     static final int THREADS = 14;
    344 
    345     // Options above here are identical to those in C; keep in sync with C
    346     // Options below here are unique to Java; shift down as necessary
    347     static final int GARBAGE_COLLECT = 14;
    348     static final int LIST = 15;
    349 
    350     UOption[] getOptions() {
    351         return new UOption[] {
    352                 UOption.HELP_H(),
    353                 UOption.HELP_QUESTION_MARK(),
    354                 UOption.VERBOSE(),
    355                 UOption.SOURCEDIR(),
    356                 UOption.ENCODING(),
    357                 UOption.DEF("uselen",     'u', UOption.NO_ARG),
    358                 UOption.DEF("filename",   'f', UOption.REQUIRES_ARG),
    359                 UOption.DEF("passes",     'p', UOption.REQUIRES_ARG),
    360                 UOption.DEF("iterations", 'i', UOption.REQUIRES_ARG),
    361                 UOption.DEF("time",       't', UOption.REQUIRES_ARG),
    362                 UOption.DEF("line-mode",  'l', UOption.NO_ARG),
    363                 UOption.DEF("bulk-mode",  'b', UOption.NO_ARG),
    364                 UOption.DEF("locale",     'L', UOption.REQUIRES_ARG),
    365                 UOption.DEF("testname",   'T', UOption.REQUIRES_ARG),
    366                 UOption.DEF("threads",    'r', UOption.REQUIRES_ARG),
    367 
    368                 // Options above here are identical to those in C; keep in sync
    369                 // Options below here are unique to Java
    370 
    371                 UOption.DEF("gc", 'g', UOption.NO_ARG),
    372                 UOption.DEF("list", (char) -1, UOption.NO_ARG), };
    373     }
    374 
    375     /**
    376      * Subclasses should call this method in their main(). run() will in turn
    377      * call setup() with any arguments it does not parse. This method parses the
    378      * command line and runs the tests given on the command line, with the given
    379      * parameters. See the class description for details.
    380      */
    381     protected final void run(String[] args) throws Exception {
    382         Set testList = parseOptions(args);
    383 
    384         // Run the tests
    385         for (Iterator iter = testList.iterator(); iter.hasNext();) {
    386             String meth = (String) iter.next();
    387 
    388             // Call meth to set up the test
    389             // long eventsPerCall = -1;
    390             Function testFunction = testProvider.getTestCmd(meth);
    391             if (testFunction == null) {
    392                 throw new RuntimeException(meth
    393                         + " failed to return a test function");
    394             }
    395             if (testFunction.getOperationsPerIteration() < 1) {
    396                 throw new RuntimeException(meth
    397                         + " returned an illegal operations/iteration()");
    398             }
    399 
    400             long t;
    401             // long b = System.currentTimeMillis();
    402             long loops = getIteration(meth, testFunction);
    403             // System.out.println("The guess cost: " + (System.currentTimeMillis() - b)/1000. + " s.");
    404 
    405             for (int j = 0; j < passes; ++j) {
    406                 long events = -1;
    407                 if (verbose) {
    408                     if (iterations > 0) {
    409                         System.out.println("= " + meth + " begin " + iterations);
    410                     } else {
    411                         System.out.println("= " + meth + " begin " + time + " seconds");
    412                     }
    413                 } else {
    414                     System.out.println("= " + meth + " begin ");
    415                 }
    416 
    417                 t = performLoops(testFunction, loops);
    418 
    419                 events = testFunction.getEventsPerIteration();
    420 
    421                 if (verbose) {
    422                     if (events == -1) {
    423                         System.out.println("= " + meth + " end " + (t / 1000.0) + " loops: " + loops + " operations: "
    424                                 + testFunction.getOperationsPerIteration());
    425                     } else {
    426                         System.out.println("= " + meth + " end " + (t / 1000.0) + " loops: " + loops + " operations: "
    427                                 + testFunction.getOperationsPerIteration() + " events: " + events);
    428                     }
    429                 } else {
    430                     if (events == -1) {
    431                         System.out.println("= " + meth + " end " + (t / 1000.0) + " " + loops + " "
    432                                 + testFunction.getOperationsPerIteration());
    433                     } else {
    434                         System.out.println("= " + meth + " end " + (t / 1000.0) + " " + loops + " "
    435                                 + testFunction.getOperationsPerIteration() + " " + events);
    436                     }
    437                 }
    438 
    439             }
    440         }
    441     }
    442 
    443     /**
    444      * @param args
    445      * @return the method list to call
    446      * @throws UsageException
    447      */
    448     private Set parseOptions(String[] args) throws UsageException {
    449 
    450         doPriorGC = false;
    451         encoding = "";
    452         uselen = false;
    453         fileName = null;
    454         sourceDir = null;
    455         line_mode = false;
    456         verbose = false;
    457         bulk_mode = false;
    458         passes = iterations = time = -1;
    459         locale = null;
    460         testName = null;
    461         threads = 1;
    462 
    463         UOption[] options = getOptions();
    464         int remainingArgc = UOption.parseArgs(args, options);
    465 
    466         if (args.length == 0 || options[HELP1].doesOccur || options[HELP2].doesOccur)
    467             throw new UsageException();
    468 
    469         if (options[LIST].doesOccur) {
    470             System.err.println("Available tests:");
    471             Set testNames = testProvider.getAllTestCmdNames();
    472             for (Iterator iter = testNames.iterator(); iter.hasNext();) {
    473                 String name = (String) iter.next();
    474                 System.err.println(" " + name);
    475             }
    476             System.exit(0);
    477         }
    478 
    479         if (options[TIME].doesOccur && options[ITERATIONS].doesOccur)
    480             throw new UsageException("Cannot specify both '-t <seconds>' and '-i <iterations>'");
    481         else if (!options[TIME].doesOccur && !options[ITERATIONS].doesOccur)
    482             throw new UsageException("Either '-t <seconds>' or '-i <iterations>' must be specified");
    483         else if (options[ITERATIONS].doesOccur) {
    484             try {
    485                 iterations = Integer.parseInt(options[ITERATIONS].value);
    486             } catch (NumberFormatException ex) {
    487                 throw new UsageException("'-i <iterations>' requires an integer number of iterations");
    488             }
    489         } else { //if (options[TIME].doesOccur)
    490             try {
    491                 time = Integer.parseInt(options[TIME].value);
    492             } catch (NumberFormatException ex) {
    493                 throw new UsageException("'-r <seconds>' requires an integer number of seconds");
    494             }
    495         }
    496 
    497         if (!options[PASSES].doesOccur)
    498             throw new UsageException("'-p <passes>' must be specified");
    499         else
    500             passes = Integer.parseInt(options[PASSES].value);
    501 
    502         if (options[LINE_MODE].doesOccur && options[BULK_MODE].doesOccur)
    503             throw new UsageException("Cannot specify both '-l' (line mode) and '-b' (bulk mode)");
    504 
    505         if (options[THREADS].doesOccur) {
    506             try {
    507                 threads = Integer.parseInt(options[THREADS].value);
    508             } catch (NumberFormatException ex) {
    509                 throw new UsageException("'-r <threads>' requires an integer number of threads");
    510             }
    511             if (threads <= 0)
    512                 throw new UsageException("'-r <threads>' requires an number of threads greater than 0");
    513         }
    514 
    515         line_mode = options[LINE_MODE].doesOccur;
    516         bulk_mode = options[BULK_MODE].doesOccur;
    517         verbose   = options[VERBOSE].doesOccur;
    518         uselen    = options[USELEN].doesOccur;
    519         doPriorGC = options[GARBAGE_COLLECT].doesOccur;
    520 
    521         if (options[SOURCEDIR].doesOccur) sourceDir = options[SOURCEDIR].value;
    522         if (options[ENCODING].doesOccur)  encoding  = options[ENCODING].value;
    523         if (options[FILE_NAME].doesOccur) fileName  = options[FILE_NAME].value;
    524         if (options[TEST_NAME].doesOccur) testName  = options[TEST_NAME].value;
    525         if (options[LOCALE].doesOccur)    locale    = LocaleUtility.getLocaleFromName(options[LOCALE].value);
    526 
    527 
    528         // build the test list
    529         Set testList = new HashSet();
    530         int i, j;
    531         for (i = 0; i < remainingArgc; ++i) {
    532             // is args[i] a method name?
    533             if (testProvider.isTestCmd(args[i])) {
    534                 testList.add(args[i]);
    535             } else {
    536                 // args[i] is neither a method name nor a number. Pass
    537                 // everything from here on through to the subclass via
    538                 // setup().
    539                 break;
    540             }
    541         }
    542 
    543         // if no tests were specified, put all the tests in the test list
    544         if (testList.size() == 0) {
    545             Set testNames = testProvider.getAllTestCmdNames();
    546             Iterator iter = testNames.iterator();
    547             while (iter.hasNext())
    548                 testList.add((String)iter.next());
    549         }
    550 
    551         // pass remaining arguments, if any, through to the subclass via setup() method.
    552         String[] subclassArgs = new String[remainingArgc - i];
    553         for (j = 0; i < remainingArgc; j++)
    554             subclassArgs[j] = args[i++];
    555         setup(subclassArgs);
    556 
    557         // Put the heap in a consistent state
    558         if (doPriorGC)
    559             gc();
    560 
    561         return testList;
    562     }
    563 
    564     /**
    565      * Translate '-t time' to iterations (or just return '-i iteration')
    566      *
    567      * @param meth
    568      * @param fn
    569      * @return rt
    570      */
    571     private long getIteration(String methName, Function fn) throws InterruptedException {
    572         long iter = 0;
    573         if (time < 0) { // && iterations > 0
    574             iter = iterations;
    575         } else { // && iterations < 0
    576             // Translate time to iteration
    577             // Assuming there is a linear relation between time and iterations
    578 
    579             if (verbose) {
    580                 System.out.println("= " + methName + " calibrating " + time
    581                         + " seconds");
    582             }
    583 
    584             long base = time * 1000;
    585             // System.out.println("base :" + base);
    586             long seed = 1;
    587             long t = 0;
    588             while (t < base * 0.9 || base * 1.1 < t) { // + - 10%
    589                 if (iter == 0 || t == 0) {
    590                     iter = seed; // start up from 1
    591                     seed *= 100; // if the method is too fast (t == 0),
    592                     // multiply 100 times
    593                     // 100 is rational because 'base' is always larger than 1000
    594                 } else {
    595                     // If 't' is large enough, use linear function to calculate
    596                     // new iteration
    597                     //
    598                     // new iter(base) old iter
    599                     // -------------- = -------- = k
    600                     // new time old time
    601                     //
    602                     // System.out.println("before guess t: " + t);
    603                     // System.out.println("before guess iter: " + iter);
    604                     iter = (long) ((double) iter / t * base); // avoid long
    605                     // cut, eg. 1/10
    606                     // == 0
    607                     if (iter == 0) {
    608                         throw new RuntimeException(
    609                                 "Unable to converge on desired duration");
    610                     }
    611                 }
    612                 t = performLoops(fn, iter);
    613             }
    614             // System.out.println("final t : " + t);
    615             // System.out.println("final i : " + iter);
    616         }
    617         return iter;
    618     }
    619 
    620 
    621     private long performLoops(Function function, long loops) throws InterruptedException {
    622         function.init();
    623         if (threads > 1) {
    624             Thread[] threadList = new Thread[threads];
    625             for (int i=0; i<threads; i++)
    626                 threadList[i] = new Thread(new FunctionRunner(function, loops, i));
    627 
    628             long start = System.currentTimeMillis();
    629             for (int i=0; i<threads; i++)
    630                 threadList[i].start();
    631             for (int i=0; i<threads; i++)
    632                 threadList[i].join();
    633             return System.currentTimeMillis() - start;
    634 
    635         } else {
    636             return function.time(loops); // ms
    637         }
    638     }
    639 
    640 
    641     /**
    642      * Invoke the runtime's garbage collection procedure repeatedly until the
    643      * amount of free memory stabilizes to within 10%.
    644      */
    645     protected void gc() {
    646         if (false) {
    647             long last;
    648             long free = 1;
    649             Runtime runtime = Runtime.getRuntime();
    650             do {
    651                 runtime.gc();
    652                 last = free;
    653                 free = runtime.freeMemory();
    654             } while (((double) Math.abs(free - last)) / free > 0.1);
    655             // Wait for the change in free memory to drop under 10%
    656             // between successive calls.
    657         }
    658 
    659         // From "Java Platform Performance". This is the procedure
    660         // recommended by Javasoft.
    661         try {
    662             System.gc();
    663             Thread.sleep(100);
    664             System.runFinalization();
    665             Thread.sleep(100);
    666 
    667             System.gc();
    668             Thread.sleep(100);
    669             System.runFinalization();
    670             Thread.sleep(100);
    671         } catch (InterruptedException e) {
    672         }
    673     }
    674 
    675 
    676     public static char[] readToEOS(Reader reader) {
    677         ArrayList vec = new ArrayList();
    678         int count = 0;
    679         int pos = 0;
    680         final int MAXLENGTH = 0x8000; // max buffer size - 32K
    681         int length = 0x80; // start with small buffers and work up
    682         do {
    683             pos = 0;
    684             length = length >= MAXLENGTH ? MAXLENGTH : length * 2;
    685             char[] buffer = new char[length];
    686             try {
    687                 do {
    688                     int n = reader.read(buffer, pos, length - pos);
    689                     if (n == -1) {
    690                         break;
    691                     }
    692                     pos += n;
    693                 } while (pos < length);
    694             }
    695             catch (IOException e) {
    696             }
    697             vec.add(buffer);
    698             count += pos;
    699         } while (pos == length);
    700 
    701         char[] data = new char[count];
    702         pos = 0;
    703         for (int i = 0; i < vec.size(); ++i) {
    704             char[] buf = (char[]) vec.get(i);
    705             int len = Math.min(buf.length, count - pos);
    706             System.arraycopy(buf, 0, data, pos, len);
    707             pos += len;
    708         }
    709         return data;
    710     }
    711     public static byte[] readToEOS(InputStream stream) {
    712 
    713         ArrayList vec = new ArrayList();
    714         int count = 0;
    715         int pos = 0;
    716         final int MAXLENGTH = 0x8000; // max buffer size - 32K
    717         int length = 0x80; // start with small buffers and work up
    718         do {
    719             pos = 0;
    720             length = length >= MAXLENGTH ? MAXLENGTH : length * 2;
    721             byte[] buffer = new byte[length];
    722             try {
    723                 do {
    724                     int n = stream.read(buffer, pos, length - pos);
    725                     if (n == -1) {
    726                         break;
    727                     }
    728                     pos += n;
    729                 } while (pos < length);
    730             }
    731             catch (IOException e) {
    732             }
    733             vec.add(buffer);
    734             count += pos;
    735         } while (pos == length);
    736 
    737 
    738         byte[] data = new byte[count];
    739         pos = 0;
    740         for (int i = 0; i < vec.size(); ++i) {
    741             byte[] buf = (byte[]) vec.get(i);
    742             int len = Math.min(buf.length, count - pos);
    743             System.arraycopy(buf, 0, data, pos, len);
    744             pos += len;
    745         }
    746         return data;
    747     }
    748 
    749     protected String[] readLines(String filename, String srcEncoding, boolean bulkMode) {
    750         FileInputStream fis = null;
    751         InputStreamReader isr = null;
    752         BufferedReader br = null;
    753         try {
    754             fis = new FileInputStream(filename);
    755             isr = new InputStreamReader(fis, srcEncoding);
    756             br = new BufferedReader(isr);
    757         } catch (Exception e) {
    758             System.err.println("Error: File access exception: " + e.getMessage() + "!");
    759             System.exit(1);
    760         }
    761         ArrayList list = new ArrayList();
    762         while (true) {
    763             String line = null;
    764             try {
    765                 line = readDataLine(br);
    766             } catch (Exception e) {
    767                 System.err.println("Read File Error" + e.getMessage() + "!");
    768                 System.exit(1);
    769             }
    770             if (line == null) break;
    771             if (line.length() == 0) continue;
    772             list.add(line);
    773         }
    774 
    775         int size = list.size();
    776         String[] lines = null;
    777 
    778         if (bulkMode) {
    779             lines = new String[1];
    780             StringBuffer buffer = new StringBuffer("");
    781             for (int i = 0; i < size; ++i) {
    782                 buffer.append((String) list.get(i));
    783                 /*if (i < (size - 1)) {
    784                     buffer.append("\r\n");
    785                 }*/
    786             }
    787             lines[0] = buffer.toString();
    788         } else {
    789             lines = new String[size];
    790             for (int i = 0; i < size; ++i) {
    791                 lines[i] = (String) list.get(i);
    792             }
    793         }
    794 
    795         return lines;
    796     }
    797 
    798     public String readDataLine(BufferedReader br) throws Exception {
    799         String originalLine = "";
    800         String line = "";
    801         try {
    802             line = originalLine = br.readLine();
    803             if (line == null) return null;
    804             if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1);
    805             int commentPos = line.indexOf('#');
    806             if (commentPos >= 0) line = line.substring(0, commentPos);
    807             line = line.trim();
    808         } catch (Exception e) {
    809             throw new Exception("Line \"{0}\",  \"{1}\"" + originalLine + " "
    810                     + line + " " + e.toString());
    811         }
    812         return line;
    813     }
    814 
    815 
    816     public static class BOMFreeReader extends Reader {
    817         InputStreamReader reader;
    818         String encoding;
    819         int MAX_BOM_LENGTH = 5;
    820 
    821         /**
    822          * Creates a new reader, skipping a BOM associated with the given
    823          * encoding. Equivalent to BOMFreeReader(in, null).
    824          *
    825          * @param in
    826          *            The input stream.
    827          * @throws IOException
    828          *             Thrown if reading for a BOM causes an IOException.
    829          */
    830         public BOMFreeReader(InputStream in) throws IOException {
    831             this(in, null);
    832         }
    833 
    834         /**
    835          * Creates a new reader, skipping a BOM associated with the given
    836          * encoding. If encoding is null, attempts to detect the encoding by the
    837          * BOM.
    838          *
    839          * @param in
    840          *            The input stream.
    841          * @param encoding
    842          *            The encoding to use. Can be null.
    843          * @throws IOException
    844          *             Thrown if reading for a BOM causes an IOException.
    845          */
    846         public BOMFreeReader(InputStream in, String encoding) throws IOException {
    847             PushbackInputStream pushback = new PushbackInputStream(in, MAX_BOM_LENGTH);
    848             this.encoding = encoding;
    849 
    850             byte[] start = new byte[MAX_BOM_LENGTH];
    851             Arrays.fill(start, (byte)0xa5);
    852 
    853             int amountRead = pushback.read(start, 0, MAX_BOM_LENGTH);
    854             int bomLength = detectBOMLength(start);
    855             if (amountRead > bomLength)
    856                 pushback.unread(start, bomLength, amountRead - bomLength);
    857 
    858             reader = (encoding == null) ? new InputStreamReader(pushback) : new InputStreamReader(pushback, encoding);
    859         }
    860 
    861         /**
    862          * Determines the length of a BOM in the beginning of start. Assumes
    863          * start is at least a length 5 array. If encoding is null, the check
    864          * will not be encoding specific and it will set the encoding of this
    865          * BOMFreeReader.
    866          *
    867          * @param start
    868          *            The starting bytes.
    869          * @param encoding
    870          *            The encoding. Can be null.
    871          * @return The length of a detected BOM.
    872          */
    873         private int detectBOMLength(byte[] start) {
    874             if ((encoding == null || "UTF-16BE".equals(encoding)) && start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
    875                 if (encoding == null) this.encoding = "UTF-16BE";
    876                 return 2; // "UTF-16BE";
    877             } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
    878                 if ((encoding == null || "UTF-32LE".equals(encoding)) && start[2] == (byte) 0x00
    879                         && start[3] == (byte) 0x00) {
    880                     if (encoding == null) this.encoding = "UTF-32LE";
    881                     return 4; // "UTF-32LE";
    882                 } else if ((encoding == null || "UTF-16LE".equals(encoding))) {
    883                     if (encoding == null) this.encoding = "UTF-16LE";
    884                     return 2; // "UTF-16LE";
    885                 }
    886             } else if ((encoding == null || "UTF-8".equals(encoding)) && start[0] == (byte) 0xEF
    887                     && start[1] == (byte) 0xBB && start[2] == (byte) 0xBF) {
    888                 if (encoding == null) this.encoding = "UTF-8";
    889                 return 3; // "UTF-8";
    890             } else if ((encoding == null || "UTF-32BE".equals(encoding)) && start[0] == (byte) 0x00
    891                     && start[1] == (byte) 0x00 && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
    892                 if (encoding == null) this.encoding = "UTF-32BE";
    893                 return 4; // "UTF-32BE";
    894             } else if ((encoding == null || "SCSU".equals(encoding)) && start[0] == (byte) 0x0E
    895                     && start[1] == (byte) 0xFE && start[2] == (byte) 0xFF) {
    896                 if (encoding == null) this.encoding = "SCSU";
    897                 return 3; // "SCSU";
    898             } else if ((encoding == null || "BOCU-1".equals(encoding)) && start[0] == (byte) 0xFB
    899                     && start[1] == (byte) 0xEE && start[2] == (byte) 0x28) {
    900                 if (encoding == null) this.encoding = "BOCU-1";
    901                 return 3; // "BOCU-1";
    902             } else if ((encoding == null || "UTF-7".equals(encoding)) && start[0] == (byte) 0x2B
    903                     && start[1] == (byte) 0x2F && start[2] == (byte) 0x76) {
    904                 if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
    905                     if (encoding == null) this.encoding = "UTF-7";
    906                     return 5; // "UTF-7";
    907                 } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39 || start[3] == (byte) 0x2B
    908                         || start[3] == (byte) 0x2F) {
    909                     if (encoding == null) this.encoding = "UTF-7";
    910                     return 4; // "UTF-7";
    911                 }
    912             } else if ((encoding == null || "UTF-EBCDIC".equals(encoding)) && start[0] == (byte) 0xDD
    913                     && start[2] == (byte) 0x73 && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
    914                 if (encoding == null) this.encoding = "UTF-EBCDIC";
    915                 return 4; // "UTF-EBCDIC";
    916             }
    917 
    918             /* no known Unicode signature byte sequence recognized */
    919             return 0;
    920         }
    921 
    922         public int read(char[] cbuf, int off, int len) throws IOException {
    923             return reader.read(cbuf, off, len);
    924         }
    925 
    926         public void close() throws IOException {
    927             reader.close();
    928         }
    929     }
    930 }
    931 
    932 
    933 
    934 // eof
    935