Home | History | Annotate | Download | only in impl
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 1996-2015, International Business Machines Corporation and    *
      7  * others. All Rights Reserved.                                                *
      8  *******************************************************************************
      9  */
     10 package android.icu.impl;
     11 
     12 import java.io.IOException;
     13 import java.util.ArrayList;
     14 import java.util.Arrays;
     15 import java.util.Locale;
     16 import java.util.regex.Pattern;
     17 
     18 import android.icu.lang.UCharacter;
     19 import android.icu.text.Replaceable;
     20 import android.icu.text.UTF16;
     21 import android.icu.text.UnicodeMatcher;
     22 
     23 /**
     24  * @hide Only a subset of ICU is exposed in Android
     25  */
     26 public final class Utility {
     27 
    /** The single-quote character. */
    private static final char APOSTROPHE = '\'';
    /** The backslash character. */
    private static final char BACKSLASH  = '\\';
    /**
     * An int with only the sign bit set.
     * NOTE(review): no use is visible in this part of the file; confirm the
     * intended meaning against the callers further down.
     */
    private static final int MAGIC_UNSIGNED = 0x80000000;
     31 
     32     /**
     33      * Convenience utility to compare two Object[]s.
     34      * Ought to be in System
     35      */
     36     public final static boolean arrayEquals(Object[] source, Object target) {
     37         if (source == null) return (target == null);
     38         if (!(target instanceof Object[])) return false;
     39         Object[] targ = (Object[]) target;
     40         return (source.length == targ.length
     41                 && arrayRegionMatches(source, 0, targ, 0, source.length));
     42     }
     43 
     44     /**
     45      * Convenience utility to compare two int[]s
     46      * Ought to be in System
     47      */
     48     public final static boolean arrayEquals(int[] source, Object target) {
     49         if (source == null) return (target == null);
     50         if (!(target instanceof int[])) return false;
     51         int[] targ = (int[]) target;
     52         return (source.length == targ.length
     53                 && arrayRegionMatches(source, 0, targ, 0, source.length));
     54     }
     55 
     56     /**
     57      * Convenience utility to compare two double[]s
     58      * Ought to be in System
     59      */
     60     public final static boolean arrayEquals(double[] source, Object target) {
     61         if (source == null) return (target == null);
     62         if (!(target instanceof double[])) return false;
     63         double[] targ = (double[]) target;
     64         return (source.length == targ.length
     65                 && arrayRegionMatches(source, 0, targ, 0, source.length));
     66     }
     67     public final static boolean arrayEquals(byte[] source, Object target) {
     68         if (source == null) return (target == null);
     69         if (!(target instanceof byte[])) return false;
     70         byte[] targ = (byte[]) target;
     71         return (source.length == targ.length
     72                 && arrayRegionMatches(source, 0, targ, 0, source.length));
     73     }
     74 
     75     /**
     76      * Convenience utility to compare two Object[]s
     77      * Ought to be in System
     78      */
     79     public final static boolean arrayEquals(Object source, Object target) {
     80         if (source == null) return (target == null);
     81         // for some reason, the correct arrayEquals is not being called
     82         // so do it by hand for now.
     83         if (source instanceof Object[])
     84             return(arrayEquals((Object[]) source,target));
     85         if (source instanceof int[])
     86             return(arrayEquals((int[]) source,target));
     87         if (source instanceof double[])
     88             return(arrayEquals((double[]) source, target));
     89         if (source instanceof byte[])
     90             return(arrayEquals((byte[]) source,target));
     91         return source.equals(target);
     92     }
     93 
     94     /**
     95      * Convenience utility to compare two Object[]s
     96      * Ought to be in System.
     97      * @param len the length to compare.
     98      * The start indices and start+len must be valid.
     99      */
    100     public final static boolean arrayRegionMatches(Object[] source, int sourceStart,
    101             Object[] target, int targetStart,
    102             int len)
    103     {
    104         int sourceEnd = sourceStart + len;
    105         int delta = targetStart - sourceStart;
    106         for (int i = sourceStart; i < sourceEnd; i++) {
    107             if (!arrayEquals(source[i],target[i + delta]))
    108                 return false;
    109         }
    110         return true;
    111     }
    112 
    113     /**
    114      * Convenience utility to compare two Object[]s
    115      * Ought to be in System.
    116      * @param len the length to compare.
    117      * The start indices and start+len must be valid.
    118      */
    119     public final static boolean arrayRegionMatches(char[] source, int sourceStart,
    120             char[] target, int targetStart,
    121             int len)
    122     {
    123         int sourceEnd = sourceStart + len;
    124         int delta = targetStart - sourceStart;
    125         for (int i = sourceStart; i < sourceEnd; i++) {
    126             if (source[i]!=target[i + delta])
    127                 return false;
    128         }
    129         return true;
    130     }
    131 
    132     /**
    133      * Convenience utility to compare two int[]s.
    134      * @param len the length to compare.
    135      * The start indices and start+len must be valid.
    136      * Ought to be in System
    137      */
    138     public final static boolean arrayRegionMatches(int[] source, int sourceStart,
    139             int[] target, int targetStart,
    140             int len)
    141     {
    142         int sourceEnd = sourceStart + len;
    143         int delta = targetStart - sourceStart;
    144         for (int i = sourceStart; i < sourceEnd; i++) {
    145             if (source[i] != target[i + delta])
    146                 return false;
    147         }
    148         return true;
    149     }
    150 
    151     /**
    152      * Convenience utility to compare two arrays of doubles.
    153      * @param len the length to compare.
    154      * The start indices and start+len must be valid.
    155      * Ought to be in System
    156      */
    157     public final static boolean arrayRegionMatches(double[] source, int sourceStart,
    158             double[] target, int targetStart,
    159             int len)
    160     {
    161         int sourceEnd = sourceStart + len;
    162         int delta = targetStart - sourceStart;
    163         for (int i = sourceStart; i < sourceEnd; i++) {
    164             if (source[i] != target[i + delta])
    165                 return false;
    166         }
    167         return true;
    168     }
    169     public final static boolean arrayRegionMatches(byte[] source, int sourceStart,
    170             byte[] target, int targetStart, int len){
    171         int sourceEnd = sourceStart + len;
    172         int delta = targetStart - sourceStart;
    173         for (int i = sourceStart; i < sourceEnd; i++) {
    174             if (source[i] != target[i + delta])
    175                 return false;
    176         }
    177         return true;
    178     }
    179 
    180     /**
    181      * Trivial reference equality.
    182      * This method should help document that we really want == not equals(),
    183      * and to have a single place to suppress warnings from static analysis tools.
    184      */
    185     public static final boolean sameObjects(Object a, Object b) {
    186         return a == b;
    187     }
    188 
    189     /**
    190      * Convenience utility. Does null checks on objects, then calls equals.
    191      */
    192     public final static boolean objectEquals(Object a, Object b) {
    193         return a == null ?
    194                 b == null ? true : false :
    195                     b == null ? false : a.equals(b);
    196     }
    197 
    198     /**
    199      * Convenience utility. Does null checks on objects, then calls compare.
    200      */
    201     public static <T extends Comparable<T>> int checkCompare(T a, T b) {
    202         return a == null ?
    203                 b == null ? 0 : -1 :
    204                     b == null ? 1 : a.compareTo(b);
    205       }
    206 
    207     /**
    208      * Convenience utility. Does null checks on object, then calls hashCode.
    209      */
    210     public static int checkHash(Object a) {
    211         return a == null ? 0 : a.hashCode();
    212       }
    213 
    /**
     * The ESCAPE character is used during run-length encoding of int, short
     * and char arrays.  In the encoded form it is followed either by a second
     * ESCAPE (an escaped literal ESCAPE value) or by a run length and the
     * value to repeat.
     */
    private static final char ESCAPE = '\uA5A5';

    /**
     * The ESCAPE_BYTE character is used during run-length encoding of byte
     * arrays.  In the encoded form it is followed either by a second
     * ESCAPE_BYTE (an escaped literal ESCAPE_BYTE) or by a run length byte
     * and the byte to repeat.
     */
    static final byte ESCAPE_BYTE = (byte)0xA5;
    225 
    226     /**
    227      * Construct a string representing an int array.  Use run-length encoding.
    228      * A character represents itself, unless it is the ESCAPE character.  Then
    229      * the following notations are possible:
    230      *   ESCAPE ESCAPE   ESCAPE literal
    231      *   ESCAPE n c      n instances of character c
    232      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
    233      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
    234      * If we encounter a run where n == ESCAPE, we represent this as:
    235      *   c ESCAPE n-1 c
    236      * The ESCAPE value is chosen so as not to collide with commonly
    237      * seen values.
    238      */
    239     static public final String arrayToRLEString(int[] a) {
    240         StringBuilder buffer = new StringBuilder();
    241 
    242         appendInt(buffer, a.length);
    243         int runValue = a[0];
    244         int runLength = 1;
    245         for (int i=1; i<a.length; ++i) {
    246             int s = a[i];
    247             if (s == runValue && runLength < 0xFFFF) {
    248                 ++runLength;
    249             } else {
    250                 encodeRun(buffer, runValue, runLength);
    251                 runValue = s;
    252                 runLength = 1;
    253             }
    254         }
    255         encodeRun(buffer, runValue, runLength);
    256         return buffer.toString();
    257     }
    258 
    259     /**
    260      * Construct a string representing a short array.  Use run-length encoding.
    261      * A character represents itself, unless it is the ESCAPE character.  Then
    262      * the following notations are possible:
    263      *   ESCAPE ESCAPE   ESCAPE literal
    264      *   ESCAPE n c      n instances of character c
    265      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
    266      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
    267      * If we encounter a run where n == ESCAPE, we represent this as:
    268      *   c ESCAPE n-1 c
    269      * The ESCAPE value is chosen so as not to collide with commonly
    270      * seen values.
    271      */
    272     static public final String arrayToRLEString(short[] a) {
    273         StringBuilder buffer = new StringBuilder();
    274         // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
    275         buffer.append((char) (a.length >> 16));
    276         buffer.append((char) a.length);
    277         short runValue = a[0];
    278         int runLength = 1;
    279         for (int i=1; i<a.length; ++i) {
    280             short s = a[i];
    281             if (s == runValue && runLength < 0xFFFF) ++runLength;
    282             else {
    283                 encodeRun(buffer, runValue, runLength);
    284                 runValue = s;
    285                 runLength = 1;
    286             }
    287         }
    288         encodeRun(buffer, runValue, runLength);
    289         return buffer.toString();
    290     }
    291 
    292     /**
    293      * Construct a string representing a char array.  Use run-length encoding.
    294      * A character represents itself, unless it is the ESCAPE character.  Then
    295      * the following notations are possible:
    296      *   ESCAPE ESCAPE   ESCAPE literal
    297      *   ESCAPE n c      n instances of character c
    298      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
    299      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
    300      * If we encounter a run where n == ESCAPE, we represent this as:
    301      *   c ESCAPE n-1 c
    302      * The ESCAPE value is chosen so as not to collide with commonly
    303      * seen values.
    304      */
    305     static public final String arrayToRLEString(char[] a) {
    306         StringBuilder buffer = new StringBuilder();
    307         buffer.append((char) (a.length >> 16));
    308         buffer.append((char) a.length);
    309         char runValue = a[0];
    310         int runLength = 1;
    311         for (int i=1; i<a.length; ++i) {
    312             char s = a[i];
    313             if (s == runValue && runLength < 0xFFFF) ++runLength;
    314             else {
    315                 encodeRun(buffer, (short)runValue, runLength);
    316                 runValue = s;
    317                 runLength = 1;
    318             }
    319         }
    320         encodeRun(buffer, (short)runValue, runLength);
    321         return buffer.toString();
    322     }
    323 
    324     /**
    325      * Construct a string representing a byte array.  Use run-length encoding.
    326      * Two bytes are packed into a single char, with a single extra zero byte at
    327      * the end if needed.  A byte represents itself, unless it is the
    328      * ESCAPE_BYTE.  Then the following notations are possible:
    329      *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
    330      *   ESCAPE_BYTE n b           n instances of byte b
    331      * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
    332      * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
    333      * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
    334      *   b ESCAPE_BYTE n-1 b
    335      * The ESCAPE_BYTE value is chosen so as not to collide with commonly
    336      * seen values.
    337      */
    338     static public final String arrayToRLEString(byte[] a) {
    339         StringBuilder buffer = new StringBuilder();
    340         buffer.append((char) (a.length >> 16));
    341         buffer.append((char) a.length);
    342         byte runValue = a[0];
    343         int runLength = 1;
    344         byte[] state = new byte[2];
    345         for (int i=1; i<a.length; ++i) {
    346             byte b = a[i];
    347             if (b == runValue && runLength < 0xFF) ++runLength;
    348             else {
    349                 encodeRun(buffer, runValue, runLength, state);
    350                 runValue = b;
    351                 runLength = 1;
    352             }
    353         }
    354         encodeRun(buffer, runValue, runLength, state);
    355 
    356         // We must save the final byte, if there is one, by padding
    357         // an extra zero.
    358         if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
    359 
    360         return buffer.toString();
    361     }
    362 
    363     /**
    364      * Encode a run, possibly a degenerate run (of < 4 values).
    365      * @param length The length of the run; must be > 0 && <= 0xFFFF.
    366      */
    367     private static final <T extends Appendable> void encodeRun(T buffer, int value, int length) {
    368         if (length < 4) {
    369             for (int j=0; j<length; ++j) {
    370                 if (value == ESCAPE) {
    371                     appendInt(buffer, value);
    372                 }
    373                 appendInt(buffer, value);
    374             }
    375         }
    376         else {
    377             if (length == ESCAPE) {
    378                 if (value == ESCAPE) {
    379                     appendInt(buffer, ESCAPE);
    380                 }
    381                 appendInt(buffer, value);
    382                 --length;
    383             }
    384             appendInt(buffer, ESCAPE);
    385             appendInt(buffer, length);
    386             appendInt(buffer, value); // Don't need to escape this value
    387         }
    388     }
    389 
    390     private static final <T extends Appendable> void appendInt(T buffer, int value) {
    391         try {
    392             buffer.append((char)(value >>> 16));
    393             buffer.append((char)(value & 0xFFFF));
    394         } catch (IOException e) {
    395             throw new IllegalIcuArgumentException(e);
    396         }
    397     }
    398 
    399     /**
    400      * Encode a run, possibly a degenerate run (of < 4 values).
    401      * @param length The length of the run; must be > 0 && <= 0xFFFF.
    402      */
    403     private static final <T extends Appendable> void encodeRun(T buffer, short value, int length) {
    404         try {
    405             char valueChar = (char) value;
    406             if (length < 4) {
    407                 for (int j=0; j<length; ++j) {
    408                     if (valueChar == ESCAPE) {
    409                         buffer.append(ESCAPE);
    410                     }
    411                     buffer.append(valueChar);
    412                 }
    413             }
    414             else {
    415                 if (length == ESCAPE) {
    416                     if (valueChar == ESCAPE) {
    417                         buffer.append(ESCAPE);
    418                     }
    419                     buffer.append(valueChar);
    420                     --length;
    421                 }
    422                 buffer.append(ESCAPE);
    423                 buffer.append((char) length);
    424                 buffer.append(valueChar); // Don't need to escape this value
    425             }
    426         } catch (IOException e) {
    427             throw new IllegalIcuArgumentException(e);
    428         }
    429     }
    430 
    431     /**
    432      * Encode a run, possibly a degenerate run (of < 4 values).
    433      * @param length The length of the run; must be > 0 && <= 0xFF.
    434      */
    435     private static final <T extends Appendable> void encodeRun(T buffer, byte value, int length,
    436             byte[] state) {
    437         if (length < 4) {
    438             for (int j=0; j<length; ++j) {
    439                 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
    440                 appendEncodedByte(buffer, value, state);
    441             }
    442         }
    443         else {
    444             if ((byte)length == ESCAPE_BYTE) {
    445                 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
    446                 appendEncodedByte(buffer, value, state);
    447                 --length;
    448             }
    449             appendEncodedByte(buffer, ESCAPE_BYTE, state);
    450             appendEncodedByte(buffer, (byte)length, state);
    451             appendEncodedByte(buffer, value, state); // Don't need to escape this value
    452         }
    453     }
    454 
    455     /**
    456      * Append a byte to the given Appendable, packing two bytes into each
    457      * character.  The state parameter maintains intermediary data between
    458      * calls.
    459      * @param state A two-element array, with state[0] == 0 if this is the
    460      * first byte of a pair, or state[0] != 0 if this is the second byte
    461      * of a pair, in which case state[1] is the first byte.
    462      */
    463     private static final <T extends Appendable> void appendEncodedByte(T buffer, byte value,
    464             byte[] state) {
    465         try {
    466             if (state[0] != 0) {
    467                 char c = (char) ((state[1] << 8) | ((value) & 0xFF));
    468                 buffer.append(c);
    469                 state[0] = 0;
    470             }
    471             else {
    472                 state[0] = 1;
    473                 state[1] = value;
    474             }
    475         } catch (IOException e) {
    476             throw new IllegalIcuArgumentException(e);
    477         }
    478     }
    479 
    480     /**
    481      * Construct an array of ints from a run-length encoded string.
    482      */
    483     static public final int[] RLEStringToIntArray(String s) {
    484         int length = getInt(s, 0);
    485         int[] array = new int[length];
    486         int ai = 0, i = 1;
    487 
    488         int maxI = s.length() / 2;
    489         while (ai < length && i < maxI) {
    490             int c = getInt(s, i++);
    491 
    492             if (c == ESCAPE) {
    493                 c = getInt(s, i++);
    494                 if (c == ESCAPE) {
    495                     array[ai++] = c;
    496                 } else {
    497                     int runLength = c;
    498                     int runValue = getInt(s, i++);
    499                     for (int j=0; j<runLength; ++j) {
    500                         array[ai++] = runValue;
    501                     }
    502                 }
    503             }
    504             else {
    505                 array[ai++] = c;
    506             }
    507         }
    508 
    509         if (ai != length || i != maxI) {
    510             throw new IllegalStateException("Bad run-length encoded int array");
    511         }
    512 
    513         return array;
    514     }
    515     static final int getInt(String s, int i) {
    516         return ((s.charAt(2*i)) << 16) | s.charAt(2*i+1);
    517     }
    518 
    519     /**
    520      * Construct an array of shorts from a run-length encoded string.
    521      */
    522     static public final short[] RLEStringToShortArray(String s) {
    523         int length = ((s.charAt(0)) << 16) | (s.charAt(1));
    524         short[] array = new short[length];
    525         int ai = 0;
    526         for (int i=2; i<s.length(); ++i) {
    527             char c = s.charAt(i);
    528             if (c == ESCAPE) {
    529                 c = s.charAt(++i);
    530                 if (c == ESCAPE) {
    531                     array[ai++] = (short) c;
    532                 } else {
    533                     int runLength = c;
    534                     short runValue = (short) s.charAt(++i);
    535                     for (int j=0; j<runLength; ++j) array[ai++] = runValue;
    536                 }
    537             }
    538             else {
    539                 array[ai++] = (short) c;
    540             }
    541         }
    542 
    543         if (ai != length)
    544             throw new IllegalStateException("Bad run-length encoded short array");
    545 
    546         return array;
    547     }
    548 
    549     /**
    550      * Construct an array of shorts from a run-length encoded string.
    551      */
    552     static public final char[] RLEStringToCharArray(String s) {
    553         int length = ((s.charAt(0)) << 16) | (s.charAt(1));
    554         char[] array = new char[length];
    555         int ai = 0;
    556         for (int i=2; i<s.length(); ++i) {
    557             char c = s.charAt(i);
    558             if (c == ESCAPE) {
    559                 c = s.charAt(++i);
    560                 if (c == ESCAPE) {
    561                     array[ai++] = c;
    562                 } else {
    563                     int runLength = c;
    564                     char runValue = s.charAt(++i);
    565                     for (int j=0; j<runLength; ++j) array[ai++] = runValue;
    566                 }
    567             }
    568             else {
    569                 array[ai++] = c;
    570             }
    571         }
    572 
    573         if (ai != length)
    574             throw new IllegalStateException("Bad run-length encoded short array");
    575 
    576         return array;
    577     }
    578 
    /**
     * Construct an array of bytes from a run-length encoded string.
     * The first two chars hold the length of the decoded array (high 16 bits
     * first).  Each following char carries two bytes, high byte first.
     * Decoding stops as soon as the array is full, so a trailing padding
     * byte in the last char is never interpreted.
     *
     * @param s the encoded string
     * @return the decoded array
     * @throws IllegalStateException if the array fills up in the middle of an
     *         escape sequence, or if chars of {@code s} are left over once
     *         the array is full
     */
    static public final byte[] RLEStringToByteArray(String s) {
        int length = ((s.charAt(0)) << 16) | (s.charAt(1));
        byte[] array = new byte[length];
        boolean nextChar = true;
        char c = 0;
        int node = 0;
        int runLength = 0;
        int i = 2;
        // Note: ai only advances inside the state machine below, and i is
        // not bounds-checked here; charAt and the array stores do the checking.
        for (int ai=0; ai<length; ) {
            // This part of the loop places the next byte into the local
            // variable 'b' each time through the loop.  It keeps the
            // current character in 'c' and uses the boolean 'nextChar'
            // to see if we've taken both bytes out of 'c' yet.
            byte b;
            if (nextChar) {
                c = s.charAt(i++);
                b = (byte) (c >> 8);
                nextChar = false;
            }
            else {
                b = (byte) (c & 0xFF);
                nextChar = true;
            }

            // This part of the loop is a tiny state machine which handles
            // the parsing of the run-length encoding.  This would be simpler
            // if we could look ahead, but we can't, so we use 'node' to
            // move between three nodes in the state machine.
            switch (node) {
            case 0:
                // Normal idle node
                if (b == ESCAPE_BYTE) {
                    node = 1;
                }
                else {
                    array[ai++] = b;
                }
                break;
            case 1:
                // We have seen one ESCAPE_BYTE; we expect either a second
                // one, or a run length and value.
                if (b == ESCAPE_BYTE) {
                    array[ai++] = ESCAPE_BYTE;
                    node = 0;
                }
                else {
                    runLength = b;
                    // Interpret signed byte as unsigned
                    if (runLength < 0) runLength += 0x100;
                    node = 2;
                }
                break;
            case 2:
                // We have seen an ESCAPE_BYTE and length byte.  We interpret
                // the next byte as the value to be repeated.
                for (int j=0; j<runLength; ++j) array[ai++] = b;
                node = 0;
                break;
            }
        }

        // The array filled up while an escape sequence was still open.
        if (node != 0)
            throw new IllegalStateException("Bad run-length encoded byte array");

        // Every char of the input must have been consumed.
        if (i != s.length())
            throw new IllegalStateException("Excess data in RLE byte array string");

        return array;
    }
    651 
    /**
     * The value of the {@code line.separator} system property, read once when
     * this class is initialized.  {@code formatForSource} appends it between
     * the string literals it emits.
     * NOTE(review): the field is public and not final, so other code can
     * reassign it; confirm whether anything relies on that before changing it.
     */
    static public String LINE_SEPARATOR = System.getProperty("line.separator");
    653 
    654     /**
    655      * Format a String for representation in a source file.  This includes
    656      * breaking it into lines and escaping characters using octal notation
    657      * when necessary (control characters and double quotes).
    658      */
    659     static public final String formatForSource(String s) {
    660         StringBuilder buffer = new StringBuilder();
    661         for (int i=0; i<s.length();) {
    662             if (i > 0) buffer.append('+').append(LINE_SEPARATOR);
    663             buffer.append("        \"");
    664             int count = 11;
    665             while (i<s.length() && count<80) {
    666                 char c = s.charAt(i++);
    667                 if (c < '\u0020' || c == '"' || c == '\\') {
    668                     if (c == '\n') {
    669                         buffer.append("\\n");
    670                         count += 2;
    671                     } else if (c == '\t') {
    672                         buffer.append("\\t");
    673                         count += 2;
    674                     } else if (c == '\r') {
    675                         buffer.append("\\r");
    676                         count += 2;
    677                     } else {
    678                         // Represent control characters, backslash and double quote
    679                         // using octal notation; otherwise the string we form
    680                         // won't compile, since Unicode escape sequences are
    681                         // processed before tokenization.
    682                         buffer.append('\\');
    683                         buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
    684                         buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
    685                         buffer.append(HEX_DIGIT[(c & 0007)]);
    686                         count += 4;
    687                     }
    688                 }
    689                 else if (c <= '\u007E') {
    690                     buffer.append(c);
    691                     count += 1;
    692                 }
    693                 else {
    694                     buffer.append("\\u");
    695                     buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
    696                     buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
    697                     buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
    698                     buffer.append(HEX_DIGIT[(c & 0x000F)]);
    699                     count += 6;
    700                 }
    701             }
    702             buffer.append('"');
    703         }
    704         return buffer.toString();
    705     }
    706 
    /**
     * Uppercase digit characters indexed by value, 0 through 15.  The source
     * formatters also use the first eight entries for octal digits.
     */
    static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
        '8','9','A','B','C','D','E','F'};
    709 
    710     /**
    711      * Format a String for representation in a source file.  Like
    712      * formatForSource but does not do line breaking.
    713      */
    714     static public final String format1ForSource(String s) {
    715         StringBuilder buffer = new StringBuilder();
    716         buffer.append("\"");
    717         for (int i=0; i<s.length();) {
    718             char c = s.charAt(i++);
    719             if (c < '\u0020' || c == '"' || c == '\\') {
    720                 if (c == '\n') {
    721                     buffer.append("\\n");
    722                 } else if (c == '\t') {
    723                     buffer.append("\\t");
    724                 } else if (c == '\r') {
    725                     buffer.append("\\r");
    726                 } else {
    727                     // Represent control characters, backslash and double quote
    728                     // using octal notation; otherwise the string we form
    729                     // won't compile, since Unicode escape sequences are
    730                     // processed before tokenization.
    731                     buffer.append('\\');
    732                     buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
    733                     buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
    734                     buffer.append(HEX_DIGIT[(c & 0007)]);
    735                 }
    736             }
    737             else if (c <= '\u007E') {
    738                 buffer.append(c);
    739             }
    740             else {
    741                 buffer.append("\\u");
    742                 buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
    743                 buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
    744                 buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
    745                 buffer.append(HEX_DIGIT[(c & 0x000F)]);
    746             }
    747         }
    748         buffer.append('"');
    749         return buffer.toString();
    750     }
    751 
    752     /**
    753      * Convert characters outside the range U+0020 to U+007F to
    754      * Unicode escapes, and convert backslash to a double backslash.
    755      */
    756     public static final String escape(String s) {
    757         StringBuilder buf = new StringBuilder();
    758         for (int i=0; i<s.length(); ) {
    759             int c = Character.codePointAt(s, i);
    760             i += UTF16.getCharCount(c);
    761             if (c >= ' ' && c <= 0x007F) {
    762                 if (c == '\\') {
    763                     buf.append("\\\\"); // That is, "\\"
    764                 } else {
    765                     buf.append((char)c);
    766                 }
    767             } else {
    768                 boolean four = c <= 0xFFFF;
    769                 buf.append(four ? "\\u" : "\\U");
    770                 buf.append(hex(c, four ? 4 : 8));
    771             }
    772         }
    773         return buf.toString();
    774     }
    775 
    776     /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
    777     static private final char[] UNESCAPE_MAP = {
    778         /*"   0x22, 0x22 */
    779         /*'   0x27, 0x27 */
    780         /*?   0x3F, 0x3F */
    781         /*\   0x5C, 0x5C */
    782         /*a*/ 0x61, 0x07,
    783         /*b*/ 0x62, 0x08,
    784         /*e*/ 0x65, 0x1b,
    785         /*f*/ 0x66, 0x0c,
    786         /*n*/ 0x6E, 0x0a,
    787         /*r*/ 0x72, 0x0d,
    788         /*t*/ 0x74, 0x09,
    789         /*v*/ 0x76, 0x0b
    790     };
    791 
    792     /**
    793      * Convert an escape to a 32-bit code point value.  We attempt
    794      * to parallel the icu4c unescapeAt() function.
    795      * @param offset16 an array containing offset to the character
    796      * <em>after</em> the backslash.  Upon return offset16[0] will
    797      * be updated to point after the escape sequence.
    798      * @return character value from 0 to 10FFFF, or -1 on error.
    799      */
    800     public static int unescapeAt(String s, int[] offset16) {
    801         int c;
    802         int result = 0;
    803         int n = 0;
    804         int minDig = 0;
    805         int maxDig = 0;
    806         int bitsPerDigit = 4;
    807         int dig;
    808         int i;
    809         boolean braces = false;
    810 
    811         /* Check that offset is in range */
    812         int offset = offset16[0];
    813         int length = s.length();
    814         if (offset < 0 || offset >= length) {
    815             return -1;
    816         }
    817 
    818         /* Fetch first UChar after '\\' */
    819         c = Character.codePointAt(s, offset);
    820         offset += UTF16.getCharCount(c);
    821 
    822         /* Convert hexadecimal and octal escapes */
    823         switch (c) {
    824         case 'u':
    825             minDig = maxDig = 4;
    826             break;
    827         case 'U':
    828             minDig = maxDig = 8;
    829             break;
    830         case 'x':
    831             minDig = 1;
    832             if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
    833                 ++offset;
    834                 braces = true;
    835                 maxDig = 8;
    836             } else {
    837                 maxDig = 2;
    838             }
    839             break;
    840         default:
    841             dig = UCharacter.digit(c, 8);
    842             if (dig >= 0) {
    843                 minDig = 1;
    844                 maxDig = 3;
    845                 n = 1; /* Already have first octal digit */
    846                 bitsPerDigit = 3;
    847                 result = dig;
    848             }
    849             break;
    850         }
    851         if (minDig != 0) {
    852             while (offset < length && n < maxDig) {
    853                 c = UTF16.charAt(s, offset);
    854                 dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
    855                 if (dig < 0) {
    856                     break;
    857                 }
    858                 result = (result << bitsPerDigit) | dig;
    859                 offset += UTF16.getCharCount(c);
    860                 ++n;
    861             }
    862             if (n < minDig) {
    863                 return -1;
    864             }
    865             if (braces) {
    866                 if (c != 0x7D /*}*/) {
    867                     return -1;
    868                 }
    869                 ++offset;
    870             }
    871             if (result < 0 || result >= 0x110000) {
    872                 return -1;
    873             }
    874             // If an escape sequence specifies a lead surrogate, see
    875             // if there is a trail surrogate after it, either as an
    876             // escape or as a literal.  If so, join them up into a
    877             // supplementary.
    878             if (offset < length &&
    879                     UTF16.isLeadSurrogate((char) result)) {
    880                 int ahead = offset+1;
    881                 c = s.charAt(offset); // [sic] get 16-bit code unit
    882                 if (c == '\\' && ahead < length) {
    883                     int o[] = new int[] { ahead };
    884                     c = unescapeAt(s, o);
    885                     ahead = o[0];
    886                 }
    887                 if (UTF16.isTrailSurrogate((char) c)) {
    888                     offset = ahead;
    889                     result = Character.toCodePoint((char) result, (char) c);
    890                 }
    891             }
    892             offset16[0] = offset;
    893             return result;
    894         }
    895 
    896         /* Convert C-style escapes in table */
    897         for (i=0; i<UNESCAPE_MAP.length; i+=2) {
    898             if (c == UNESCAPE_MAP[i]) {
    899                 offset16[0] = offset;
    900                 return UNESCAPE_MAP[i+1];
    901             } else if (c < UNESCAPE_MAP[i]) {
    902                 break;
    903             }
    904         }
    905 
    906         /* Map \cX to control-X: X & 0x1F */
    907         if (c == 'c' && offset < length) {
    908             c = UTF16.charAt(s, offset);
    909             offset16[0] = offset + UTF16.getCharCount(c);
    910             return 0x1F & c;
    911         }
    912 
    913         /* If no special forms are recognized, then consider
    914          * the backslash to generically escape the next character. */
    915         offset16[0] = offset;
    916         return c;
    917     }
    918 
    919     /**
    920      * Convert all escapes in a given string using unescapeAt().
    921      * @exception IllegalArgumentException if an invalid escape is
    922      * seen.
    923      */
    924     public static String unescape(String s) {
    925         StringBuilder buf = new StringBuilder();
    926         int[] pos = new int[1];
    927         for (int i=0; i<s.length(); ) {
    928             char c = s.charAt(i++);
    929             if (c == '\\') {
    930                 pos[0] = i;
    931                 int e = unescapeAt(s, pos);
    932                 if (e < 0) {
    933                     throw new IllegalArgumentException("Invalid escape sequence " +
    934                             s.substring(i-1, Math.min(i+8, s.length())));
    935                 }
    936                 buf.appendCodePoint(e);
    937                 i = pos[0];
    938             } else {
    939                 buf.append(c);
    940             }
    941         }
    942         return buf.toString();
    943     }
    944 
    945     /**
    946      * Convert all escapes in a given string using unescapeAt().
    947      * Leave invalid escape sequences unchanged.
    948      */
    949     public static String unescapeLeniently(String s) {
    950         StringBuilder buf = new StringBuilder();
    951         int[] pos = new int[1];
    952         for (int i=0; i<s.length(); ) {
    953             char c = s.charAt(i++);
    954             if (c == '\\') {
    955                 pos[0] = i;
    956                 int e = unescapeAt(s, pos);
    957                 if (e < 0) {
    958                     buf.append(c);
    959                 } else {
    960                     buf.appendCodePoint(e);
    961                     i = pos[0];
    962                 }
    963             } else {
    964                 buf.append(c);
    965             }
    966         }
    967         return buf.toString();
    968     }
    969 
    970     /**
    971      * Convert a char to 4 hex uppercase digits.  E.g., hex('a') =>
    972      * "0041".
    973      */
    974     public static String hex(long ch) {
    975         return hex(ch, 4);
    976     }
    977 
    978     /**
    979      * Supplies a zero-padded hex representation of an integer (without 0x)
    980      */
    981     static public String hex(long i, int places) {
    982         if (i == Long.MIN_VALUE) return "-8000000000000000";
    983         boolean negative = i < 0;
    984         if (negative) {
    985             i = -i;
    986         }
    987         String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH);
    988         if (result.length() < places) {
    989             result = "0000000000000000".substring(result.length(),places) + result;
    990         }
    991         if (negative) {
    992             return '-' + result;
    993         }
    994         return result;
    995     }
    996 
    997     /**
    998      * Convert a string to comma-separated groups of 4 hex uppercase
    999      * digits.  E.g., hex('ab') => "0041,0042".
   1000      */
   1001     public static String hex(CharSequence s) {
   1002         return hex(s, 4, ",", true, new StringBuilder()).toString();
   1003     }
   1004 
   1005     /**
   1006      * Convert a string to separated groups of hex uppercase
   1007      * digits.  E.g., hex('ab'...) => "0041,0042".  Append the output
   1008      * to the given Appendable.
   1009      */
   1010     public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
   1011         try {
   1012             if (useCodePoints) {
   1013                 int cp;
   1014                 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
   1015                     cp = Character.codePointAt(s, i);
   1016                     if (i != 0) {
   1017                         result.append(separator);
   1018                     }
   1019                     result.append(hex(cp,width));
   1020                 }
   1021             } else {
   1022                 for (int i = 0; i < s.length(); ++i) {
   1023                     if (i != 0) {
   1024                         result.append(separator);
   1025                     }
   1026                     result.append(hex(s.charAt(i),width));
   1027                 }
   1028             }
   1029             return result;
   1030         } catch (IOException e) {
   1031             throw new IllegalIcuArgumentException(e);
   1032         }
   1033     }
   1034 
   1035     public static String hex(byte[] o, int start, int end, String separator) {
   1036         StringBuilder result = new StringBuilder();
   1037         //int ch;
   1038         for (int i = start; i < end; ++i) {
   1039           if (i != 0) result.append(separator);
   1040           result.append(hex(o[i]));
   1041         }
   1042         return result.toString();
   1043       }
   1044 
   1045     /**
   1046      * Convert a string to comma-separated groups of 4 hex uppercase
   1047      * digits.  E.g., hex('ab') => "0041,0042".
   1048      */
   1049     public static <S extends CharSequence> String hex(S s, int width, S separator) {
   1050         return hex(s, width, separator, true, new StringBuilder()).toString();
   1051     }
   1052 
   1053     /**
   1054      * Split a string into pieces based on the given divider character
   1055      * @param s the string to split
   1056      * @param divider the character on which to split.  Occurrences of
   1057      * this character are not included in the output
   1058      * @param output an array to receive the substrings between
   1059      * instances of divider.  It must be large enough on entry to
   1060      * accomodate all output.  Adjacent instances of the divider
   1061      * character will place empty strings into output.  Before
   1062      * returning, output is padded out with empty strings.
   1063      */
   1064     public static void split(String s, char divider, String[] output) {
   1065         int last = 0;
   1066         int current = 0;
   1067         int i;
   1068         for (i = 0; i < s.length(); ++i) {
   1069             if (s.charAt(i) == divider) {
   1070                 output[current++] = s.substring(last,i);
   1071                 last = i+1;
   1072             }
   1073         }
   1074         output[current++] = s.substring(last,i);
   1075         while (current < output.length) {
   1076             output[current++] = "";
   1077         }
   1078     }
   1079 
   1080     /**
   1081      * Split a string into pieces based on the given divider character
   1082      * @param s the string to split
   1083      * @param divider the character on which to split.  Occurrences of
   1084      * this character are not included in the output
   1085      * @return output an array to receive the substrings between
   1086      * instances of divider. Adjacent instances of the divider
   1087      * character will place empty strings into output.
   1088      */
   1089     public static String[] split(String s, char divider) {
   1090         int last = 0;
   1091         int i;
   1092         ArrayList<String> output = new ArrayList<String>();
   1093         for (i = 0; i < s.length(); ++i) {
   1094             if (s.charAt(i) == divider) {
   1095                 output.add(s.substring(last,i));
   1096                 last = i+1;
   1097             }
   1098         }
   1099         output.add( s.substring(last,i));
   1100         return output.toArray(new String[output.size()]);
   1101     }
   1102 
   1103     /**
   1104      * Look up a given string in a string array.  Returns the index at
   1105      * which the first occurrence of the string was found in the
   1106      * array, or -1 if it was not found.
   1107      * @param source the string to search for
   1108      * @param target the array of zero or more strings in which to
   1109      * look for source
   1110      * @return the index of target at which source first occurs, or -1
   1111      * if not found
   1112      */
   1113     public static int lookup(String source, String[] target) {
   1114         for (int i = 0; i < target.length; ++i) {
   1115             if (source.equals(target[i])) return i;
   1116         }
   1117         return -1;
   1118     }
   1119 
   1120     /**
   1121      * Parse a single non-whitespace character 'ch', optionally
   1122      * preceded by whitespace.
   1123      * @param id the string to be parsed
   1124      * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
   1125      * offset of the first character to be parsed.  On output, pos[0]
   1126      * is the index after the last parsed character.  If the parse
   1127      * fails, pos[0] will be unchanged.
   1128      * @param ch the non-whitespace character to be parsed.
   1129      * @return true if 'ch' is seen preceded by zero or more
   1130      * whitespace characters.
   1131      */
   1132     public static boolean parseChar(String id, int[] pos, char ch) {
   1133         int start = pos[0];
   1134         pos[0] = PatternProps.skipWhiteSpace(id, pos[0]);
   1135         if (pos[0] == id.length() ||
   1136                 id.charAt(pos[0]) != ch) {
   1137             pos[0] = start;
   1138             return false;
   1139         }
   1140         ++pos[0];
   1141         return true;
   1142     }
   1143 
   1144     /**
   1145      * Parse a pattern string starting at offset pos.  Keywords are
   1146      * matched case-insensitively.  Spaces may be skipped and may be
   1147      * optional or required.  Integer values may be parsed, and if
   1148      * they are, they will be returned in the given array.  If
   1149      * successful, the offset of the next non-space character is
   1150      * returned.  On failure, -1 is returned.
   1151      * @param pattern must only contain lowercase characters, which
   1152      * will match their uppercase equivalents as well.  A space
   1153      * character matches one or more required spaces.  A '~' character
   1154      * matches zero or more optional spaces.  A '#' character matches
   1155      * an integer and stores it in parsedInts, which the caller must
   1156      * ensure has enough capacity.
   1157      * @param parsedInts array to receive parsed integers.  Caller
   1158      * must ensure that parsedInts.length is >= the number of '#'
   1159      * signs in 'pattern'.
   1160      * @return the position after the last character parsed, or -1 if
   1161      * the parse failed
   1162      */
   1163     @SuppressWarnings("fallthrough")
   1164     public static int parsePattern(String rule, int pos, int limit,
   1165             String pattern, int[] parsedInts) {
   1166         // TODO Update this to handle surrogates
   1167         int[] p = new int[1];
   1168         int intCount = 0; // number of integers parsed
   1169         for (int i=0; i<pattern.length(); ++i) {
   1170             char cpat = pattern.charAt(i);
   1171             char c;
   1172             switch (cpat) {
   1173             case ' ':
   1174                 if (pos >= limit) {
   1175                     return -1;
   1176                 }
   1177                 c = rule.charAt(pos++);
   1178                 if (!PatternProps.isWhiteSpace(c)) {
   1179                     return -1;
   1180                 }
   1181                 // FALL THROUGH to skipWhitespace
   1182             case '~':
   1183                 pos = PatternProps.skipWhiteSpace(rule, pos);
   1184                 break;
   1185             case '#':
   1186                 p[0] = pos;
   1187                 parsedInts[intCount++] = parseInteger(rule, p, limit);
   1188                 if (p[0] == pos) {
   1189                     // Syntax error; failed to parse integer
   1190                     return -1;
   1191                 }
   1192                 pos = p[0];
   1193                 break;
   1194             default:
   1195                 if (pos >= limit) {
   1196                     return -1;
   1197                 }
   1198                 c = (char) UCharacter.toLowerCase(rule.charAt(pos++));
   1199                 if (c != cpat) {
   1200                     return -1;
   1201                 }
   1202                 break;
   1203             }
   1204         }
   1205         return pos;
   1206     }
   1207 
   1208     /**
   1209      * Parse a pattern string within the given Replaceable and a parsing
   1210      * pattern.  Characters are matched literally and case-sensitively
   1211      * except for the following special characters:
   1212      *
   1213      * ~  zero or more Pattern_White_Space chars
   1214      *
   1215      * If end of pattern is reached with all matches along the way,
   1216      * pos is advanced to the first unparsed index and returned.
   1217      * Otherwise -1 is returned.
   1218      * @param pat pattern that controls parsing
   1219      * @param text text to be parsed, starting at index
   1220      * @param index offset to first character to parse
   1221      * @param limit offset after last character to parse
   1222      * @return index after last parsed character, or -1 on parse failure.
   1223      */
   1224     public static int parsePattern(String pat,
   1225             Replaceable text,
   1226             int index,
   1227             int limit) {
   1228         int ipat = 0;
   1229 
   1230         // empty pattern matches immediately
   1231         if (ipat == pat.length()) {
   1232             return index;
   1233         }
   1234 
   1235         int cpat = Character.codePointAt(pat, ipat);
   1236 
   1237         while (index < limit) {
   1238             int c = text.char32At(index);
   1239 
   1240             // parse \s*
   1241             if (cpat == '~') {
   1242                 if (PatternProps.isWhiteSpace(c)) {
   1243                     index += UTF16.getCharCount(c);
   1244                     continue;
   1245                 } else {
   1246                     if (++ipat == pat.length()) {
   1247                         return index; // success; c unparsed
   1248                     }
   1249                     // fall thru; process c again with next cpat
   1250                 }
   1251             }
   1252 
   1253             // parse literal
   1254             else if (c == cpat) {
   1255                 int n = UTF16.getCharCount(c);
   1256                 index += n;
   1257                 ipat += n;
   1258                 if (ipat == pat.length()) {
   1259                     return index; // success; c parsed
   1260                 }
   1261                 // fall thru; get next cpat
   1262             }
   1263 
   1264             // match failure of literal
   1265             else {
   1266                 return -1;
   1267             }
   1268 
   1269             cpat = UTF16.charAt(pat, ipat);
   1270         }
   1271 
   1272         return -1; // text ended before end of pat
   1273     }
   1274 
   1275     /**
   1276      * Parse an integer at pos, either of the form \d+ or of the form
   1277      * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
   1278      * or octal format.
   1279      * @param pos INPUT-OUTPUT parameter.  On input, the first
   1280      * character to parse.  On output, the character after the last
   1281      * parsed character.
   1282      */
   1283     public static int parseInteger(String rule, int[] pos, int limit) {
   1284         int count = 0;
   1285         int value = 0;
   1286         int p = pos[0];
   1287         int radix = 10;
   1288 
   1289         if (rule.regionMatches(true, p, "0x", 0, 2)) {
   1290             p += 2;
   1291             radix = 16;
   1292         } else if (p < limit && rule.charAt(p) == '0') {
   1293             p++;
   1294             count = 1;
   1295             radix = 8;
   1296         }
   1297 
   1298         while (p < limit) {
   1299             int d = UCharacter.digit(rule.charAt(p++), radix);
   1300             if (d < 0) {
   1301                 --p;
   1302                 break;
   1303             }
   1304             ++count;
   1305             int v = (value * radix) + d;
   1306             if (v <= value) {
   1307                 // If there are too many input digits, at some point
   1308                 // the value will go negative, e.g., if we have seen
   1309                 // "0x8000000" already and there is another '0', when
   1310                 // we parse the next 0 the value will go negative.
   1311                 return 0;
   1312             }
   1313             value = v;
   1314         }
   1315         if (count > 0) {
   1316             pos[0] = p;
   1317         }
   1318         return value;
   1319     }
   1320 
   1321     /**
   1322      * Parse a Unicode identifier from the given string at the given
   1323      * position.  Return the identifier, or null if there is no
   1324      * identifier.
   1325      * @param str the string to parse
   1326      * @param pos INPUT-OUPUT parameter.  On INPUT, pos[0] is the
   1327      * first character to examine.  It must be less than str.length(),
   1328      * and it must not point to a whitespace character.  That is, must
   1329      * have pos[0] < str.length().  On
   1330      * OUTPUT, the position after the last parsed character.
   1331      * @return the Unicode identifier, or null if there is no valid
   1332      * identifier at pos[0].
   1333      */
   1334     public static String parseUnicodeIdentifier(String str, int[] pos) {
   1335         // assert(pos[0] < str.length());
   1336         StringBuilder buf = new StringBuilder();
   1337         int p = pos[0];
   1338         while (p < str.length()) {
   1339             int ch = Character.codePointAt(str, p);
   1340             if (buf.length() == 0) {
   1341                 if (UCharacter.isUnicodeIdentifierStart(ch)) {
   1342                     buf.appendCodePoint(ch);
   1343                 } else {
   1344                     return null;
   1345                 }
   1346             } else {
   1347                 if (UCharacter.isUnicodeIdentifierPart(ch)) {
   1348                     buf.appendCodePoint(ch);
   1349                 } else {
   1350                     break;
   1351                 }
   1352             }
   1353             p += UTF16.getCharCount(ch);
   1354         }
   1355         pos[0] = p;
   1356         return buf.toString();
   1357     }
   1358 
   1359     static final char DIGITS[] = {
   1360         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
   1361         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
   1362         'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
   1363         'U', 'V', 'W', 'X', 'Y', 'Z'
   1364     };
   1365 
   1366     /**
   1367      * Append the digits of a positive integer to the given
   1368      * <code>Appendable</code> in the given radix. This is
   1369      * done recursively since it is easiest to generate the low-
   1370      * order digit first, but it must be appended last.
   1371      *
   1372      * @param result is the <code>Appendable</code> to append to
   1373      * @param n is the positive integer
   1374      * @param radix is the radix, from 2 to 36 inclusive
   1375      * @param minDigits is the minimum number of digits to append.
   1376      */
   1377     private static <T extends Appendable> void recursiveAppendNumber(T result, int n,
   1378             int radix, int minDigits)
   1379     {
   1380         try {
   1381             int digit = n % radix;
   1382 
   1383             if (n >= radix || minDigits > 1) {
   1384                 recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
   1385             }
   1386             result.append(DIGITS[digit]);
   1387         } catch (IOException e) {
   1388             throw new IllegalIcuArgumentException(e);
   1389         }
   1390     }
   1391 
   1392     /**
   1393      * Append a number to the given Appendable in the given radix.
   1394      * Standard digits '0'-'9' are used and letters 'A'-'Z' for
   1395      * radices 11 through 36.
   1396      * @param result the digits of the number are appended here
   1397      * @param n the number to be converted to digits; may be negative.
   1398      * If negative, a '-' is prepended to the digits.
   1399      * @param radix a radix from 2 to 36 inclusive.
   1400      * @param minDigits the minimum number of digits, not including
   1401      * any '-', to produce.  Values less than 2 have no effect.  One
   1402      * digit is always emitted regardless of this parameter.
   1403      * @return a reference to result
   1404      */
   1405     public static <T extends Appendable> T appendNumber(T result, int n,
   1406             int radix, int minDigits)
   1407     {
   1408         try {
   1409             if (radix < 2 || radix > 36) {
   1410                 throw new IllegalArgumentException("Illegal radix " + radix);
   1411             }
   1412 
   1413 
   1414             int abs = n;
   1415 
   1416             if (n < 0) {
   1417                 abs = -n;
   1418                 result.append("-");
   1419             }
   1420 
   1421             recursiveAppendNumber(result, abs, radix, minDigits);
   1422 
   1423             return result;
   1424         } catch (IOException e) {
   1425             throw new IllegalIcuArgumentException(e);
   1426         }
   1427 
   1428     }
   1429 
   1430     /**
   1431      * Parse an unsigned 31-bit integer at the given offset.  Use
   1432      * UCharacter.digit() to parse individual characters into digits.
   1433      * @param text the text to be parsed
   1434      * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the
   1435      * offset within text at which to start parsing; it should point
   1436      * to a valid digit.  On exit, pos[0] is the offset after the last
   1437      * parsed character.  If the parse failed, it will be unchanged on
   1438      * exit.  Must be >= 0 on entry.
   1439      * @param radix the radix in which to parse; must be >= 2 and <=
   1440      * 36.
   1441      * @return a non-negative parsed number, or -1 upon parse failure.
   1442      * Parse fails if there are no digits, that is, if pos[0] does not
   1443      * point to a valid digit on entry, or if the number to be parsed
   1444      * does not fit into a 31-bit unsigned integer.
   1445      */
   1446     public static int parseNumber(String text, int[] pos, int radix) {
   1447         // assert(pos[0] >= 0);
   1448         // assert(radix >= 2);
   1449         // assert(radix <= 36);
   1450         int n = 0;
   1451         int p = pos[0];
   1452         while (p < text.length()) {
   1453             int ch = Character.codePointAt(text, p);
   1454             int d = UCharacter.digit(ch, radix);
   1455             if (d < 0) {
   1456                 break;
   1457             }
   1458             n = radix*n + d;
   1459             // ASSUME that when a 32-bit integer overflows it becomes
   1460             // negative.  E.g., 214748364 * 10 + 8 => negative value.
   1461             if (n < 0) {
   1462                 return -1;
   1463             }
   1464             ++p;
   1465         }
   1466         if (p == pos[0]) {
   1467             return -1;
   1468         }
   1469         pos[0] = p;
   1470         return n;
   1471     }
   1472 
   1473     /**
   1474      * Return true if the character is NOT printable ASCII.  The tab,
   1475      * newline and linefeed characters are considered unprintable.
   1476      */
   1477     public static boolean isUnprintable(int c) {
   1478         //0x20 = 32 and 0x7E = 126
   1479         return !(c >= 0x20 && c <= 0x7E);
   1480     }
   1481 
   1482     /**
   1483      * Escape unprintable characters using <backslash>uxxxx notation
   1484      * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
   1485      * above.  If the character is printable ASCII, then do nothing
   1486      * and return FALSE.  Otherwise, append the escaped notation and
   1487      * return TRUE.
   1488      */
   1489     public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
   1490         try {
   1491             if (isUnprintable(c)) {
   1492                 result.append('\\');
   1493                 if ((c & ~0xFFFF) != 0) {
   1494                     result.append('U');
   1495                     result.append(DIGITS[0xF&(c>>28)]);
   1496                     result.append(DIGITS[0xF&(c>>24)]);
   1497                     result.append(DIGITS[0xF&(c>>20)]);
   1498                     result.append(DIGITS[0xF&(c>>16)]);
   1499                 } else {
   1500                     result.append('u');
   1501                 }
   1502                 result.append(DIGITS[0xF&(c>>12)]);
   1503                 result.append(DIGITS[0xF&(c>>8)]);
   1504                 result.append(DIGITS[0xF&(c>>4)]);
   1505                 result.append(DIGITS[0xF&c]);
   1506                 return true;
   1507             }
   1508             return false;
   1509         } catch (IOException e) {
   1510             throw new IllegalIcuArgumentException(e);
   1511         }
   1512     }
   1513 
   1514     /**
   1515      * Returns the index of the first character in a set, ignoring quoted text.
   1516      * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
   1517      * found by a search for "h".  Unlike String.indexOf(), this method searches
   1518      * not for a single character, but for any character of the string
   1519      * <code>setOfChars</code>.
   1520      * @param text text to be searched
   1521      * @param start the beginning index, inclusive; <code>0 <= start
   1522      * <= limit</code>.
   1523      * @param limit the ending index, exclusive; <code>start <= limit
   1524      * <= text.length()</code>.
   1525      * @param setOfChars string with one or more distinct characters
   1526      * @return Offset of the first character in <code>setOfChars</code>
   1527      * found, or -1 if not found.
   1528      * @see String#indexOf
   1529      */
   1530     public static int quotedIndexOf(String text, int start, int limit,
   1531             String setOfChars) {
   1532         for (int i=start; i<limit; ++i) {
   1533             char c = text.charAt(i);
   1534             if (c == BACKSLASH) {
   1535                 ++i;
   1536             } else if (c == APOSTROPHE) {
   1537                 while (++i < limit
   1538                         && text.charAt(i) != APOSTROPHE) {}
   1539             } else if (setOfChars.indexOf(c) >= 0) {
   1540                 return i;
   1541             }
   1542         }
   1543         return -1;
   1544     }
   1545 
   1546     /**
   1547      * Append a character to a rule that is being built up.  To flush
   1548      * the quoteBuf to rule, make one final call with isLiteral == true.
   1549      * If there is no final character, pass in (int)-1 as c.
   1550      * @param rule the string to append the character to
   1551      * @param c the character to append, or (int)-1 if none.
   1552      * @param isLiteral if true, then the given character should not be
   1553      * quoted or escaped.  Usually this means it is a syntactic element
   1554      * such as > or $
   1555      * @param escapeUnprintable if true, then unprintable characters
   1556      * should be escaped using escapeUnprintable().  These escapes will
   1557      * appear outside of quotes.
   1558      * @param quoteBuf a buffer which is used to build up quoted
   1559      * substrings.  The caller should initially supply an empty buffer,
   1560      * and thereafter should not modify the buffer.  The buffer should be
   1561      * cleared out by, at the end, calling this method with a literal
   1562      * character (which may be -1).
   1563      */
   1564     public static void appendToRule(StringBuffer rule,
   1565             int c,
   1566             boolean isLiteral,
   1567             boolean escapeUnprintable,
   1568             StringBuffer quoteBuf) {
   1569         // If we are escaping unprintables, then escape them outside
   1570         // quotes.  \\u and \\U are not recognized within quotes.  The same
   1571         // logic applies to literals, but literals are never escaped.
   1572         if (isLiteral ||
   1573                 (escapeUnprintable && Utility.isUnprintable(c))) {
   1574             if (quoteBuf.length() > 0) {
   1575                 // We prefer backslash APOSTROPHE to double APOSTROPHE
   1576                 // (more readable, less similar to ") so if there are
   1577                 // double APOSTROPHEs at the ends, we pull them outside
   1578                 // of the quote.
   1579 
   1580                 // If the first thing in the quoteBuf is APOSTROPHE
   1581                 // (doubled) then pull it out.
   1582                 while (quoteBuf.length() >= 2 &&
   1583                         quoteBuf.charAt(0) == APOSTROPHE &&
   1584                         quoteBuf.charAt(1) == APOSTROPHE) {
   1585                     rule.append(BACKSLASH).append(APOSTROPHE);
   1586                     quoteBuf.delete(0, 2);
   1587                 }
   1588                 // If the last thing in the quoteBuf is APOSTROPHE
   1589                 // (doubled) then remove and count it and add it after.
   1590                 int trailingCount = 0;
   1591                 while (quoteBuf.length() >= 2 &&
   1592                         quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
   1593                         quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
   1594                     quoteBuf.setLength(quoteBuf.length()-2);
   1595                     ++trailingCount;
   1596                 }
   1597                 if (quoteBuf.length() > 0) {
   1598                     rule.append(APOSTROPHE);
   1599                     rule.append(quoteBuf);
   1600                     rule.append(APOSTROPHE);
   1601                     quoteBuf.setLength(0);
   1602                 }
   1603                 while (trailingCount-- > 0) {
   1604                     rule.append(BACKSLASH).append(APOSTROPHE);
   1605                 }
   1606             }
   1607             if (c != -1) {
   1608                 /* Since spaces are ignored during parsing, they are
   1609                  * emitted only for readability.  We emit one here
   1610                  * only if there isn't already one at the end of the
   1611                  * rule.
   1612                  */
   1613                 if (c == ' ') {
   1614                     int len = rule.length();
   1615                     if (len > 0 && rule.charAt(len-1) != ' ') {
   1616                         rule.append(' ');
   1617                     }
   1618                 } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {
   1619                     rule.appendCodePoint(c);
   1620                 }
   1621             }
   1622         }
   1623 
   1624         // Escape ' and '\' and don't begin a quote just for them
   1625         else if (quoteBuf.length() == 0 &&
   1626                 (c == APOSTROPHE || c == BACKSLASH)) {
   1627             rule.append(BACKSLASH).append((char)c);
   1628         }
   1629 
   1630         // Specials (printable ascii that isn't [0-9a-zA-Z]) and
   1631         // whitespace need quoting.  Also append stuff to quotes if we are
   1632         // building up a quoted substring already.
   1633         else if (quoteBuf.length() > 0 ||
   1634                 (c >= 0x0021 && c <= 0x007E &&
   1635                         !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
   1636                                 (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
   1637                                 (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
   1638                                 PatternProps.isWhiteSpace(c)) {
   1639             quoteBuf.appendCodePoint(c);
   1640             // Double ' within a quote
   1641             if (c == APOSTROPHE) {
   1642                 quoteBuf.append((char)c);
   1643             }
   1644         }
   1645 
   1646         // Otherwise just append
   1647         else {
   1648             rule.appendCodePoint(c);
   1649         }
   1650     }
   1651 
   1652     /**
   1653      * Append the given string to the rule.  Calls the single-character
   1654      * version of appendToRule for each character.
   1655      */
   1656     public static void appendToRule(StringBuffer rule,
   1657             String text,
   1658             boolean isLiteral,
   1659             boolean escapeUnprintable,
   1660             StringBuffer quoteBuf) {
   1661         for (int i=0; i<text.length(); ++i) {
   1662             // Okay to process in 16-bit code units here
   1663             appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
   1664         }
   1665     }
   1666 
   1667     /**
   1668      * Given a matcher reference, which may be null, append its
   1669      * pattern as a literal to the given rule.
   1670      */
   1671     public static void appendToRule(StringBuffer rule,
   1672             UnicodeMatcher matcher,
   1673             boolean escapeUnprintable,
   1674             StringBuffer quoteBuf) {
   1675         if (matcher != null) {
   1676             appendToRule(rule, matcher.toPattern(escapeUnprintable),
   1677                     true, escapeUnprintable, quoteBuf);
   1678         }
   1679     }
   1680 
   1681     /**
   1682      * Compares 2 unsigned integers
   1683      * @param source 32 bit unsigned integer
   1684      * @param target 32 bit unsigned integer
   1685      * @return 0 if equals, 1 if source is greater than target and -1
   1686      *         otherwise
   1687      */
   1688     public static final int compareUnsigned(int source, int target)
   1689     {
   1690         source += MAGIC_UNSIGNED;
   1691         target += MAGIC_UNSIGNED;
   1692         if (source < target) {
   1693             return -1;
   1694         }
   1695         else if (source > target) {
   1696             return 1;
   1697         }
   1698         return 0;
   1699     }
   1700 
   1701     /**
   1702      * Find the highest bit in a positive integer. This is done
   1703      * by doing a binary search through the bits.
   1704      *
   1705      * @param n is the integer
   1706      *
   1707      * @return the bit number of the highest bit, with 0 being
   1708      * the low order bit, or -1 if <code>n</code> is not positive
   1709      */
   1710     public static final byte highBit(int n)
   1711     {
   1712         if (n <= 0) {
   1713             return -1;
   1714         }
   1715 
   1716         byte bit = 0;
   1717 
   1718         if (n >= 1 << 16) {
   1719             n >>= 16;
   1720         bit += 16;
   1721         }
   1722 
   1723         if (n >= 1 << 8) {
   1724             n >>= 8;
   1725         bit += 8;
   1726         }
   1727 
   1728         if (n >= 1 << 4) {
   1729             n >>= 4;
   1730         bit += 4;
   1731         }
   1732 
   1733         if (n >= 1 << 2) {
   1734             n >>= 2;
   1735         bit += 2;
   1736         }
   1737 
   1738         if (n >= 1 << 1) {
   1739             n >>= 1;
   1740         bit += 1;
   1741         }
   1742 
   1743         return bit;
   1744     }
   1745     /**
   1746      * Utility method to take a int[] containing codepoints and return
   1747      * a string representation with code units.
   1748      */
   1749     public static String valueOf(int[]source){
   1750         // TODO: Investigate why this method is not on UTF16 class
   1751         StringBuilder result = new StringBuilder(source.length);
   1752         for(int i=0; i<source.length; i++){
   1753             result.appendCodePoint(source[i]);
   1754         }
   1755         return result.toString();
   1756     }
   1757 
   1758 
   1759     /**
   1760      * Utility to duplicate a string count times
   1761      * @param s String to be duplicated.
   1762      * @param count Number of times to duplicate a string.
   1763      */
   1764     public static String repeat(String s, int count) {
   1765         if (count <= 0) return "";
   1766         if (count == 1) return s;
   1767         StringBuilder result = new StringBuilder();
   1768         for (int i = 0; i < count; ++i) {
   1769             result.append(s);
   1770         }
   1771         return result.toString();
   1772     }
   1773 
   1774     public static String[] splitString(String src, String target) {
   1775         return src.split("\\Q" + target + "\\E");
   1776     }
   1777 
   1778     /**
   1779      * Split the string at runs of ascii whitespace characters.
   1780      */
   1781     public static String[] splitWhitespace(String src) {
   1782         return src.split("\\s+");
   1783     }
   1784 
   1785     /**
   1786      * Parse a list of hex numbers and return a string
   1787      * @param string String of hex numbers.
   1788      * @param minLength Minimal length.
   1789      * @param separator Separator.
   1790      * @return A string from hex numbers.
   1791      */
   1792     public static String fromHex(String string, int minLength, String separator) {
   1793         return fromHex(string, minLength, Pattern.compile(separator != null ? separator : "\\s+"));
   1794     }
   1795 
   1796     /**
   1797      * Parse a list of hex numbers and return a string
   1798      * @param string String of hex numbers.
   1799      * @param minLength Minimal length.
   1800      * @param separator Separator.
   1801      * @return A string from hex numbers.
   1802      */
   1803     public static String fromHex(String string, int minLength, Pattern separator) {
   1804         StringBuilder buffer = new StringBuilder();
   1805         String[] parts = separator.split(string);
   1806         for (String part : parts) {
   1807             if (part.length() < minLength) {
   1808                 throw new IllegalArgumentException("code point too short: " + part);
   1809             }
   1810             int cp = Integer.parseInt(part, 16);
   1811             buffer.appendCodePoint(cp);
   1812         }
   1813         return buffer.toString();
   1814     }
   1815 
   1816     /**
   1817      * This implementation is equivalent to Java 7+ Objects#equals(Object a, Object b)
   1818      *
   1819      * @param a an object
   1820      * @param b an object to be compared with a for equality
   1821      * @return true if the arguments are equal to each other and false otherwise
   1822      */
   1823     public static boolean equals(Object a, Object b) {
   1824         return (a == b)
   1825                 || (a != null && b != null && a.equals(b));
   1826     }
   1827 
   1828     /**
   1829      * This implementation is equivalent to Java 7+ Objects#hash(Object... values)
   1830      * @param values the values to be hashed
   1831      * @return a hash value of the sequence of input values
   1832      */
   1833     public static int hash(Object... values) {
   1834         return Arrays.hashCode(values);
   1835     }
   1836 
   1837     /**
   1838      * This implementation is equivalent to Java 7+ Objects#hashCode(Object o)
   1839      * @param o an object
   1840      * @return a hash value of a non-null argument and 0 for null argument
   1841      */
   1842     public static int hashCode(Object o) {
   1843         return o == null ? 0 : o.hashCode();
   1844     }
   1845 
   1846     /**
   1847      * This implementation is equivalent to Java 7+ Objects#toString(Object o)
   1848      * @param o an object
   1849      * @return the result of calling toStirng for a non-null argument and "null" for a
   1850      * null argument
   1851      */
   1852     public static String toString(Object o) {
   1853         return o == null ? "null" : o.toString();
   1854     }
   1855 }
   1856