Home | History | Annotate | Download | only in impl
      1 /*
      2  *******************************************************************************
      3  * Copyright (C) 1996-2015, International Business Machines Corporation and    *
      4  * others. All Rights Reserved.                                                *
      5  *******************************************************************************
      6  */
      7 package com.ibm.icu.impl;
      8 
      9 import java.io.IOException;
     10 import java.util.ArrayList;
     11 import java.util.Locale;
     12 import java.util.regex.Pattern;
     13 
     14 import com.ibm.icu.lang.UCharacter;
     15 import com.ibm.icu.text.Replaceable;
     16 import com.ibm.icu.text.UTF16;
     17 import com.ibm.icu.text.UnicodeMatcher;
     18 
     19 public final class Utility {
     20 
     21     private static final char APOSTROPHE = '\'';  // the apostrophe (single quote) character
     22     private static final char BACKSLASH  = '\\';  // the backslash character
     23     private static final int MAGIC_UNSIGNED = 0x80000000;  // only the top (sign) bit of a 32-bit int is set; its use is outside the visible part of this file
     24 
     25     /**
     26      * Convenience utility to compare two Object[]s.
     27      * Ought to be in System
     28      */
     29     public final static boolean arrayEquals(Object[] source, Object target) {
     30         if (source == null) return (target == null);
     31         if (!(target instanceof Object[])) return false;
     32         Object[] targ = (Object[]) target;
     33         return (source.length == targ.length
     34                 && arrayRegionMatches(source, 0, targ, 0, source.length));
     35     }
     36 
     37     /**
     38      * Convenience utility to compare two int[]s
     39      * Ought to be in System
     40      */
     41     public final static boolean arrayEquals(int[] source, Object target) {
     42         if (source == null) return (target == null);
     43         if (!(target instanceof int[])) return false;
     44         int[] targ = (int[]) target;
     45         return (source.length == targ.length
     46                 && arrayRegionMatches(source, 0, targ, 0, source.length));
     47     }
     48 
     49     /**
     50      * Convenience utility to compare two double[]s
     51      * Ought to be in System
     52      */
     53     public final static boolean arrayEquals(double[] source, Object target) {
     54         if (source == null) return (target == null);
     55         if (!(target instanceof double[])) return false;
     56         double[] targ = (double[]) target;
     57         return (source.length == targ.length
     58                 && arrayRegionMatches(source, 0, targ, 0, source.length));
     59     }
     60     public final static boolean arrayEquals(byte[] source, Object target) {
     61         if (source == null) return (target == null);
     62         if (!(target instanceof byte[])) return false;
     63         byte[] targ = (byte[]) target;
     64         return (source.length == targ.length
     65                 && arrayRegionMatches(source, 0, targ, 0, source.length));
     66     }
     67 
     68     /**
     69      * Convenience utility to compare two Object[]s
     70      * Ought to be in System
     71      */
     72     public final static boolean arrayEquals(Object source, Object target) {
     73         if (source == null) return (target == null);
     74         // for some reason, the correct arrayEquals is not being called
     75         // so do it by hand for now.
     76         if (source instanceof Object[])
     77             return(arrayEquals((Object[]) source,target));
     78         if (source instanceof int[])
     79             return(arrayEquals((int[]) source,target));
     80         if (source instanceof double[])
     81             return(arrayEquals((double[]) source, target));
     82         if (source instanceof byte[])
     83             return(arrayEquals((byte[]) source,target));
     84         return source.equals(target);
     85     }
     86 
     87     /**
     88      * Convenience utility to compare two Object[]s
     89      * Ought to be in System.
     90      * @param len the length to compare.
     91      * The start indices and start+len must be valid.
     92      */
     93     public final static boolean arrayRegionMatches(Object[] source, int sourceStart,
     94             Object[] target, int targetStart,
     95             int len)
     96     {
     97         int sourceEnd = sourceStart + len;
     98         int delta = targetStart - sourceStart;
     99         for (int i = sourceStart; i < sourceEnd; i++) {
    100             if (!arrayEquals(source[i],target[i + delta]))
    101                 return false;
    102         }
    103         return true;
    104     }
    105 
    106     /**
    107      * Convenience utility to compare two Object[]s
    108      * Ought to be in System.
    109      * @param len the length to compare.
    110      * The start indices and start+len must be valid.
    111      */
    112     public final static boolean arrayRegionMatches(char[] source, int sourceStart,
    113             char[] target, int targetStart,
    114             int len)
    115     {
    116         int sourceEnd = sourceStart + len;
    117         int delta = targetStart - sourceStart;
    118         for (int i = sourceStart; i < sourceEnd; i++) {
    119             if (source[i]!=target[i + delta])
    120                 return false;
    121         }
    122         return true;
    123     }
    124 
    125     /**
    126      * Convenience utility to compare two int[]s.
    127      * @param len the length to compare.
    128      * The start indices and start+len must be valid.
    129      * Ought to be in System
    130      */
    131     public final static boolean arrayRegionMatches(int[] source, int sourceStart,
    132             int[] target, int targetStart,
    133             int len)
    134     {
    135         int sourceEnd = sourceStart + len;
    136         int delta = targetStart - sourceStart;
    137         for (int i = sourceStart; i < sourceEnd; i++) {
    138             if (source[i] != target[i + delta])
    139                 return false;
    140         }
    141         return true;
    142     }
    143 
    144     /**
    145      * Convenience utility to compare two arrays of doubles.
    146      * @param len the length to compare.
    147      * The start indices and start+len must be valid.
    148      * Ought to be in System
    149      */
    150     public final static boolean arrayRegionMatches(double[] source, int sourceStart,
    151             double[] target, int targetStart,
    152             int len)
    153     {
    154         int sourceEnd = sourceStart + len;
    155         int delta = targetStart - sourceStart;
    156         for (int i = sourceStart; i < sourceEnd; i++) {
    157             if (source[i] != target[i + delta])
    158                 return false;
    159         }
    160         return true;
    161     }
    162     public final static boolean arrayRegionMatches(byte[] source, int sourceStart,
    163             byte[] target, int targetStart, int len){
    164         int sourceEnd = sourceStart + len;
    165         int delta = targetStart - sourceStart;
    166         for (int i = sourceStart; i < sourceEnd; i++) {
    167             if (source[i] != target[i + delta])
    168                 return false;
    169         }
    170         return true;
    171     }
    172 
    173     /**
    174      * Convenience utility. Does null checks on objects, then calls equals.
    175      */
    176     public final static boolean objectEquals(Object a, Object b) {
    177         return a == null ?
    178                 b == null ? true : false :
    179                     b == null ? false : a.equals(b);
    180     }
    181 
    182     /**
    183      * Convenience utility. Does null checks on objects, then calls compare.
    184      */
    185     public static <T extends Comparable<T>> int checkCompare(T a, T b) {
    186         return a == null ?
    187                 b == null ? 0 : -1 :
    188                     b == null ? 1 : a.compareTo(b);
    189       }
    190 
    191     /**
    192      * Convenience utility. Does null checks on object, then calls hashCode.
    193      */
    194     public static int checkHash(Object a) {
    195         return a == null ? 0 : a.hashCode();
    196       }
    197 
    198     /**
    199      * The ESCAPE character is used during run-length encoding.  It signals
    200      * a run of identical chars.
    201      */
    202     private static final char ESCAPE = '\uA5A5';  // also compared against int values by the int-based encoder and decoder
    203 
    204     /**
    205      * The ESCAPE_BYTE character is used during run-length encoding.  It signals
    206      * a run of identical bytes.
    207      */
    208     static final byte ESCAPE_BYTE = (byte)0xA5;  // as a signed byte this is -91, so comparisons must be made in the byte domain
    209 
    210     /**
    211      * Construct a string representing an int array.  Use run-length encoding.
    212      * A character represents itself, unless it is the ESCAPE character.  Then
    213      * the following notations are possible:
    214      *   ESCAPE ESCAPE   ESCAPE literal
    215      *   ESCAPE n c      n instances of character c
    216      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
    217      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
    218      * If we encounter a run where n == ESCAPE, we represent this as:
    219      *   c ESCAPE n-1 c
    220      * The ESCAPE value is chosen so as not to collide with commonly
    221      * seen values.
    222      */
    223     static public final String arrayToRLEString(int[] a) {
    224         StringBuilder buffer = new StringBuilder();
    225 
    226         appendInt(buffer, a.length);
    227         int runValue = a[0];
    228         int runLength = 1;
    229         for (int i=1; i<a.length; ++i) {
    230             int s = a[i];
    231             if (s == runValue && runLength < 0xFFFF) {
    232                 ++runLength;
    233             } else {
    234                 encodeRun(buffer, runValue, runLength);
    235                 runValue = s;
    236                 runLength = 1;
    237             }
    238         }
    239         encodeRun(buffer, runValue, runLength);
    240         return buffer.toString();
    241     }
    242 
    243     /**
    244      * Construct a string representing a short array.  Use run-length encoding.
    245      * A character represents itself, unless it is the ESCAPE character.  Then
    246      * the following notations are possible:
    247      *   ESCAPE ESCAPE   ESCAPE literal
    248      *   ESCAPE n c      n instances of character c
    249      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
    250      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
    251      * If we encounter a run where n == ESCAPE, we represent this as:
    252      *   c ESCAPE n-1 c
    253      * The ESCAPE value is chosen so as not to collide with commonly
    254      * seen values.
    255      */
    256     static public final String arrayToRLEString(short[] a) {
    257         StringBuilder buffer = new StringBuilder();
    258         // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
    259         buffer.append((char) (a.length >> 16));
    260         buffer.append((char) a.length);
    261         short runValue = a[0];
    262         int runLength = 1;
    263         for (int i=1; i<a.length; ++i) {
    264             short s = a[i];
    265             if (s == runValue && runLength < 0xFFFF) ++runLength;
    266             else {
    267                 encodeRun(buffer, runValue, runLength);
    268                 runValue = s;
    269                 runLength = 1;
    270             }
    271         }
    272         encodeRun(buffer, runValue, runLength);
    273         return buffer.toString();
    274     }
    275 
    276     /**
    277      * Construct a string representing a char array.  Use run-length encoding.
    278      * A character represents itself, unless it is the ESCAPE character.  Then
    279      * the following notations are possible:
    280      *   ESCAPE ESCAPE   ESCAPE literal
    281      *   ESCAPE n c      n instances of character c
    282      * Since an encoded run occupies 3 characters, we only encode runs of 4 or
    283      * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
    284      * If we encounter a run where n == ESCAPE, we represent this as:
    285      *   c ESCAPE n-1 c
    286      * The ESCAPE value is chosen so as not to collide with commonly
    287      * seen values.
    288      */
    289     static public final String arrayToRLEString(char[] a) {
    290         StringBuilder buffer = new StringBuilder();
    291         buffer.append((char) (a.length >> 16));
    292         buffer.append((char) a.length);
    293         char runValue = a[0];
    294         int runLength = 1;
    295         for (int i=1; i<a.length; ++i) {
    296             char s = a[i];
    297             if (s == runValue && runLength < 0xFFFF) ++runLength;
    298             else {
    299                 encodeRun(buffer, (short)runValue, runLength);
    300                 runValue = s;
    301                 runLength = 1;
    302             }
    303         }
    304         encodeRun(buffer, (short)runValue, runLength);
    305         return buffer.toString();
    306     }
    307 
    308     /**
    309      * Construct a string representing a byte array.  Use run-length encoding.
    310      * Two bytes are packed into a single char, with a single extra zero byte at
    311      * the end if needed.  A byte represents itself, unless it is the
    312      * ESCAPE_BYTE.  Then the following notations are possible:
    313      *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
    314      *   ESCAPE_BYTE n b           n instances of byte b
    315      * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
    316      * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
    317      * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
    318      *   b ESCAPE_BYTE n-1 b
    319      * The ESCAPE_BYTE value is chosen so as not to collide with commonly
    320      * seen values.
    321      */
    322     static public final String arrayToRLEString(byte[] a) {
    323         StringBuilder buffer = new StringBuilder();
    324         buffer.append((char) (a.length >> 16));
    325         buffer.append((char) a.length);
    326         byte runValue = a[0];
    327         int runLength = 1;
    328         byte[] state = new byte[2];
    329         for (int i=1; i<a.length; ++i) {
    330             byte b = a[i];
    331             if (b == runValue && runLength < 0xFF) ++runLength;
    332             else {
    333                 encodeRun(buffer, runValue, runLength, state);
    334                 runValue = b;
    335                 runLength = 1;
    336             }
    337         }
    338         encodeRun(buffer, runValue, runLength, state);
    339 
    340         // We must save the final byte, if there is one, by padding
    341         // an extra zero.
    342         if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
    343 
    344         return buffer.toString();
    345     }
    346 
    347     /**
    348      * Encode a run, possibly a degenerate run (of < 4 values).
    349      * @param length The length of the run; must be > 0 && <= 0xFFFF.
    350      */
    351     private static final <T extends Appendable> void encodeRun(T buffer, int value, int length) {
    352         if (length < 4) {
    353             for (int j=0; j<length; ++j) {
    354                 if (value == ESCAPE) {
    355                     appendInt(buffer, value);
    356                 }
    357                 appendInt(buffer, value);
    358             }
    359         }
    360         else {
    361             if (length == (int) ESCAPE) {
    362                 if (value == (int) ESCAPE) {
    363                     appendInt(buffer, ESCAPE);
    364                 }
    365                 appendInt(buffer, value);
    366                 --length;
    367             }
    368             appendInt(buffer, ESCAPE);
    369             appendInt(buffer, length);
    370             appendInt(buffer, value); // Don't need to escape this value
    371         }
    372     }
    373 
    374     private static final <T extends Appendable> void appendInt(T buffer, int value) {
    375         try {
    376             buffer.append((char)(value >>> 16));
    377             buffer.append((char)(value & 0xFFFF));
    378         } catch (IOException e) {
    379             throw new IllegalIcuArgumentException(e);
    380         }
    381     }
    382 
    383     /**
    384      * Encode a run, possibly a degenerate run (of < 4 values).
    385      * @param length The length of the run; must be > 0 && <= 0xFFFF.
    386      */
    387     private static final <T extends Appendable> void encodeRun(T buffer, short value, int length) {
    388         try {
    389             if (length < 4) {
    390                 for (int j=0; j<length; ++j) {
    391                     if (value == (int) ESCAPE)
    392                         buffer.append(ESCAPE);
    393                     buffer.append((char) value);
    394                 }
    395             }
    396             else {
    397                 if (length == (int) ESCAPE) {
    398                     if (value == (int) ESCAPE) buffer.append(ESCAPE);
    399                     buffer.append((char) value);
    400                     --length;
    401                 }
    402                 buffer.append(ESCAPE);
    403                 buffer.append((char) length);
    404                 buffer.append((char) value); // Don't need to escape this value
    405             }
    406         } catch (IOException e) {
    407             throw new IllegalIcuArgumentException(e);
    408         }
    409     }
    410 
    411     /**
    412      * Encode a run, possibly a degenerate run (of < 4 values).
    413      * @param length The length of the run; must be > 0 && <= 0xFF.
    414      */
    415     private static final <T extends Appendable> void encodeRun(T buffer, byte value, int length,
    416             byte[] state) {
    417         if (length < 4) {
    418             for (int j=0; j<length; ++j) {
    419                 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
    420                 appendEncodedByte(buffer, value, state);
    421             }
    422         }
    423         else {
    424             if (length == ESCAPE_BYTE) {
    425                 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
    426                 appendEncodedByte(buffer, value, state);
    427                 --length;
    428             }
    429             appendEncodedByte(buffer, ESCAPE_BYTE, state);
    430             appendEncodedByte(buffer, (byte)length, state);
    431             appendEncodedByte(buffer, value, state); // Don't need to escape this value
    432         }
    433     }
    434 
    435     /**
    436      * Append a byte to the given Appendable, packing two bytes into each
    437      * character.  The state parameter maintains intermediary data between
    438      * calls.
    439      * @param state A two-element array, with state[0] == 0 if this is the
    440      * first byte of a pair, or state[0] != 0 if this is the second byte
    441      * of a pair, in which case state[1] is the first byte.
    442      */
    443     private static final <T extends Appendable> void appendEncodedByte(T buffer, byte value,
    444             byte[] state) {
    445         try {
    446             if (state[0] != 0) {
    447                 char c = (char) ((state[1] << 8) | (((int) value) & 0xFF));
    448                 buffer.append(c);
    449                 state[0] = 0;
    450             }
    451             else {
    452                 state[0] = 1;
    453                 state[1] = value;
    454             }
    455         } catch (IOException e) {
    456             throw new IllegalIcuArgumentException(e);
    457         }
    458     }
    459 
    460     /**
    461      * Construct an array of ints from a run-length encoded string.
    462      */
    463     static public final int[] RLEStringToIntArray(String s) {
    464         int length = getInt(s, 0);
    465         int[] array = new int[length];
    466         int ai = 0, i = 1;
    467 
    468         int maxI = s.length() / 2;
    469         while (ai < length && i < maxI) {
    470             int c = getInt(s, i++);
    471 
    472             if (c == ESCAPE) {
    473                 c = getInt(s, i++);
    474                 if (c == ESCAPE) {
    475                     array[ai++] = c;
    476                 } else {
    477                     int runLength = c;
    478                     int runValue = getInt(s, i++);
    479                     for (int j=0; j<runLength; ++j) {
    480                         array[ai++] = runValue;
    481                     }
    482                 }
    483             }
    484             else {
    485                 array[ai++] = c;
    486             }
    487         }
    488 
    489         if (ai != length || i != maxI) {
    490             throw new IllegalStateException("Bad run-length encoded int array");
    491         }
    492 
    493         return array;
    494     }
    495     static final int getInt(String s, int i) {
    496         return (((int) s.charAt(2*i)) << 16) | (int) s.charAt(2*i+1);
    497     }
    498 
    499     /**
    500      * Construct an array of shorts from a run-length encoded string.
    501      */
    502     static public final short[] RLEStringToShortArray(String s) {
    503         int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
    504         short[] array = new short[length];
    505         int ai = 0;
    506         for (int i=2; i<s.length(); ++i) {
    507             char c = s.charAt(i);
    508             if (c == ESCAPE) {
    509                 c = s.charAt(++i);
    510                 if (c == ESCAPE) {
    511                     array[ai++] = (short) c;
    512                 } else {
    513                     int runLength = (int) c;
    514                     short runValue = (short) s.charAt(++i);
    515                     for (int j=0; j<runLength; ++j) array[ai++] = runValue;
    516                 }
    517             }
    518             else {
    519                 array[ai++] = (short) c;
    520             }
    521         }
    522 
    523         if (ai != length)
    524             throw new IllegalStateException("Bad run-length encoded short array");
    525 
    526         return array;
    527     }
    528 
    529     /**
    530      * Construct an array of shorts from a run-length encoded string.
    531      */
    532     static public final char[] RLEStringToCharArray(String s) {
    533         int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
    534         char[] array = new char[length];
    535         int ai = 0;
    536         for (int i=2; i<s.length(); ++i) {
    537             char c = s.charAt(i);
    538             if (c == ESCAPE) {
    539                 c = s.charAt(++i);
    540                 if (c == ESCAPE) {
    541                     array[ai++] = c;
    542                 } else {
    543                     int runLength = (int) c;
    544                     char runValue = s.charAt(++i);
    545                     for (int j=0; j<runLength; ++j) array[ai++] = runValue;
    546                 }
    547             }
    548             else {
    549                 array[ai++] = c;
    550             }
    551         }
    552 
    553         if (ai != length)
    554             throw new IllegalStateException("Bad run-length encoded short array");
    555 
    556         return array;
    557     }
    558 
    559     /**
    560      * Construct an array of bytes from a run-length encoded string.
    561      */
    562     static public final byte[] RLEStringToByteArray(String s) {
    563         int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
    564         byte[] array = new byte[length];
    565         boolean nextChar = true;
    566         char c = 0;
    567         int node = 0;
    568         int runLength = 0;
    569         int i = 2;
    570         for (int ai=0; ai<length; ) {
    571             // This part of the loop places the next byte into the local
    572             // variable 'b' each time through the loop.  It keeps the
    573             // current character in 'c' and uses the boolean 'nextChar'
    574             // to see if we've taken both bytes out of 'c' yet.
    575             byte b;
    576             if (nextChar) {
    577                 c = s.charAt(i++);
    578                 b = (byte) (c >> 8);
    579                 nextChar = false;
    580             }
    581             else {
    582                 b = (byte) (c & 0xFF);
    583                 nextChar = true;
    584             }
    585 
    586             // This part of the loop is a tiny state machine which handles
    587             // the parsing of the run-length encoding.  This would be simpler
    588             // if we could look ahead, but we can't, so we use 'node' to
    589             // move between three nodes in the state machine.
    590             switch (node) {
    591             case 0:
    592                 // Normal idle node
    593                 if (b == ESCAPE_BYTE) {
    594                     node = 1;
    595                 }
    596                 else {
    597                     array[ai++] = b;
    598                 }
    599                 break;
    600             case 1:
    601                 // We have seen one ESCAPE_BYTE; we expect either a second
    602                 // one, or a run length and value.
    603                 if (b == ESCAPE_BYTE) {
    604                     array[ai++] = ESCAPE_BYTE;
    605                     node = 0;
    606                 }
    607                 else {
    608                     runLength = b;
    609                     // Interpret signed byte as unsigned
    610                     if (runLength < 0) runLength += 0x100;
    611                     node = 2;
    612                 }
    613                 break;
    614             case 2:
    615                 // We have seen an ESCAPE_BYTE and length byte.  We interpret
    616                 // the next byte as the value to be repeated.
    617                 for (int j=0; j<runLength; ++j) array[ai++] = b;
    618                 node = 0;
    619                 break;
    620             }
    621         }
    622 
    623         if (node != 0)
    624             throw new IllegalStateException("Bad run-length encoded byte array");
    625 
    626         if (i != s.length())
    627             throw new IllegalStateException("Excess data in RLE byte array string");
    628 
    629         return array;
    630     }
    631 
    632     static public String LINE_SEPARATOR = System.getProperty("line.separator");  // line terminator read from the system property when the class initializes; NOTE(review): public and non-final, so any caller can reassign it
    633 
    634     /**
    635      * Format a String for representation in a source file.  This includes
    636      * breaking it into lines and escaping characters using octal notation
    637      * when necessary (control characters and double quotes).
    638      */
    639     static public final String formatForSource(String s) {
    640         StringBuilder buffer = new StringBuilder();
    641         for (int i=0; i<s.length();) {
    642             if (i > 0) buffer.append('+').append(LINE_SEPARATOR);
    643             buffer.append("        \"");
    644             int count = 11;
    645             while (i<s.length() && count<80) {
    646                 char c = s.charAt(i++);
    647                 if (c < '\u0020' || c == '"' || c == '\\') {
    648                     if (c == '\n') {
    649                         buffer.append("\\n");
    650                         count += 2;
    651                     } else if (c == '\t') {
    652                         buffer.append("\\t");
    653                         count += 2;
    654                     } else if (c == '\r') {
    655                         buffer.append("\\r");
    656                         count += 2;
    657                     } else {
    658                         // Represent control characters, backslash and double quote
    659                         // using octal notation; otherwise the string we form
    660                         // won't compile, since Unicode escape sequences are
    661                         // processed before tokenization.
    662                         buffer.append('\\');
    663                         buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
    664                         buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
    665                         buffer.append(HEX_DIGIT[(c & 0007)]);
    666                         count += 4;
    667                     }
    668                 }
    669                 else if (c <= '\u007E') {
    670                     buffer.append(c);
    671                     count += 1;
    672                 }
    673                 else {
    674                     buffer.append("\\u");
    675                     buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
    676                     buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
    677                     buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
    678                     buffer.append(HEX_DIGIT[(c & 0x000F)]);
    679                     count += 6;
    680                 }
    681             }
    682             buffer.append('"');
    683         }
    684         return buffer.toString();
    685     }
    686 
    687     static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',  // upper-case digit characters indexed by value 0-15
    688         '8','9','A','B','C','D','E','F'};  // the first eight entries are also used as octal digits by the source formatters
    689 
    690     /**
    691      * Format a String for representation in a source file.  Like
    692      * formatForSource but does not do line breaking.
    693      */
    694     static public final String format1ForSource(String s) {
    695         StringBuilder buffer = new StringBuilder();
    696         buffer.append("\"");
    697         for (int i=0; i<s.length();) {
    698             char c = s.charAt(i++);
    699             if (c < '\u0020' || c == '"' || c == '\\') {
    700                 if (c == '\n') {
    701                     buffer.append("\\n");
    702                 } else if (c == '\t') {
    703                     buffer.append("\\t");
    704                 } else if (c == '\r') {
    705                     buffer.append("\\r");
    706                 } else {
    707                     // Represent control characters, backslash and double quote
    708                     // using octal notation; otherwise the string we form
    709                     // won't compile, since Unicode escape sequences are
    710                     // processed before tokenization.
    711                     buffer.append('\\');
    712                     buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
    713                     buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
    714                     buffer.append(HEX_DIGIT[(c & 0007)]);
    715                 }
    716             }
    717             else if (c <= '\u007E') {
    718                 buffer.append(c);
    719             }
    720             else {
    721                 buffer.append("\\u");
    722                 buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
    723                 buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
    724                 buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
    725                 buffer.append(HEX_DIGIT[(c & 0x000F)]);
    726             }
    727         }
    728         buffer.append('"');
    729         return buffer.toString();
    730     }
    731 
    732     /**
    733      * Convert characters outside the range U+0020 to U+007F to
    734      * Unicode escapes, and convert backslash to a double backslash.
    735      */
    736     public static final String escape(String s) {
    737         StringBuilder buf = new StringBuilder();
    738         for (int i=0; i<s.length(); ) {
    739             int c = Character.codePointAt(s, i);
    740             i += UTF16.getCharCount(c);
    741             if (c >= ' ' && c <= 0x007F) {
    742                 if (c == '\\') {
    743                     buf.append("\\\\"); // That is, "\\"
    744                 } else {
    745                     buf.append((char)c);
    746                 }
    747             } else {
    748                 boolean four = c <= 0xFFFF;
    749                 buf.append(four ? "\\u" : "\\U");
    750                 buf.append(hex(c, four ? 4 : 8));
    751             }
    752         }
    753         return buf.toString();
    754     }
    755 
    756     /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
    757     static private final char[] UNESCAPE_MAP = {
    758         /*"   0x22, 0x22 */
    759         /*'   0x27, 0x27 */
    760         /*?   0x3F, 0x3F */
    761         /*\   0x5C, 0x5C */
    762         /*a*/ 0x61, 0x07,
    763         /*b*/ 0x62, 0x08,
    764         /*e*/ 0x65, 0x1b,
    765         /*f*/ 0x66, 0x0c,
    766         /*n*/ 0x6E, 0x0a,
    767         /*r*/ 0x72, 0x0d,
    768         /*t*/ 0x74, 0x09,
    769         /*v*/ 0x76, 0x0b
    770     };
    771 
    772     /**
    773      * Convert an escape to a 32-bit code point value.  We attempt
    774      * to parallel the icu4c unescapeAt() function.
    775      * @param offset16 an array containing offset to the character
    776      * <em>after</em> the backslash.  Upon return offset16[0] will
    777      * be updated to point after the escape sequence.
    778      * @return character value from 0 to 10FFFF, or -1 on error.
    779      */
    780     public static int unescapeAt(String s, int[] offset16) {
    781         int c;
    782         int result = 0;
    783         int n = 0;
    784         int minDig = 0;
    785         int maxDig = 0;
    786         int bitsPerDigit = 4;
    787         int dig;
    788         int i;
    789         boolean braces = false;
    790 
    791         /* Check that offset is in range */
    792         int offset = offset16[0];
    793         int length = s.length();
    794         if (offset < 0 || offset >= length) {
    795             return -1;
    796         }
    797 
    798         /* Fetch first UChar after '\\' */
    799         c = Character.codePointAt(s, offset);
    800         offset += UTF16.getCharCount(c);
    801 
    802         /* Convert hexadecimal and octal escapes */
    803         switch (c) {
    804         case 'u':
    805             minDig = maxDig = 4;
    806             break;
    807         case 'U':
    808             minDig = maxDig = 8;
    809             break;
    810         case 'x':
    811             minDig = 1;
    812             if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
    813                 ++offset;
    814                 braces = true;
    815                 maxDig = 8;
    816             } else {
    817                 maxDig = 2;
    818             }
    819             break;
    820         default:
    821             dig = UCharacter.digit(c, 8);
    822             if (dig >= 0) {
    823                 minDig = 1;
    824                 maxDig = 3;
    825                 n = 1; /* Already have first octal digit */
    826                 bitsPerDigit = 3;
    827                 result = dig;
    828             }
    829             break;
    830         }
    831         if (minDig != 0) {
    832             while (offset < length && n < maxDig) {
    833                 c = UTF16.charAt(s, offset);
    834                 dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
    835                 if (dig < 0) {
    836                     break;
    837                 }
    838                 result = (result << bitsPerDigit) | dig;
    839                 offset += UTF16.getCharCount(c);
    840                 ++n;
    841             }
    842             if (n < minDig) {
    843                 return -1;
    844             }
    845             if (braces) {
    846                 if (c != 0x7D /*}*/) {
    847                     return -1;
    848                 }
    849                 ++offset;
    850             }
    851             if (result < 0 || result >= 0x110000) {
    852                 return -1;
    853             }
    854             // If an escape sequence specifies a lead surrogate, see
    855             // if there is a trail surrogate after it, either as an
    856             // escape or as a literal.  If so, join them up into a
    857             // supplementary.
    858             if (offset < length &&
    859                     UTF16.isLeadSurrogate((char) result)) {
    860                 int ahead = offset+1;
    861                 c = s.charAt(offset); // [sic] get 16-bit code unit
    862                 if (c == '\\' && ahead < length) {
    863                     int o[] = new int[] { ahead };
    864                     c = unescapeAt(s, o);
    865                     ahead = o[0];
    866                 }
    867                 if (UTF16.isTrailSurrogate((char) c)) {
    868                     offset = ahead;
    869                     result = Character.toCodePoint((char) result, (char) c);
    870                 }
    871             }
    872             offset16[0] = offset;
    873             return result;
    874         }
    875 
    876         /* Convert C-style escapes in table */
    877         for (i=0; i<UNESCAPE_MAP.length; i+=2) {
    878             if (c == UNESCAPE_MAP[i]) {
    879                 offset16[0] = offset;
    880                 return UNESCAPE_MAP[i+1];
    881             } else if (c < UNESCAPE_MAP[i]) {
    882                 break;
    883             }
    884         }
    885 
    886         /* Map \cX to control-X: X & 0x1F */
    887         if (c == 'c' && offset < length) {
    888             c = UTF16.charAt(s, offset);
    889             offset16[0] = offset + UTF16.getCharCount(c);
    890             return 0x1F & c;
    891         }
    892 
    893         /* If no special forms are recognized, then consider
    894          * the backslash to generically escape the next character. */
    895         offset16[0] = offset;
    896         return c;
    897     }
    898 
    899     /**
    900      * Convert all escapes in a given string using unescapeAt().
    901      * @exception IllegalArgumentException if an invalid escape is
    902      * seen.
    903      */
    904     public static String unescape(String s) {
    905         StringBuilder buf = new StringBuilder();
    906         int[] pos = new int[1];
    907         for (int i=0; i<s.length(); ) {
    908             char c = s.charAt(i++);
    909             if (c == '\\') {
    910                 pos[0] = i;
    911                 int e = unescapeAt(s, pos);
    912                 if (e < 0) {
    913                     throw new IllegalArgumentException("Invalid escape sequence " +
    914                             s.substring(i-1, Math.min(i+8, s.length())));
    915                 }
    916                 buf.appendCodePoint(e);
    917                 i = pos[0];
    918             } else {
    919                 buf.append(c);
    920             }
    921         }
    922         return buf.toString();
    923     }
    924 
    925     /**
    926      * Convert all escapes in a given string using unescapeAt().
    927      * Leave invalid escape sequences unchanged.
    928      */
    929     public static String unescapeLeniently(String s) {
    930         StringBuilder buf = new StringBuilder();
    931         int[] pos = new int[1];
    932         for (int i=0; i<s.length(); ) {
    933             char c = s.charAt(i++);
    934             if (c == '\\') {
    935                 pos[0] = i;
    936                 int e = unescapeAt(s, pos);
    937                 if (e < 0) {
    938                     buf.append(c);
    939                 } else {
    940                     buf.appendCodePoint(e);
    941                     i = pos[0];
    942                 }
    943             } else {
    944                 buf.append(c);
    945             }
    946         }
    947         return buf.toString();
    948     }
    949 
    950     /**
    951      * Convert a char to 4 hex uppercase digits.  E.g., hex('a') =>
    952      * "0041".
    953      */
    954     public static String hex(long ch) {
    955         return hex(ch, 4);
    956     }
    957 
    958     /**
    959      * Supplies a zero-padded hex representation of an integer (without 0x)
    960      */
    961     static public String hex(long i, int places) {
    962         if (i == Long.MIN_VALUE) return "-8000000000000000";
    963         boolean negative = i < 0;
    964         if (negative) {
    965             i = -i;
    966         }
    967         String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH);
    968         if (result.length() < places) {
    969             result = "0000000000000000".substring(result.length(),places) + result;
    970         }
    971         if (negative) {
    972             return '-' + result;
    973         }
    974         return result;
    975     }
    976 
    977     /**
    978      * Convert a string to comma-separated groups of 4 hex uppercase
    979      * digits.  E.g., hex('ab') => "0041,0042".
    980      */
    981     public static String hex(CharSequence s) {
    982         return hex(s, 4, ",", true, new StringBuilder()).toString();
    983     }
    984 
    985     /**
    986      * Convert a string to separated groups of hex uppercase
    987      * digits.  E.g., hex('ab'...) => "0041,0042".  Append the output
    988      * to the given Appendable.
    989      */
    990     public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
    991         try {
    992             if (useCodePoints) {
    993                 int cp;
    994                 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
    995                     cp = Character.codePointAt(s, i);
    996                     if (i != 0) {
    997                         result.append(separator);
    998                     }
    999                     result.append(hex(cp,width));
   1000                 }
   1001             } else {
   1002                 for (int i = 0; i < s.length(); ++i) {
   1003                     if (i != 0) {
   1004                         result.append(separator);
   1005                     }
   1006                     result.append(hex(s.charAt(i),width));
   1007                 }
   1008             }
   1009             return result;
   1010         } catch (IOException e) {
   1011             throw new IllegalIcuArgumentException(e);
   1012         }
   1013     }
   1014 
   1015     public static String hex(byte[] o, int start, int end, String separator) {
   1016         StringBuilder result = new StringBuilder();
   1017         //int ch;
   1018         for (int i = start; i < end; ++i) {
   1019           if (i != 0) result.append(separator);
   1020           result.append(hex(o[i]));
   1021         }
   1022         return result.toString();
   1023       }
   1024 
   1025     /**
   1026      * Convert a string to comma-separated groups of 4 hex uppercase
   1027      * digits.  E.g., hex('ab') => "0041,0042".
   1028      */
   1029     public static <S extends CharSequence> String hex(S s, int width, S separator) {
   1030         return hex(s, width, separator, true, new StringBuilder()).toString();
   1031     }
   1032 
   1033     /**
   1034      * Split a string into pieces based on the given divider character
   1035      * @param s the string to split
   1036      * @param divider the character on which to split.  Occurrences of
   1037      * this character are not included in the output
   1038      * @param output an array to receive the substrings between
   1039      * instances of divider.  It must be large enough on entry to
   1040      * accomodate all output.  Adjacent instances of the divider
   1041      * character will place empty strings into output.  Before
   1042      * returning, output is padded out with empty strings.
   1043      */
   1044     public static void split(String s, char divider, String[] output) {
   1045         int last = 0;
   1046         int current = 0;
   1047         int i;
   1048         for (i = 0; i < s.length(); ++i) {
   1049             if (s.charAt(i) == divider) {
   1050                 output[current++] = s.substring(last,i);
   1051                 last = i+1;
   1052             }
   1053         }
   1054         output[current++] = s.substring(last,i);
   1055         while (current < output.length) {
   1056             output[current++] = "";
   1057         }
   1058     }
   1059 
   1060     /**
   1061      * Split a string into pieces based on the given divider character
   1062      * @param s the string to split
   1063      * @param divider the character on which to split.  Occurrences of
   1064      * this character are not included in the output
   1065      * @return output an array to receive the substrings between
   1066      * instances of divider. Adjacent instances of the divider
   1067      * character will place empty strings into output.
   1068      */
   1069     public static String[] split(String s, char divider) {
   1070         int last = 0;
   1071         int i;
   1072         ArrayList<String> output = new ArrayList<String>();
   1073         for (i = 0; i < s.length(); ++i) {
   1074             if (s.charAt(i) == divider) {
   1075                 output.add(s.substring(last,i));
   1076                 last = i+1;
   1077             }
   1078         }
   1079         output.add( s.substring(last,i));
   1080         return output.toArray(new String[output.size()]);
   1081     }
   1082 
   1083     /**
   1084      * Look up a given string in a string array.  Returns the index at
   1085      * which the first occurrence of the string was found in the
   1086      * array, or -1 if it was not found.
   1087      * @param source the string to search for
   1088      * @param target the array of zero or more strings in which to
   1089      * look for source
   1090      * @return the index of target at which source first occurs, or -1
   1091      * if not found
   1092      */
   1093     public static int lookup(String source, String[] target) {
   1094         for (int i = 0; i < target.length; ++i) {
   1095             if (source.equals(target[i])) return i;
   1096         }
   1097         return -1;
   1098     }
   1099 
   1100     /**
   1101      * Parse a single non-whitespace character 'ch', optionally
   1102      * preceded by whitespace.
   1103      * @param id the string to be parsed
   1104      * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
   1105      * offset of the first character to be parsed.  On output, pos[0]
   1106      * is the index after the last parsed character.  If the parse
   1107      * fails, pos[0] will be unchanged.
   1108      * @param ch the non-whitespace character to be parsed.
   1109      * @return true if 'ch' is seen preceded by zero or more
   1110      * whitespace characters.
   1111      */
   1112     public static boolean parseChar(String id, int[] pos, char ch) {
   1113         int start = pos[0];
   1114         pos[0] = PatternProps.skipWhiteSpace(id, pos[0]);
   1115         if (pos[0] == id.length() ||
   1116                 id.charAt(pos[0]) != ch) {
   1117             pos[0] = start;
   1118             return false;
   1119         }
   1120         ++pos[0];
   1121         return true;
   1122     }
   1123 
   1124     /**
   1125      * Parse a pattern string starting at offset pos.  Keywords are
   1126      * matched case-insensitively.  Spaces may be skipped and may be
   1127      * optional or required.  Integer values may be parsed, and if
   1128      * they are, they will be returned in the given array.  If
   1129      * successful, the offset of the next non-space character is
   1130      * returned.  On failure, -1 is returned.
   1131      * @param pattern must only contain lowercase characters, which
   1132      * will match their uppercase equivalents as well.  A space
   1133      * character matches one or more required spaces.  A '~' character
   1134      * matches zero or more optional spaces.  A '#' character matches
   1135      * an integer and stores it in parsedInts, which the caller must
   1136      * ensure has enough capacity.
   1137      * @param parsedInts array to receive parsed integers.  Caller
   1138      * must ensure that parsedInts.length is >= the number of '#'
   1139      * signs in 'pattern'.
   1140      * @return the position after the last character parsed, or -1 if
   1141      * the parse failed
   1142      */
   1143     @SuppressWarnings("fallthrough")
   1144     public static int parsePattern(String rule, int pos, int limit,
   1145             String pattern, int[] parsedInts) {
   1146         // TODO Update this to handle surrogates
   1147         int[] p = new int[1];
   1148         int intCount = 0; // number of integers parsed
   1149         for (int i=0; i<pattern.length(); ++i) {
   1150             char cpat = pattern.charAt(i);
   1151             char c;
   1152             switch (cpat) {
   1153             case ' ':
   1154                 if (pos >= limit) {
   1155                     return -1;
   1156                 }
   1157                 c = rule.charAt(pos++);
   1158                 if (!PatternProps.isWhiteSpace(c)) {
   1159                     return -1;
   1160                 }
   1161                 // FALL THROUGH to skipWhitespace
   1162             case '~':
   1163                 pos = PatternProps.skipWhiteSpace(rule, pos);
   1164                 break;
   1165             case '#':
   1166                 p[0] = pos;
   1167                 parsedInts[intCount++] = parseInteger(rule, p, limit);
   1168                 if (p[0] == pos) {
   1169                     // Syntax error; failed to parse integer
   1170                     return -1;
   1171                 }
   1172                 pos = p[0];
   1173                 break;
   1174             default:
   1175                 if (pos >= limit) {
   1176                     return -1;
   1177                 }
   1178                 c = (char) UCharacter.toLowerCase(rule.charAt(pos++));
   1179                 if (c != cpat) {
   1180                     return -1;
   1181                 }
   1182                 break;
   1183             }
   1184         }
   1185         return pos;
   1186     }
   1187 
   1188     /**
   1189      * Parse a pattern string within the given Replaceable and a parsing
   1190      * pattern.  Characters are matched literally and case-sensitively
   1191      * except for the following special characters:
   1192      *
   1193      * ~  zero or more Pattern_White_Space chars
   1194      *
   1195      * If end of pattern is reached with all matches along the way,
   1196      * pos is advanced to the first unparsed index and returned.
   1197      * Otherwise -1 is returned.
   1198      * @param pat pattern that controls parsing
   1199      * @param text text to be parsed, starting at index
   1200      * @param index offset to first character to parse
   1201      * @param limit offset after last character to parse
   1202      * @return index after last parsed character, or -1 on parse failure.
   1203      */
   1204     public static int parsePattern(String pat,
   1205             Replaceable text,
   1206             int index,
   1207             int limit) {
   1208         int ipat = 0;
   1209 
   1210         // empty pattern matches immediately
   1211         if (ipat == pat.length()) {
   1212             return index;
   1213         }
   1214 
   1215         int cpat = Character.codePointAt(pat, ipat);
   1216 
   1217         while (index < limit) {
   1218             int c = text.char32At(index);
   1219 
   1220             // parse \s*
   1221             if (cpat == '~') {
   1222                 if (PatternProps.isWhiteSpace(c)) {
   1223                     index += UTF16.getCharCount(c);
   1224                     continue;
   1225                 } else {
   1226                     if (++ipat == pat.length()) {
   1227                         return index; // success; c unparsed
   1228                     }
   1229                     // fall thru; process c again with next cpat
   1230                 }
   1231             }
   1232 
   1233             // parse literal
   1234             else if (c == cpat) {
   1235                 int n = UTF16.getCharCount(c);
   1236                 index += n;
   1237                 ipat += n;
   1238                 if (ipat == pat.length()) {
   1239                     return index; // success; c parsed
   1240                 }
   1241                 // fall thru; get next cpat
   1242             }
   1243 
   1244             // match failure of literal
   1245             else {
   1246                 return -1;
   1247             }
   1248 
   1249             cpat = UTF16.charAt(pat, ipat);
   1250         }
   1251 
   1252         return -1; // text ended before end of pat
   1253     }
   1254 
   1255     /**
   1256      * Parse an integer at pos, either of the form \d+ or of the form
   1257      * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
   1258      * or octal format.
   1259      * @param pos INPUT-OUTPUT parameter.  On input, the first
   1260      * character to parse.  On output, the character after the last
   1261      * parsed character.
   1262      */
   1263     public static int parseInteger(String rule, int[] pos, int limit) {
   1264         int count = 0;
   1265         int value = 0;
   1266         int p = pos[0];
   1267         int radix = 10;
   1268 
   1269         if (rule.regionMatches(true, p, "0x", 0, 2)) {
   1270             p += 2;
   1271             radix = 16;
   1272         } else if (p < limit && rule.charAt(p) == '0') {
   1273             p++;
   1274             count = 1;
   1275             radix = 8;
   1276         }
   1277 
   1278         while (p < limit) {
   1279             int d = UCharacter.digit(rule.charAt(p++), radix);
   1280             if (d < 0) {
   1281                 --p;
   1282                 break;
   1283             }
   1284             ++count;
   1285             int v = (value * radix) + d;
   1286             if (v <= value) {
   1287                 // If there are too many input digits, at some point
   1288                 // the value will go negative, e.g., if we have seen
   1289                 // "0x8000000" already and there is another '0', when
   1290                 // we parse the next 0 the value will go negative.
   1291                 return 0;
   1292             }
   1293             value = v;
   1294         }
   1295         if (count > 0) {
   1296             pos[0] = p;
   1297         }
   1298         return value;
   1299     }
   1300 
   1301     /**
   1302      * Parse a Unicode identifier from the given string at the given
   1303      * position.  Return the identifier, or null if there is no
   1304      * identifier.
   1305      * @param str the string to parse
   1306      * @param pos INPUT-OUPUT parameter.  On INPUT, pos[0] is the
   1307      * first character to examine.  It must be less than str.length(),
   1308      * and it must not point to a whitespace character.  That is, must
   1309      * have pos[0] < str.length().  On
   1310      * OUTPUT, the position after the last parsed character.
   1311      * @return the Unicode identifier, or null if there is no valid
   1312      * identifier at pos[0].
   1313      */
   1314     public static String parseUnicodeIdentifier(String str, int[] pos) {
   1315         // assert(pos[0] < str.length());
   1316         StringBuilder buf = new StringBuilder();
   1317         int p = pos[0];
   1318         while (p < str.length()) {
   1319             int ch = Character.codePointAt(str, p);
   1320             if (buf.length() == 0) {
   1321                 if (UCharacter.isUnicodeIdentifierStart(ch)) {
   1322                     buf.appendCodePoint(ch);
   1323                 } else {
   1324                     return null;
   1325                 }
   1326             } else {
   1327                 if (UCharacter.isUnicodeIdentifierPart(ch)) {
   1328                     buf.appendCodePoint(ch);
   1329                 } else {
   1330                     break;
   1331                 }
   1332             }
   1333             p += UTF16.getCharCount(ch);
   1334         }
   1335         pos[0] = p;
   1336         return buf.toString();
   1337     }
   1338 
   1339     static final char DIGITS[] = {
   1340         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
   1341         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
   1342         'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
   1343         'U', 'V', 'W', 'X', 'Y', 'Z'
   1344     };
   1345 
   1346     /**
   1347      * Append the digits of a positive integer to the given
   1348      * <code>Appendable</code> in the given radix. This is
   1349      * done recursively since it is easiest to generate the low-
   1350      * order digit first, but it must be appended last.
   1351      *
   1352      * @param result is the <code>Appendable</code> to append to
   1353      * @param n is the positive integer
   1354      * @param radix is the radix, from 2 to 36 inclusive
   1355      * @param minDigits is the minimum number of digits to append.
   1356      */
   1357     private static <T extends Appendable> void recursiveAppendNumber(T result, int n,
   1358             int radix, int minDigits)
   1359     {
   1360         try {
   1361             int digit = n % radix;
   1362 
   1363             if (n >= radix || minDigits > 1) {
   1364                 recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
   1365             }
   1366             result.append(DIGITS[digit]);
   1367         } catch (IOException e) {
   1368             throw new IllegalIcuArgumentException(e);
   1369         }
   1370     }
   1371 
   1372     /**
   1373      * Append a number to the given Appendable in the given radix.
   1374      * Standard digits '0'-'9' are used and letters 'A'-'Z' for
   1375      * radices 11 through 36.
   1376      * @param result the digits of the number are appended here
   1377      * @param n the number to be converted to digits; may be negative.
   1378      * If negative, a '-' is prepended to the digits.
   1379      * @param radix a radix from 2 to 36 inclusive.
   1380      * @param minDigits the minimum number of digits, not including
   1381      * any '-', to produce.  Values less than 2 have no effect.  One
   1382      * digit is always emitted regardless of this parameter.
   1383      * @return a reference to result
   1384      */
   1385     public static <T extends Appendable> T appendNumber(T result, int n,
   1386             int radix, int minDigits)
   1387     {
   1388         try {
   1389             if (radix < 2 || radix > 36) {
   1390                 throw new IllegalArgumentException("Illegal radix " + radix);
   1391             }
   1392 
   1393 
   1394             int abs = n;
   1395 
   1396             if (n < 0) {
   1397                 abs = -n;
   1398                 result.append("-");
   1399             }
   1400 
   1401             recursiveAppendNumber(result, abs, radix, minDigits);
   1402 
   1403             return result;
   1404         } catch (IOException e) {
   1405             throw new IllegalIcuArgumentException(e);
   1406         }
   1407 
   1408     }
   1409 
   1410     /**
   1411      * Parse an unsigned 31-bit integer at the given offset.  Use
   1412      * UCharacter.digit() to parse individual characters into digits.
   1413      * @param text the text to be parsed
   1414      * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the
   1415      * offset within text at which to start parsing; it should point
   1416      * to a valid digit.  On exit, pos[0] is the offset after the last
   1417      * parsed character.  If the parse failed, it will be unchanged on
   1418      * exit.  Must be >= 0 on entry.
   1419      * @param radix the radix in which to parse; must be >= 2 and <=
   1420      * 36.
   1421      * @return a non-negative parsed number, or -1 upon parse failure.
   1422      * Parse fails if there are no digits, that is, if pos[0] does not
   1423      * point to a valid digit on entry, or if the number to be parsed
   1424      * does not fit into a 31-bit unsigned integer.
   1425      */
   1426     public static int parseNumber(String text, int[] pos, int radix) {
   1427         // assert(pos[0] >= 0);
   1428         // assert(radix >= 2);
   1429         // assert(radix <= 36);
   1430         int n = 0;
   1431         int p = pos[0];
   1432         while (p < text.length()) {
   1433             int ch = Character.codePointAt(text, p);
   1434             int d = UCharacter.digit(ch, radix);
   1435             if (d < 0) {
   1436                 break;
   1437             }
   1438             n = radix*n + d;
   1439             // ASSUME that when a 32-bit integer overflows it becomes
   1440             // negative.  E.g., 214748364 * 10 + 8 => negative value.
   1441             if (n < 0) {
   1442                 return -1;
   1443             }
   1444             ++p;
   1445         }
   1446         if (p == pos[0]) {
   1447             return -1;
   1448         }
   1449         pos[0] = p;
   1450         return n;
   1451     }
   1452 
   1453     /**
   1454      * Return true if the character is NOT printable ASCII.  The tab,
   1455      * newline and linefeed characters are considered unprintable.
   1456      */
   1457     public static boolean isUnprintable(int c) {
   1458         //0x20 = 32 and 0x7E = 126
   1459         return !(c >= 0x20 && c <= 0x7E);
   1460     }
   1461 
   1462     /**
   1463      * Escape unprintable characters using <backslash>uxxxx notation
   1464      * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
   1465      * above.  If the character is printable ASCII, then do nothing
   1466      * and return FALSE.  Otherwise, append the escaped notation and
   1467      * return TRUE.
   1468      */
   1469     public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
   1470         try {
   1471             if (isUnprintable(c)) {
   1472                 result.append('\\');
   1473                 if ((c & ~0xFFFF) != 0) {
   1474                     result.append('U');
   1475                     result.append(DIGITS[0xF&(c>>28)]);
   1476                     result.append(DIGITS[0xF&(c>>24)]);
   1477                     result.append(DIGITS[0xF&(c>>20)]);
   1478                     result.append(DIGITS[0xF&(c>>16)]);
   1479                 } else {
   1480                     result.append('u');
   1481                 }
   1482                 result.append(DIGITS[0xF&(c>>12)]);
   1483                 result.append(DIGITS[0xF&(c>>8)]);
   1484                 result.append(DIGITS[0xF&(c>>4)]);
   1485                 result.append(DIGITS[0xF&c]);
   1486                 return true;
   1487             }
   1488             return false;
   1489         } catch (IOException e) {
   1490             throw new IllegalIcuArgumentException(e);
   1491         }
   1492     }
   1493 
   1494     /**
   1495      * Returns the index of the first character in a set, ignoring quoted text.
   1496      * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
   1497      * found by a search for "h".  Unlike String.indexOf(), this method searches
   1498      * not for a single character, but for any character of the string
   1499      * <code>setOfChars</code>.
   1500      * @param text text to be searched
   1501      * @param start the beginning index, inclusive; <code>0 <= start
   1502      * <= limit</code>.
   1503      * @param limit the ending index, exclusive; <code>start <= limit
   1504      * <= text.length()</code>.
   1505      * @param setOfChars string with one or more distinct characters
   1506      * @return Offset of the first character in <code>setOfChars</code>
   1507      * found, or -1 if not found.
   1508      * @see String#indexOf
   1509      */
   1510     public static int quotedIndexOf(String text, int start, int limit,
   1511             String setOfChars) {
   1512         for (int i=start; i<limit; ++i) {
   1513             char c = text.charAt(i);
   1514             if (c == BACKSLASH) {
   1515                 ++i;
   1516             } else if (c == APOSTROPHE) {
   1517                 while (++i < limit
   1518                         && text.charAt(i) != APOSTROPHE) {}
   1519             } else if (setOfChars.indexOf(c) >= 0) {
   1520                 return i;
   1521             }
   1522         }
   1523         return -1;
   1524     }
   1525 
   1526     /**
   1527      * Append a character to a rule that is being built up.  To flush
   1528      * the quoteBuf to rule, make one final call with isLiteral == true.
   1529      * If there is no final character, pass in (int)-1 as c.
   1530      * @param rule the string to append the character to
   1531      * @param c the character to append, or (int)-1 if none.
   1532      * @param isLiteral if true, then the given character should not be
   1533      * quoted or escaped.  Usually this means it is a syntactic element
   1534      * such as > or $
   1535      * @param escapeUnprintable if true, then unprintable characters
   1536      * should be escaped using escapeUnprintable().  These escapes will
   1537      * appear outside of quotes.
   1538      * @param quoteBuf a buffer which is used to build up quoted
   1539      * substrings.  The caller should initially supply an empty buffer,
   1540      * and thereafter should not modify the buffer.  The buffer should be
   1541      * cleared out by, at the end, calling this method with a literal
   1542      * character (which may be -1).
   1543      */
   1544     public static void appendToRule(StringBuffer rule,
   1545             int c,
   1546             boolean isLiteral,
   1547             boolean escapeUnprintable,
   1548             StringBuffer quoteBuf) {
   1549         // If we are escaping unprintables, then escape them outside
   1550         // quotes.  \\u and \\U are not recognized within quotes.  The same
   1551         // logic applies to literals, but literals are never escaped.
   1552         if (isLiteral ||
   1553                 (escapeUnprintable && Utility.isUnprintable(c))) {
   1554             if (quoteBuf.length() > 0) {
   1555                 // We prefer backslash APOSTROPHE to double APOSTROPHE
   1556                 // (more readable, less similar to ") so if there are
   1557                 // double APOSTROPHEs at the ends, we pull them outside
   1558                 // of the quote.
   1559 
   1560                 // If the first thing in the quoteBuf is APOSTROPHE
   1561                 // (doubled) then pull it out.
   1562                 while (quoteBuf.length() >= 2 &&
   1563                         quoteBuf.charAt(0) == APOSTROPHE &&
   1564                         quoteBuf.charAt(1) == APOSTROPHE) {
   1565                     rule.append(BACKSLASH).append(APOSTROPHE);
   1566                     quoteBuf.delete(0, 2);
   1567                 }
   1568                 // If the last thing in the quoteBuf is APOSTROPHE
   1569                 // (doubled) then remove and count it and add it after.
   1570                 int trailingCount = 0;
   1571                 while (quoteBuf.length() >= 2 &&
   1572                         quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
   1573                         quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
   1574                     quoteBuf.setLength(quoteBuf.length()-2);
   1575                     ++trailingCount;
   1576                 }
   1577                 if (quoteBuf.length() > 0) {
   1578                     rule.append(APOSTROPHE);
   1579                     rule.append(quoteBuf);
   1580                     rule.append(APOSTROPHE);
   1581                     quoteBuf.setLength(0);
   1582                 }
   1583                 while (trailingCount-- > 0) {
   1584                     rule.append(BACKSLASH).append(APOSTROPHE);
   1585                 }
   1586             }
   1587             if (c != -1) {
   1588                 /* Since spaces are ignored during parsing, they are
   1589                  * emitted only for readability.  We emit one here
   1590                  * only if there isn't already one at the end of the
   1591                  * rule.
   1592                  */
   1593                 if (c == ' ') {
   1594                     int len = rule.length();
   1595                     if (len > 0 && rule.charAt(len-1) != ' ') {
   1596                         rule.append(' ');
   1597                     }
   1598                 } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {
   1599                     rule.appendCodePoint(c);
   1600                 }
   1601             }
   1602         }
   1603 
   1604         // Escape ' and '\' and don't begin a quote just for them
   1605         else if (quoteBuf.length() == 0 &&
   1606                 (c == APOSTROPHE || c == BACKSLASH)) {
   1607             rule.append(BACKSLASH).append((char)c);
   1608         }
   1609 
   1610         // Specials (printable ascii that isn't [0-9a-zA-Z]) and
   1611         // whitespace need quoting.  Also append stuff to quotes if we are
   1612         // building up a quoted substring already.
   1613         else if (quoteBuf.length() > 0 ||
   1614                 (c >= 0x0021 && c <= 0x007E &&
   1615                         !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
   1616                                 (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
   1617                                 (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
   1618                                 PatternProps.isWhiteSpace(c)) {
   1619             quoteBuf.appendCodePoint(c);
   1620             // Double ' within a quote
   1621             if (c == APOSTROPHE) {
   1622                 quoteBuf.append((char)c);
   1623             }
   1624         }
   1625 
   1626         // Otherwise just append
   1627         else {
   1628             rule.appendCodePoint(c);
   1629         }
   1630     }
   1631 
   1632     /**
   1633      * Append the given string to the rule.  Calls the single-character
   1634      * version of appendToRule for each character.
   1635      */
   1636     public static void appendToRule(StringBuffer rule,
   1637             String text,
   1638             boolean isLiteral,
   1639             boolean escapeUnprintable,
   1640             StringBuffer quoteBuf) {
   1641         for (int i=0; i<text.length(); ++i) {
   1642             // Okay to process in 16-bit code units here
   1643             appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
   1644         }
   1645     }
   1646 
   1647     /**
   1648      * Given a matcher reference, which may be null, append its
   1649      * pattern as a literal to the given rule.
   1650      */
   1651     public static void appendToRule(StringBuffer rule,
   1652             UnicodeMatcher matcher,
   1653             boolean escapeUnprintable,
   1654             StringBuffer quoteBuf) {
   1655         if (matcher != null) {
   1656             appendToRule(rule, matcher.toPattern(escapeUnprintable),
   1657                     true, escapeUnprintable, quoteBuf);
   1658         }
   1659     }
   1660 
   1661     /**
   1662      * Compares 2 unsigned integers
   1663      * @param source 32 bit unsigned integer
   1664      * @param target 32 bit unsigned integer
   1665      * @return 0 if equals, 1 if source is greater than target and -1
   1666      *         otherwise
   1667      */
   1668     public static final int compareUnsigned(int source, int target)
   1669     {
   1670         source += MAGIC_UNSIGNED;
   1671         target += MAGIC_UNSIGNED;
   1672         if (source < target) {
   1673             return -1;
   1674         }
   1675         else if (source > target) {
   1676             return 1;
   1677         }
   1678         return 0;
   1679     }
   1680 
   1681     /**
   1682      * Find the highest bit in a positive integer. This is done
   1683      * by doing a binary search through the bits.
   1684      *
   1685      * @param n is the integer
   1686      *
   1687      * @return the bit number of the highest bit, with 0 being
   1688      * the low order bit, or -1 if <code>n</code> is not positive
   1689      */
   1690     public static final byte highBit(int n)
   1691     {
   1692         if (n <= 0) {
   1693             return -1;
   1694         }
   1695 
   1696         byte bit = 0;
   1697 
   1698         if (n >= 1 << 16) {
   1699             n >>= 16;
   1700         bit += 16;
   1701         }
   1702 
   1703         if (n >= 1 << 8) {
   1704             n >>= 8;
   1705         bit += 8;
   1706         }
   1707 
   1708         if (n >= 1 << 4) {
   1709             n >>= 4;
   1710         bit += 4;
   1711         }
   1712 
   1713         if (n >= 1 << 2) {
   1714             n >>= 2;
   1715         bit += 2;
   1716         }
   1717 
   1718         if (n >= 1 << 1) {
   1719             n >>= 1;
   1720         bit += 1;
   1721         }
   1722 
   1723         return bit;
   1724     }
   1725     /**
   1726      * Utility method to take a int[] containing codepoints and return
   1727      * a string representation with code units.
   1728      */
   1729     public static String valueOf(int[]source){
   1730         // TODO: Investigate why this method is not on UTF16 class
   1731         StringBuilder result = new StringBuilder(source.length);
   1732         for(int i=0; i<source.length; i++){
   1733             result.appendCodePoint(source[i]);
   1734         }
   1735         return result.toString();
   1736     }
   1737 
   1738 
   1739     /**
   1740      * Utility to duplicate a string count times
   1741      * @param s String to be duplicated.
   1742      * @param count Number of times to duplicate a string.
   1743      */
   1744     public static String repeat(String s, int count) {
   1745         if (count <= 0) return "";
   1746         if (count == 1) return s;
   1747         StringBuilder result = new StringBuilder();
   1748         for (int i = 0; i < count; ++i) {
   1749             result.append(s);
   1750         }
   1751         return result.toString();
   1752     }
   1753 
   1754     public static String[] splitString(String src, String target) {
   1755         return src.split("\\Q" + target + "\\E");
   1756     }
   1757 
   1758     /**
   1759      * Split the string at runs of ascii whitespace characters.
   1760      */
   1761     public static String[] splitWhitespace(String src) {
   1762         return src.split("\\s+");
   1763     }
   1764 
   1765     /**
   1766      * Parse a list of hex numbers and return a string
   1767      * @param string String of hex numbers.
   1768      * @param minLength Minimal length.
   1769      * @param separator Separator.
   1770      * @return A string from hex numbers.
   1771      */
   1772     public static String fromHex(String string, int minLength, String separator) {
   1773         return fromHex(string, minLength, Pattern.compile(separator != null ? separator : "\\s+"));
   1774     }
   1775 
   1776     /**
   1777      * Parse a list of hex numbers and return a string
   1778      * @param string String of hex numbers.
   1779      * @param minLength Minimal length.
   1780      * @param separator Separator.
   1781      * @return A string from hex numbers.
   1782      */
   1783     public static String fromHex(String string, int minLength, Pattern separator) {
   1784         StringBuilder buffer = new StringBuilder();
   1785         String[] parts = separator.split(string);
   1786         for (String part : parts) {
   1787             if (part.length() < minLength) {
   1788                 throw new IllegalArgumentException("code point too short: " + part);
   1789             }
   1790             int cp = Integer.parseInt(part, 16);
   1791             buffer.appendCodePoint(cp);
   1792         }
   1793         return buffer.toString();
   1794     }
   1795 }
   1796