Home | History | Annotate | Download | only in test
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /**
      5 *******************************************************************************
      6 * Copyright (C) 2002-2004, International Business Machines Corporation and    *
      7 * others. All Rights Reserved.                                                *
      8 *******************************************************************************
      9 */
     10 package android.icu.dev.test;
     11 
     12 import android.icu.testsharding.MainTestShard;
     13 
     14 /**
     15  * Utility class for supplementary code point
     16  * support. This one is written purely for updating
     17  * Normalization sample from the unicode.org site.
     18  * If you want the real thing, use UTF16 class
     19  * from ICU4J
     20  * @author Vladimir Weinstein, Markus Scherer
     21  */
     22 @MainTestShard
     23 public class UTF16Util {
     24     static final int suppOffset = (0xd800 << 10) + 0xdc00 - 0x10000;
     25 
     26     /**
     27      * Method nextCodePoint. Returns the next code point
     28      * in a string.
     29      * @param s String in question
     30      * @param i index from which we want a code point
     31      * @return int codepoint at index i
     32      */
     33     public static final int nextCodePoint(String s, int i) {
     34         int ch = s.charAt(i);
     35         if (0xd800 <= ch && ch <= 0xdbff && ++i < s.length()) {
     36             int ch2 = s.charAt(i);
     37             if (0xdc00 <= ch2 && ch2 <= 0xdfff) {
     38                 ch = (ch << 10) + ch2 - suppOffset;
     39             }
     40         }
     41         return ch;
     42     }
     43 
     44     /**
     45      * Method prevCodePoint. Gets the code point preceding
     46      * index i (predecrement).
     47      * @param s String in question
     48      * @param i index in string
     49      * @return int codepoint at index --i
     50      */
     51     public static final int prevCodePoint(String s, int i) {
     52         int ch = s.charAt(--i);
     53         if (0xdc00 <= ch && ch <= 0xdfff && --i >= 0) {
     54             int ch2 = s.charAt(i);
     55             if (0xd800 <= ch2 && ch2 <= 0xdbff) {
     56                 ch = (ch2 << 10) + ch - suppOffset;
     57             }
     58         }
     59         return ch;
     60     }
     61 
     62     /**
     63      * Method nextCodePoint. Returns the next code point
     64      * in a string.
     65      * @param s StringBuffer in question
     66      * @param i index from which we want a code point
     67      * @return int codepoint at index i
     68      */
     69     public static final int nextCodePoint(StringBuffer s, int i) {
     70         int ch = s.charAt(i);
     71         if (0xd800 <= ch && ch <= 0xdbff && ++i < s.length()) {
     72             int ch2 = s.charAt(i);
     73             if (0xdc00 <= ch2 && ch2 <= 0xdfff) {
     74                 ch = (ch << 10) + ch2 - suppOffset;
     75             }
     76         }
     77         return ch;
     78     }
     79 
     80     /**
     81      * Method prevCodePoint. Gets the code point preceding
     82      * index i (predecrement).
     83      * @param s StringBuffer in question
     84      * @param i index in string
     85      * @return int codepoint at index --i
     86      */
     87     public static final int prevCodePoint(StringBuffer s, int i) {
     88         int ch = s.charAt(--i);
     89         if (0xdc00 <= ch && ch <= 0xdfff && --i >= 0) {
     90             int ch2 = s.charAt(i);
     91             if (0xd800 <= ch2 && ch2 <= 0xdbff) {
     92                 ch = (ch2 << 10) + ch - suppOffset;
     93             }
     94         }
     95         return ch;
     96     }
     97 
     98     /**
     99      * Method codePointLength. Returns the length
    100      * in UTF-16 code units of a given code point
    101      * @param c code point in question
    102      * @return int length in UTF-16 code units. Can be 1 or 2
    103      */
    104     public static final int codePointLength(int c) {
    105         return c <= 0xffff ? 1 : 2;
    106     }
    107 
    108     /**
    109      * Method appendCodePoint. Appends a code point
    110      * to a StringBuffer
    111      * @param buffer StringBuffer in question
    112      * @param ch code point to append
    113      */
    114     public static final void appendCodePoint(StringBuffer buffer, int ch) {
    115         if (ch <= 0xffff) {
    116             buffer.append((char)ch);
    117         } else {
    118             buffer.append((char)(0xd7c0 + (ch >> 10)));
    119             buffer.append((char)(0xdc00 + (ch & 0x3ff)));
    120         }
    121     }
    122 
    123     /**
    124      * Method insertCodePoint. Inserts a code point in
    125      * a StringBuffer
    126      * @param buffer StringBuffer in question
    127      * @param i index at which we want code point to be inserted
    128      * @param ch code point to be inserted
    129      */
    130     public static final void insertCodePoint(StringBuffer buffer, int i, int ch) {
    131         if (ch <= 0xffff) {
    132             buffer.insert(i, (char)ch);
    133         } else {
    134             buffer.insert(i, (char)(0xd7c0 + (ch >> 10))).insert(i + 1, (char)(0xdc00 + (ch & 0x3ff)));
    135         }
    136     }
    137 
    138     /**
    139      * Method setCodePointAt. Changes a code point at a
    140      * given index. Can change the length of the string.
    141      * @param buffer StringBuffer in question
    142      * @param i index at which we want to change the contents
    143      * @param ch replacement code point
    144      * @return int difference in resulting StringBuffer length
    145      */
    146     public static final int setCodePointAt(StringBuffer buffer, int i, int ch) {
    147         int cp = nextCodePoint(buffer, i);
    148 
    149         if (ch <= 0xffff && cp <= 0xffff) { // Both BMP
    150             buffer.setCharAt(i, (char)ch);
    151             return 0;
    152         } else if (ch > 0xffff && cp > 0xffff) { // Both supplementary
    153             buffer.setCharAt(i, (char)(0xd7c0 + (ch >> 10)));
    154             buffer.setCharAt(i+1, (char)(0xdc00 + (ch & 0x3ff)));
    155             return 0;
    156         } else if (ch <= 0xffff && cp > 0xffff) { // putting BMP instead of supplementary, buffer shrinks
    157             buffer.setCharAt(i, (char)ch);
    158             buffer.deleteCharAt(i+1);
    159             return -1;
    160         } else { //if (ch > 0xffff && cp <= 0xffff) { // putting supplementary instead of BMP, buffer grows
    161             buffer.setCharAt(i, (char)(0xd7c0 + (ch >> 10)));
    162             buffer.insert(i+1, (char)(0xdc00 + (ch & 0x3ff)));
    163             return 1;
    164         }
    165     }
    166 
    167     /**
    168      * Method countCodePoint. Counts the UTF-32 code points
    169      * in a UTF-16 encoded string.
    170      * @param source String in question.
    171      * @return int number of code points in this string
    172      */
    173     public static final int countCodePoint(String source)
    174     {
    175         int result = 0;
    176         char ch;
    177         boolean hadLeadSurrogate = false;
    178 
    179         for (int i = 0; i < source.length(); ++ i)
    180         {
    181             ch = source.charAt(i);
    182             if (hadLeadSurrogate && 0xdc00 <= ch && ch <= 0xdfff) {
    183                 hadLeadSurrogate = false;           // count valid trail as zero
    184             }
    185             else
    186             {
    187                 hadLeadSurrogate = (0xd800 <= ch && ch <= 0xdbff);
    188                 ++ result;                          // count others as 1
    189             }
    190         }
    191 
    192         return result;
    193     }
    194 
    195     /**
    196      * Method countCodePoint. Counts the UTF-32 code points
    197      * in a UTF-16 encoded string.
    198      * @param source StringBuffer in question.
    199      * @return int number of code points in this string
    200      */
    201     public static final int countCodePoint(StringBuffer source)
    202     {
    203         int result = 0;
    204         char ch;
    205         boolean hadLeadSurrogate = false;
    206 
    207         for (int i = 0; i < source.length(); ++ i)
    208         {
    209             ch = source.charAt(i);
    210             if (hadLeadSurrogate && 0xdc00 <= ch && ch <= 0xdfff) {
    211                 hadLeadSurrogate = false;           // count valid trail as zero
    212             }
    213             else
    214             {
    215                 hadLeadSurrogate = (0xd800 <= ch && ch <= 0xdbff);
    216                 ++ result;                          // count others as 1
    217             }
    218         }
    219 
    220         return result;
    221     }
    222     /**
    223      * The minimum value for Supplementary code points
    224      */
    225     public static final int SUPPLEMENTARY_MIN_VALUE  = 0x10000;
    226     /**
    227      * Determines how many chars this char32 requires.
    228      * If a validity check is required, use <code>
    229      * <a href="../UCharacter.html#isLegal(char)">isLegal()</a></code> on
    230      * char32 before calling.
    231      * @param char32 the input codepoint.
    232      * @return 2 if is in supplementary space, otherwise 1.
    233      */
    234     public static int getCharCount(int char32)
    235     {
    236         if (char32 < SUPPLEMENTARY_MIN_VALUE) {
    237             return 1;
    238         }
    239         return 2;
    240     }
    241     /**
    242      * Lead surrogate maximum value
    243      * @stable ICU 2.1
    244      */
    245     public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF;
    246     /**
    247      * Lead surrogate minimum value
    248      * @stable ICU 2.1
    249      */
    250     public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800;
    251 
    252     /**
    253      * Trail surrogate minimum value
    254      * @stable ICU 2.1
    255      */
    256     public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00;
    257     /**
    258      * Trail surrogate maximum value
    259      * @stable ICU 2.1
    260      */
    261     public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF;
    262     /**
    263      * Determines whether the code value is a surrogate.
    264      * @param char16 the input character.
    265      * @return true iff the input character is a surrogate.
    266      * @stable ICU 2.1
    267      */
    268     public static boolean isSurrogate(char char16)
    269     {
    270         return LEAD_SURROGATE_MIN_VALUE <= char16 &&
    271             char16 <= TRAIL_SURROGATE_MAX_VALUE;
    272     }
    273 
    274     /**
    275      * Determines whether the character is a trail surrogate.
    276      * @param char16 the input character.
    277      * @return true iff the input character is a trail surrogate.
    278      * @stable ICU 2.1
    279      */
    280     public static boolean isTrailSurrogate(char char16)
    281     {
    282         return (TRAIL_SURROGATE_MIN_VALUE <= char16 &&
    283                 char16 <= TRAIL_SURROGATE_MAX_VALUE);
    284     }
    285 
    286     /**
    287      * Determines whether the character is a lead surrogate.
    288      * @param char16 the input character.
    289      * @return true iff the input character is a lead surrogate
    290      * @stable ICU 2.1
    291      */
    292     public static boolean isLeadSurrogate(char char16)
    293     {
    294         return LEAD_SURROGATE_MIN_VALUE <= char16 &&
    295             char16 <= LEAD_SURROGATE_MAX_VALUE;
    296     }
    297     /**
    298      * Extract a single UTF-32 value from a substring.
    299      * Used when iterating forwards or backwards (with
    300      * <code>UTF16.getCharCount()</code>, as well as random access. If a
    301      * validity check is required, use
    302      * <code><a href="../UCharacter.html#isLegal(char)">UCharacter.isLegal()
    303      * </a></code> on the return value.
    304      * If the char retrieved is part of a surrogate pair, its supplementary
    305      * character will be returned. If a complete supplementary character is
    306      * not found the incomplete character will be returned
    307      * @param source array of UTF-16 chars
    308      * @param start offset to substring in the source array for analyzing
    309      * @param limit offset to substring in the source array for analyzing
    310      * @param offset16 UTF-16 offset relative to start
    311      * @return UTF-32 value for the UTF-32 value that contains the char at
    312      *         offset16. The boundaries of that codepoint are the same as in
    313      *         <code>bounds32()</code>.
    314      * @exception IndexOutOfBoundsException thrown if offset16 is not within
    315      *            the range of start and limit.
    316      * @stable ICU 2.1
    317      */
    318     public static int charAt(char source[], int start, int limit,
    319                              int offset16)
    320     {
    321         offset16 += start;
    322         if (offset16 < start || offset16 >= limit) {
    323             throw new ArrayIndexOutOfBoundsException(offset16);
    324         }
    325 
    326         char single = source[offset16];
    327         if (!isSurrogate(single)) {
    328             return single;
    329         }
    330 
    331         // Convert the UTF-16 surrogate pair if necessary.
    332         // For simplicity in usage, and because the frequency of pairs is
    333         // low, look both directions.
    334         if (single <= LEAD_SURROGATE_MAX_VALUE) {
    335             offset16 ++;
    336             if (offset16 >= limit) {
    337                 return single;
    338             }
    339             char trail = source[offset16];
    340             if (isTrailSurrogate(trail)) {
    341                 return getRawSupplementary(single, trail);
    342             }
    343         }
    344         else { // isTrailSurrogate(single), so
    345             if (offset16 == start) {
    346                 return single;
    347             }
    348             offset16 --;
    349             char lead = source[offset16];
    350             if (isLeadSurrogate(lead))
    351                 return getRawSupplementary(lead, single);
    352         }
    353         return single; // return unmatched surrogate
    354     }
    355     /**
    356      * Shift value for lead surrogate to form a supplementary character.
    357      */
    358     private static final int LEAD_SURROGATE_SHIFT_ = 10;
    359 
    360     /**
    361      * Offset to add to combined surrogate pair to avoid msking.
    362      */
    363     private static final int SURROGATE_OFFSET_ =
    364                            SUPPLEMENTARY_MIN_VALUE -
    365                            (LEAD_SURROGATE_MIN_VALUE <<
    366                            LEAD_SURROGATE_SHIFT_) -
    367                            TRAIL_SURROGATE_MIN_VALUE;
    368 
    369 
    370    /**
    371     * Forms a supplementary code point from the argument character<br>
    372     * Note this is for internal use hence no checks for the validity of the
    373     * surrogate characters are done
    374     * @param lead lead surrogate character
    375     * @param trail trailing surrogate character
    376     * @return code point of the supplementary character
    377     */
    378     public static int getRawSupplementary(char lead, char trail)
    379     {
    380         return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_;
    381     }
    382 
    383 }
    384