Home | History | Annotate | Download | only in lang
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  ********************************************************************************
      5  * Copyright (C) 2010-2014, Google, International Business Machines Corporation *
      6  * and others. All Rights Reserved.                                                 *
      7  ********************************************************************************
      8  */
      9 package com.ibm.icu.lang;
     10 
     11 
     12 /**
     13  * A number of utilities for dealing with CharSequences and related classes.
     14  * For accessing codepoints with a CharSequence, also see
     15  * <ul>
     16  * <li>{@link java.lang.Character#codePointAt(CharSequence, int)}</li>
     17  * <li>{@link java.lang.Character#codePointBefore(CharSequence, int)}</li>
     18  * <li>{@link java.lang.Character#codePointCount(CharSequence, int, int)}</li>
     19  * <li>{@link java.lang.Character#charCount(int)}</li>
     20  * <li>{@link java.lang.Character#offsetByCodePoints(CharSequence, int, int)}</li>
     21  * <li>{@link java.lang.Character#toChars(int, char[], int)}</li>
     22  * <li>{@link java.lang.Character#toCodePoint(char, char)}</li>
     23  * </ul>
     24  * @author markdavis
     25  * @internal
     26  * @deprecated This API is ICU internal only.
     27  */
     28 @Deprecated
     29 public class CharSequences {
     30     // TODO
     31     // compareTo(a, b);
     32     // compareToIgnoreCase(a, b)
     33     // contentEquals(a, b)
     34     // contentEqualsIgnoreCase(a, b)
     35 
     36     // contains(a, b) => indexOf >= 0
     37     // endsWith(a, b)
     38     // startsWith(a, b)
     39 
     40     // lastIndexOf(a, b, fromIndex)
     41     // indexOf(a, ch, fromIndex)
     42     // lastIndexOf(a, ch, fromIndex);
     43 
     44     // s.trim() => UnicodeSet.trim(CharSequence s); return a subsequence starting with the first character not in the set to the last character not in the set.
     45     // add UnicodeSet.split(CharSequence s);
     46 
     47     /**
     48      * Find the longest n such that a[aIndex,n] = b[bIndex,n], and n is on a character boundary.
     49      * @internal
     50      * @deprecated This API is ICU internal only.
     51      */
     52     @Deprecated
     53     public static int matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex) {
     54         int i = aIndex, j = bIndex;
     55         int alen = a.length();
     56         int blen = b.length();
     57         for (; i < alen && j < blen; ++i, ++j) {
     58             char ca = a.charAt(i);
     59             char cb = b.charAt(j);
     60             if (ca != cb) {
     61                 break;
     62             }
     63         }
     64         // if we failed a match make sure that we didn't match half a character
     65         int result = i - aIndex;
     66         if (result != 0 && !onCharacterBoundary(a, i) && !onCharacterBoundary(b, j)) {
     67             --result; // backup
     68         }
     69         return result;
     70     }
     71 
     72     /**
     73      * Count the code point length. Unpaired surrogates count as 1.
     74      * @internal
     75      * @deprecated This API is ICU internal only.
     76      */
     77     @Deprecated
     78     public int codePointLength(CharSequence s) {
     79         return Character.codePointCount(s, 0, s.length());
     80 //        int length = s.length();
     81 //        int result = length;
     82 //        for (int i = 1; i < length; ++i) {
     83 //            char ch = s.charAt(i);
     84 //            if (0xDC00 <= ch && ch <= 0xDFFF) {
     85 //                char ch0 = s.charAt(i-1);
     86 //                if (0xD800 <= ch && ch <= 0xDbFF) {
     87 //                    --result;
     88 //                }
     89 //            }
     90 //        }
     91     }
     92 
     93     /**
     94      * Utility function for comparing codepoint to string without generating new
     95      * string.
     96      *
     97      * @internal
     98      * @deprecated This API is ICU internal only.
     99      */
    100     @Deprecated
    101     public static final boolean equals(int codepoint, CharSequence other) {
    102         if (other == null) {
    103             return false;
    104         }
    105         switch (other.length()) {
    106         case 1: return codepoint == other.charAt(0);
    107         case 2: return codepoint > 0xFFFF && codepoint == Character.codePointAt(other, 0);
    108         default: return false;
    109         }
    110     }
    111 
    112     /**
    113      * @internal
    114      * @deprecated This API is ICU internal only.
    115      */
    116     @Deprecated
    117     public static final boolean equals(CharSequence other, int codepoint) {
    118         return equals(codepoint, other);
    119     }
    120 
    121     /**
    122      * Utility to compare a string to a code point.
    123      * Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString())
    124      * and comparing, but much faster (no object creation).
    125      * Actually, there is one difference; a null compares as less.
    126      * Note that this (=String) order is UTF-16 order -- *not* code point order.
    127      *
    128      * @internal
    129      * @deprecated This API is ICU internal only.
    130      */
    131     @Deprecated
    132     public static int compare(CharSequence string, int codePoint) {
    133         if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) {
    134             throw new IllegalArgumentException();
    135         }
    136         int stringLength = string.length();
    137         if (stringLength == 0) {
    138             return -1;
    139         }
    140         char firstChar = string.charAt(0);
    141         int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;
    142 
    143         if (offset < 0) { // BMP codePoint
    144             int result = firstChar - codePoint;
    145             if (result != 0) {
    146                 return result;
    147             }
    148             return stringLength - 1;
    149         }
    150         // non BMP
    151         char lead = (char)((offset >>> 10) + Character.MIN_HIGH_SURROGATE);
    152         int result = firstChar - lead;
    153         if (result != 0) {
    154             return result;
    155         }
    156         if (stringLength > 1) {
    157             char trail = (char)((offset & 0x3ff) + Character.MIN_LOW_SURROGATE);
    158             result = string.charAt(1) - trail;
    159             if (result != 0) {
    160                 return result;
    161             }
    162         }
    163         return stringLength - 2;
    164     }
    165 
    166     /**
    167      * Utility to compare a string to a code point.
    168      * Same results as turning the code point into a string and comparing, but much faster (no object creation).
    169      * Actually, there is one difference; a null compares as less.
    170      * Note that this (=String) order is UTF-16 order -- *not* code point order.
    171      *
    172      * @internal
    173      * @deprecated This API is ICU internal only.
    174      */
    175     @Deprecated
    176     public static int compare(int codepoint, CharSequence a) {
    177         int result = compare(a, codepoint);
    178         return result > 0 ? -1 : result < 0 ? 1 : 0; // Reverse the order.
    179     }
    180 
    181     /**
    182      * Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE.
    183      *
    184      * @internal
    185      * @deprecated This API is ICU internal only.
    186      */
    187     @Deprecated
    188     public static int getSingleCodePoint(CharSequence s) {
    189         int length = s.length();
    190         if (length < 1 || length > 2) {
    191             return Integer.MAX_VALUE;
    192         }
    193         int result = Character.codePointAt(s, 0);
    194         return (result < 0x10000) == (length == 1) ? result : Integer.MAX_VALUE;
    195     }
    196 
    197     /**
    198      * Utility function for comparing objects that may be null
    199      * string.
    200      *
    201      * @internal
    202      * @deprecated This API is ICU internal only.
    203      */
    204     @Deprecated
    205     public static final <T extends Object> boolean equals(T a, T b) {
    206         return a == null ? b == null
    207                 : b == null ? false
    208                         : a.equals(b);
    209     }
    210 
    211     /**
    212      * Utility for comparing the contents of CharSequences
    213      *
    214      * @internal
    215      * @deprecated This API is ICU internal only.
    216      */
    217     @Deprecated
    218     public static int compare(CharSequence a, CharSequence b) {
    219         int alength = a.length();
    220         int blength = b.length();
    221         int min = alength <= blength ? alength : blength;
    222         for (int i = 0; i < min; ++i) {
    223             int diff = a.charAt(i) - b.charAt(i);
    224             if (diff != 0) {
    225                 return diff;
    226             }
    227         }
    228         return alength - blength;
    229     }
    230 
    231     /**
    232      * Utility for comparing the contents of CharSequences
    233      *
    234      * @internal
    235      * @deprecated This API is ICU internal only.
    236      */
    237     @Deprecated
    238     public static boolean equalsChars(CharSequence a, CharSequence b) {
    239         // do length test first for fast path
    240         return a.length() == b.length() && compare(a,b) == 0;
    241     }
    242 
    243     /**
    244      * Are we on a character boundary?
    245      *
    246      * @internal
    247      * @deprecated This API is ICU internal only.
    248      */
    249     @Deprecated
    250     public static boolean onCharacterBoundary(CharSequence s, int i) {
    251         return i <= 0
    252         || i >= s.length()
    253         || !Character.isHighSurrogate(s.charAt(i-1))
    254         || !Character.isLowSurrogate(s.charAt(i));
    255     }
    256 
    257     /**
    258      * Find code point in string.
    259      *
    260      * @internal
    261      * @deprecated This API is ICU internal only.
    262      */
    263     @Deprecated
    264     public static int indexOf(CharSequence s, int codePoint) {
    265         int cp;
    266         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
    267             cp = Character.codePointAt(s, i);
    268             if (cp == codePoint) {
    269                 return i;
    270             }
    271         }
    272         return -1;
    273     }
    274 
    275     /**
    276      * Utility function for simplified, more robust loops, such as:
    277      * <pre>
    278      *   for (int codePoint : CharSequences.codePoints(string)) {
    279      *     doSomethingWith(codePoint);
    280      *   }
    281      * </pre>
    282      *
    283      * @internal
    284      * @deprecated This API is ICU internal only.
    285      */
    286     @Deprecated
    287     public static int[] codePoints(CharSequence s) {
    288         int[] result = new int[s.length()]; // in the vast majority of cases, the length is the same
    289         int j = 0;
    290         for (int i = 0; i < s.length(); ++i) {
    291             char cp = s.charAt(i);
    292             if (cp >= 0xDC00 && cp <= 0xDFFF && i != 0 ) { // hand-code for speed
    293                 char last = (char) result[j-1];
    294                 if (last >= 0xD800 && last <= 0xDBFF) {
    295                     // Note: j-1 is safe, because j can only be zero if i is zero. But i!=0 in this block.
    296                     result[j-1] = Character.toCodePoint(last, cp);
    297                     continue;
    298                 }
    299             }
    300             result[j++] = cp;
    301         }
    302         if (j == result.length) {
    303             return result;
    304         }
    305         int[] shortResult = new int[j];
    306         System.arraycopy(result, 0, shortResult, 0, j);
    307         return shortResult;
    308     }
    309 
    310     private CharSequences() {
    311     }
    312 }
    313