Home | History | Annotate | Download | only in util
      1 package org.unicode.cldr.util;
      2 
      3 public class CharUtilities {
      4 
      5     /**
      6      * Simple wrapper for CharSequence
      7      *
      8      * @author markdavis
      9      *
     10      */
     11     public static class CharSourceWrapper<T extends CharSequence> implements CharSource {
     12         protected T source;
     13 
     14         public CharSourceWrapper(T source) {
     15             this.source = source;
     16         }
     17 
     18         public boolean hasCharAt(int index) {
     19             return index < source.length();
     20         }
     21 
     22         public char charAt(int index) {
     23             return source.charAt(index);
     24         }
     25 
     26         public int toSourceOffset(int index) {
     27             return index;
     28         }
     29 
     30         public CharSource sublist(int start, int end) {
     31             return new CharSourceWrapper<CharSequence>(source.subSequence(start, end));
     32         }
     33 
     34         public CharSource sublist(int start) {
     35             return new CharSourceWrapper<CharSequence>(source.subSequence(start, source.length()));
     36         }
     37 
     38         public int getKnownLength() {
     39             return source.length();
     40         }
     41 
     42         public CharSequence subSequence(int start, int end) {
     43             return source.subSequence(start, end);
     44         }
     45 
     46         @Override
     47         public String toString() {
     48             return source.toString();
     49         }
     50 
     51         public CharSequence sourceSubSequence(int start, int end) {
     52             return source.subSequence(toSourceOffset(start), toSourceOffset(end));
     53         }
     54 
     55         public int fromSourceOffset(int index) {
     56             return index;
     57         }
     58 
     59         public CharSource setStart(int index) {
     60             return this;
     61         }
     62 
     63         public int getStart() {
     64             return 0;
     65         }
     66     }
     67 
     68     /**
     69      * Return the code point order of two CharSequences.
     70      * If the text has isolated surrogates, they will not sort correctly.
     71      *
     72      * @param text1
     73      * @param text2
     74      * @return
     75      */
     76     public static int compare(CharSource text1, CharSource text2) {
     77         int i1 = 0;
     78         int i2 = 0;
     79 
     80         while (true) {
     81             // handle running out of room
     82             if (!text1.hasCharAt(i1)) {
     83                 if (text2.hasCharAt(i2)) {
     84                     return 0;
     85                 }
     86                 return -1;
     87             } else if (text2.hasCharAt(i2)) {
     88                 return 1;
     89             }
     90             int cp1 = text1.charAt(i1++);
     91             int cp2 = text2.charAt(i2++);
     92             // if they are different, do a fixup
     93 
     94             if (cp1 != cp2) {
     95                 return (cp1 + utf16Fixup[cp1 >> 11]) -
     96                     (cp2 + utf16Fixup[cp2 >> 11]);
     97             }
     98         }
     99     }
    100 
    101     private static final char utf16Fixup[] = {
    102         0, 0, 0, 0, 0, 0, 0, 0,
    103         0, 0, 0, 0, 0, 0, 0, 0,
    104         0, 0, 0, 0, 0, 0, 0, 0,
    105         0, 0, 0, 0x2000, 0xf800, 0xf800, 0xf800, 0xf800
    106     };
    107 
    108     /**
    109      * Return the code point order of two CharSequences.
    110      * If the text has isolated surrogates, they will not sort correctly.
    111      *
    112      * @param text1
    113      * @param text2
    114      * @return
    115      */
    116     public static int compare(CharSequence text1, CharSequence text2) {
    117         int i1 = 0;
    118         int i2 = 0;
    119 
    120         while (true) {
    121             // handle running out of room
    122             if (i1 >= text1.length()) {
    123                 if (i2 >= text2.length()) {
    124                     return 0;
    125                 }
    126                 return -1;
    127             } else if (i2 >= text2.length()) {
    128                 return 1;
    129             }
    130             int cp1 = text1.charAt(i1++);
    131             int cp2 = text2.charAt(i2++);
    132             // if they are different, do a fixup
    133 
    134             if (cp1 != cp2) {
    135                 return (cp1 + utf16Fixup[cp1 >> 11]) -
    136                     (cp2 + utf16Fixup[cp2 >> 11]);
    137             }
    138         }
    139     }
    140 
    141 }