1 package org.unicode.cldr.util; 2 3 public class CharUtilities { 4 5 /** 6 * Simple wrapper for CharSequence 7 * 8 * @author markdavis 9 * 10 */ 11 public static class CharSourceWrapper<T extends CharSequence> implements CharSource { 12 protected T source; 13 14 public CharSourceWrapper(T source) { 15 this.source = source; 16 } 17 18 public boolean hasCharAt(int index) { 19 return index < source.length(); 20 } 21 22 public char charAt(int index) { 23 return source.charAt(index); 24 } 25 26 public int toSourceOffset(int index) { 27 return index; 28 } 29 30 public CharSource sublist(int start, int end) { 31 return new CharSourceWrapper<CharSequence>(source.subSequence(start, end)); 32 } 33 34 public CharSource sublist(int start) { 35 return new CharSourceWrapper<CharSequence>(source.subSequence(start, source.length())); 36 } 37 38 public int getKnownLength() { 39 return source.length(); 40 } 41 42 public CharSequence subSequence(int start, int end) { 43 return source.subSequence(start, end); 44 } 45 46 @Override 47 public String toString() { 48 return source.toString(); 49 } 50 51 public CharSequence sourceSubSequence(int start, int end) { 52 return source.subSequence(toSourceOffset(start), toSourceOffset(end)); 53 } 54 55 public int fromSourceOffset(int index) { 56 return index; 57 } 58 59 public CharSource setStart(int index) { 60 return this; 61 } 62 63 public int getStart() { 64 return 0; 65 } 66 } 67 68 /** 69 * Return the code point order of two CharSequences. 70 * If the text has isolated surrogates, they will not sort correctly. 71 * 72 * @param text1 73 * @param text2 74 * @return 75 */ 76 public static int compare(CharSource text1, CharSource text2) { 77 int i1 = 0; 78 int i2 = 0; 79 80 while (true) { 81 // handle running out of room 82 if (!text1.hasCharAt(i1)) { 83 if (text2.hasCharAt(i2)) { 84 return 0; 85 } 86 return -1; 87 } else if (text2.hasCharAt(i2)) { 88 return 1; 89 } 90 int cp1 = text1.charAt(i1++); 91 int cp2 = text2.charAt(i2++); 92 // if they are different, do a fixup 93 94 if (cp1 != cp2) { 95 return (cp1 + utf16Fixup[cp1 >> 11]) - 96 (cp2 + utf16Fixup[cp2 >> 11]); 97 } 98 } 99 } 100 101 private static final char utf16Fixup[] = { 102 0, 0, 0, 0, 0, 0, 0, 0, 103 0, 0, 0, 0, 0, 0, 0, 0, 104 0, 0, 0, 0, 0, 0, 0, 0, 105 0, 0, 0, 0x2000, 0xf800, 0xf800, 0xf800, 0xf800 106 }; 107 108 /** 109 * Return the code point order of two CharSequences. 110 * If the text has isolated surrogates, they will not sort correctly. 111 * 112 * @param text1 113 * @param text2 114 * @return 115 */ 116 public static int compare(CharSequence text1, CharSequence text2) { 117 int i1 = 0; 118 int i2 = 0; 119 120 while (true) { 121 // handle running out of room 122 if (i1 >= text1.length()) { 123 if (i2 >= text2.length()) { 124 return 0; 125 } 126 return -1; 127 } else if (i2 >= text2.length()) { 128 return 1; 129 } 130 int cp1 = text1.charAt(i1++); 131 int cp2 = text2.charAt(i2++); 132 // if they are different, do a fixup 133 134 if (cp1 != cp2) { 135 return (cp1 + utf16Fixup[cp1 >> 11]) - 136 (cp2 + utf16Fixup[cp2 >> 11]); 137 } 138 } 139 } 140 141 }