1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 2002-2004, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 package android.icu.dev.test; 11 12 import android.icu.testsharding.MainTestShard; 13 14 /** 15 * Utility class for supplementary code point 16 * support. This one is written purely for updating 17 * Normalization sample from the unicode.org site. 18 * If you want the real thing, use UTF16 class 19 * from ICU4J 20 * @author Vladimir Weinstein, Markus Scherer 21 */ 22 @MainTestShard 23 public class UTF16Util { 24 static final int suppOffset = (0xd800 << 10) + 0xdc00 - 0x10000; 25 26 /** 27 * Method nextCodePoint. Returns the next code point 28 * in a string. 29 * @param s String in question 30 * @param i index from which we want a code point 31 * @return int codepoint at index i 32 */ 33 public static final int nextCodePoint(String s, int i) { 34 int ch = s.charAt(i); 35 if (0xd800 <= ch && ch <= 0xdbff && ++i < s.length()) { 36 int ch2 = s.charAt(i); 37 if (0xdc00 <= ch2 && ch2 <= 0xdfff) { 38 ch = (ch << 10) + ch2 - suppOffset; 39 } 40 } 41 return ch; 42 } 43 44 /** 45 * Method prevCodePoint. Gets the code point preceding 46 * index i (predecrement). 47 * @param s String in question 48 * @param i index in string 49 * @return int codepoint at index --i 50 */ 51 public static final int prevCodePoint(String s, int i) { 52 int ch = s.charAt(--i); 53 if (0xdc00 <= ch && ch <= 0xdfff && --i >= 0) { 54 int ch2 = s.charAt(i); 55 if (0xd800 <= ch2 && ch2 <= 0xdbff) { 56 ch = (ch2 << 10) + ch - suppOffset; 57 } 58 } 59 return ch; 60 } 61 62 /** 63 * Method nextCodePoint. Returns the next code point 64 * in a string. 65 * @param s StringBuffer in question 66 * @param i index from which we want a code point 67 * @return int codepoint at index i 68 */ 69 public static final int nextCodePoint(StringBuffer s, int i) { 70 int ch = s.charAt(i); 71 if (0xd800 <= ch && ch <= 0xdbff && ++i < s.length()) { 72 int ch2 = s.charAt(i); 73 if (0xdc00 <= ch2 && ch2 <= 0xdfff) { 74 ch = (ch << 10) + ch2 - suppOffset; 75 } 76 } 77 return ch; 78 } 79 80 /** 81 * Method prevCodePoint. Gets the code point preceding 82 * index i (predecrement). 83 * @param s StringBuffer in question 84 * @param i index in string 85 * @return int codepoint at index --i 86 */ 87 public static final int prevCodePoint(StringBuffer s, int i) { 88 int ch = s.charAt(--i); 89 if (0xdc00 <= ch && ch <= 0xdfff && --i >= 0) { 90 int ch2 = s.charAt(i); 91 if (0xd800 <= ch2 && ch2 <= 0xdbff) { 92 ch = (ch2 << 10) + ch - suppOffset; 93 } 94 } 95 return ch; 96 } 97 98 /** 99 * Method codePointLength. Returns the length 100 * in UTF-16 code units of a given code point 101 * @param c code point in question 102 * @return int length in UTF-16 code units. Can be 1 or 2 103 */ 104 public static final int codePointLength(int c) { 105 return c <= 0xffff ? 1 : 2; 106 } 107 108 /** 109 * Method appendCodePoint. Appends a code point 110 * to a StringBuffer 111 * @param buffer StringBuffer in question 112 * @param ch code point to append 113 */ 114 public static final void appendCodePoint(StringBuffer buffer, int ch) { 115 if (ch <= 0xffff) { 116 buffer.append((char)ch); 117 } else { 118 buffer.append((char)(0xd7c0 + (ch >> 10))); 119 buffer.append((char)(0xdc00 + (ch & 0x3ff))); 120 } 121 } 122 123 /** 124 * Method insertCodePoint. Inserts a code point in 125 * a StringBuffer 126 * @param buffer StringBuffer in question 127 * @param i index at which we want code point to be inserted 128 * @param ch code point to be inserted 129 */ 130 public static final void insertCodePoint(StringBuffer buffer, int i, int ch) { 131 if (ch <= 0xffff) { 132 buffer.insert(i, (char)ch); 133 } else { 134 buffer.insert(i, (char)(0xd7c0 + (ch >> 10))).insert(i + 1, (char)(0xdc00 + (ch & 0x3ff))); 135 } 136 } 137 138 /** 139 * Method setCodePointAt. Changes a code point at a 140 * given index. Can change the length of the string. 141 * @param buffer StringBuffer in question 142 * @param i index at which we want to change the contents 143 * @param ch replacement code point 144 * @return int difference in resulting StringBuffer length 145 */ 146 public static final int setCodePointAt(StringBuffer buffer, int i, int ch) { 147 int cp = nextCodePoint(buffer, i); 148 149 if (ch <= 0xffff && cp <= 0xffff) { // Both BMP 150 buffer.setCharAt(i, (char)ch); 151 return 0; 152 } else if (ch > 0xffff && cp > 0xffff) { // Both supplementary 153 buffer.setCharAt(i, (char)(0xd7c0 + (ch >> 10))); 154 buffer.setCharAt(i+1, (char)(0xdc00 + (ch & 0x3ff))); 155 return 0; 156 } else if (ch <= 0xffff && cp > 0xffff) { // putting BMP instead of supplementary, buffer shrinks 157 buffer.setCharAt(i, (char)ch); 158 buffer.deleteCharAt(i+1); 159 return -1; 160 } else { //if (ch > 0xffff && cp <= 0xffff) { // putting supplementary instead of BMP, buffer grows 161 buffer.setCharAt(i, (char)(0xd7c0 + (ch >> 10))); 162 buffer.insert(i+1, (char)(0xdc00 + (ch & 0x3ff))); 163 return 1; 164 } 165 } 166 167 /** 168 * Method countCodePoint. Counts the UTF-32 code points 169 * in a UTF-16 encoded string. 170 * @param source String in question. 171 * @return int number of code points in this string 172 */ 173 public static final int countCodePoint(String source) 174 { 175 int result = 0; 176 char ch; 177 boolean hadLeadSurrogate = false; 178 179 for (int i = 0; i < source.length(); ++ i) 180 { 181 ch = source.charAt(i); 182 if (hadLeadSurrogate && 0xdc00 <= ch && ch <= 0xdfff) { 183 hadLeadSurrogate = false; // count valid trail as zero 184 } 185 else 186 { 187 hadLeadSurrogate = (0xd800 <= ch && ch <= 0xdbff); 188 ++ result; // count others as 1 189 } 190 } 191 192 return result; 193 } 194 195 /** 196 * Method countCodePoint. Counts the UTF-32 code points 197 * in a UTF-16 encoded string. 198 * @param source StringBuffer in question. 199 * @return int number of code points in this string 200 */ 201 public static final int countCodePoint(StringBuffer source) 202 { 203 int result = 0; 204 char ch; 205 boolean hadLeadSurrogate = false; 206 207 for (int i = 0; i < source.length(); ++ i) 208 { 209 ch = source.charAt(i); 210 if (hadLeadSurrogate && 0xdc00 <= ch && ch <= 0xdfff) { 211 hadLeadSurrogate = false; // count valid trail as zero 212 } 213 else 214 { 215 hadLeadSurrogate = (0xd800 <= ch && ch <= 0xdbff); 216 ++ result; // count others as 1 217 } 218 } 219 220 return result; 221 } 222 /** 223 * The minimum value for Supplementary code points 224 */ 225 public static final int SUPPLEMENTARY_MIN_VALUE = 0x10000; 226 /** 227 * Determines how many chars this char32 requires. 228 * If a validity check is required, use <code> 229 * <a href="../UCharacter.html#isLegal(char)">isLegal()</a></code> on 230 * char32 before calling. 231 * @param char32 the input codepoint. 232 * @return 2 if is in supplementary space, otherwise 1. 233 */ 234 public static int getCharCount(int char32) 235 { 236 if (char32 < SUPPLEMENTARY_MIN_VALUE) { 237 return 1; 238 } 239 return 2; 240 } 241 /** 242 * Lead surrogate maximum value 243 * @stable ICU 2.1 244 */ 245 public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF; 246 /** 247 * Lead surrogate minimum value 248 * @stable ICU 2.1 249 */ 250 public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800; 251 252 /** 253 * Trail surrogate minimum value 254 * @stable ICU 2.1 255 */ 256 public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00; 257 /** 258 * Trail surrogate maximum value 259 * @stable ICU 2.1 260 */ 261 public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF; 262 /** 263 * Determines whether the code value is a surrogate. 264 * @param char16 the input character. 265 * @return true iff the input character is a surrogate. 266 * @stable ICU 2.1 267 */ 268 public static boolean isSurrogate(char char16) 269 { 270 return LEAD_SURROGATE_MIN_VALUE <= char16 && 271 char16 <= TRAIL_SURROGATE_MAX_VALUE; 272 } 273 274 /** 275 * Determines whether the character is a trail surrogate. 276 * @param char16 the input character. 277 * @return true iff the input character is a trail surrogate. 278 * @stable ICU 2.1 279 */ 280 public static boolean isTrailSurrogate(char char16) 281 { 282 return (TRAIL_SURROGATE_MIN_VALUE <= char16 && 283 char16 <= TRAIL_SURROGATE_MAX_VALUE); 284 } 285 286 /** 287 * Determines whether the character is a lead surrogate. 288 * @param char16 the input character. 289 * @return true iff the input character is a lead surrogate 290 * @stable ICU 2.1 291 */ 292 public static boolean isLeadSurrogate(char char16) 293 { 294 return LEAD_SURROGATE_MIN_VALUE <= char16 && 295 char16 <= LEAD_SURROGATE_MAX_VALUE; 296 } 297 /** 298 * Extract a single UTF-32 value from a substring. 299 * Used when iterating forwards or backwards (with 300 * <code>UTF16.getCharCount()</code>, as well as random access. If a 301 * validity check is required, use 302 * <code><a href="../UCharacter.html#isLegal(char)">UCharacter.isLegal() 303 * </a></code> on the return value. 304 * If the char retrieved is part of a surrogate pair, its supplementary 305 * character will be returned. If a complete supplementary character is 306 * not found the incomplete character will be returned 307 * @param source array of UTF-16 chars 308 * @param start offset to substring in the source array for analyzing 309 * @param limit offset to substring in the source array for analyzing 310 * @param offset16 UTF-16 offset relative to start 311 * @return UTF-32 value for the UTF-32 value that contains the char at 312 * offset16. The boundaries of that codepoint are the same as in 313 * <code>bounds32()</code>. 314 * @exception IndexOutOfBoundsException thrown if offset16 is not within 315 * the range of start and limit. 316 * @stable ICU 2.1 317 */ 318 public static int charAt(char source[], int start, int limit, 319 int offset16) 320 { 321 offset16 += start; 322 if (offset16 < start || offset16 >= limit) { 323 throw new ArrayIndexOutOfBoundsException(offset16); 324 } 325 326 char single = source[offset16]; 327 if (!isSurrogate(single)) { 328 return single; 329 } 330 331 // Convert the UTF-16 surrogate pair if necessary. 332 // For simplicity in usage, and because the frequency of pairs is 333 // low, look both directions. 334 if (single <= LEAD_SURROGATE_MAX_VALUE) { 335 offset16 ++; 336 if (offset16 >= limit) { 337 return single; 338 } 339 char trail = source[offset16]; 340 if (isTrailSurrogate(trail)) { 341 return getRawSupplementary(single, trail); 342 } 343 } 344 else { // isTrailSurrogate(single), so 345 if (offset16 == start) { 346 return single; 347 } 348 offset16 --; 349 char lead = source[offset16]; 350 if (isLeadSurrogate(lead)) 351 return getRawSupplementary(lead, single); 352 } 353 return single; // return unmatched surrogate 354 } 355 /** 356 * Shift value for lead surrogate to form a supplementary character. 357 */ 358 private static final int LEAD_SURROGATE_SHIFT_ = 10; 359 360 /** 361 * Offset to add to combined surrogate pair to avoid msking. 362 */ 363 private static final int SURROGATE_OFFSET_ = 364 SUPPLEMENTARY_MIN_VALUE - 365 (LEAD_SURROGATE_MIN_VALUE << 366 LEAD_SURROGATE_SHIFT_) - 367 TRAIL_SURROGATE_MIN_VALUE; 368 369 370 /** 371 * Forms a supplementary code point from the argument character<br> 372 * Note this is for internal use hence no checks for the validity of the 373 * surrogate characters are done 374 * @param lead lead surrogate character 375 * @param trail trailing surrogate character 376 * @return code point of the supplementary character 377 */ 378 public static int getRawSupplementary(char lead, char trail) 379 { 380 return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_; 381 } 382 383 } 384