1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * 7 * Copyright (C) 1999-2014, International Business Machines 8 * Corporation and others. All Rights Reserved. 9 * 10 ******************************************************************************* 11 */ 12 13 package android.icu.lang; 14 15 import android.icu.text.UTF16; 16 17 /** 18 * <code>UScriptRun</code> is used to find runs of characters in 19 * the same script, as defined in the <code>UScript</code> class. 20 * It implements a simple iterator over an array of characters. 21 * The iterator will assign <code>COMMON</code> and <code>INHERITED</code> 22 * characters to the same script as the preceeding characters. If the 23 * COMMON and INHERITED characters are first, they will be assigned to 24 * the same script as the following characters. 25 * 26 * The iterator will try to match paired punctuation. If it sees an 27 * opening punctuation character, it will remember the script that 28 * was assigned to that character, and assign the same script to the 29 * matching closing punctuation. 30 * 31 * No attempt is made to combine related scripts into a single run. In 32 * particular, Hiragana, Katakana, and Han characters will appear in separate 33 * runs. 34 35 * Here is an example of how to iterate over script runs: 36 * <pre> 37 * void printScriptRuns(char[] text) 38 * { 39 * UScriptRun scriptRun = new UScriptRun(text); 40 * 41 * while (scriptRun.next()) { 42 * int start = scriptRun.getScriptStart(); 43 * int limit = scriptRun.getScriptLimit(); 44 * int script = scriptRun.getScriptCode(); 45 * 46 * System.out.println("Script \"" + UScript.getName(script) + "\" from " + 47 * start + " to " + limit + "."); 48 * } 49 * } 50 * </pre> 51 * 52 * @deprecated This API is ICU internal only. 53 * @hide Only a subset of ICU is exposed in Android 54 * @hide draft / provisional / internal are hidden on Android 55 */ 56 @Deprecated 57 public final class UScriptRun 58 { 59 /** 60 * Construct an empty <code>UScriptRun</code> object. The <code>next()</code> 61 * method will return <code>false</code> the first time it is called. 62 * 63 * @deprecated This API is ICU internal only. 64 * @hide draft / provisional / internal are hidden on Android 65 */ 66 @Deprecated 67 public UScriptRun() 68 { 69 char[] nullChars = null; 70 71 reset(nullChars, 0, 0); 72 } 73 74 /** 75 * Construct a <code>UScriptRun</code> object which iterates over the 76 * characters in the given string. 77 * 78 * @param text the string of characters over which to iterate. 79 * 80 * @deprecated This API is ICU internal only. 81 * @hide draft / provisional / internal are hidden on Android 82 */ 83 @Deprecated 84 public UScriptRun(String text) 85 { 86 reset (text); 87 } 88 89 /** 90 * Construct a <code>UScriptRun</code> object which iterates over a subrange 91 * of the characetrs in the given string. 92 * 93 * @param text the string of characters over which to iterate. 94 * @param start the index of the first character over which to iterate 95 * @param count the number of characters over which to iterate 96 * 97 * @deprecated This API is ICU internal only. 98 * @hide draft / provisional / internal are hidden on Android 99 */ 100 @Deprecated 101 public UScriptRun(String text, int start, int count) 102 { 103 reset(text, start, count); 104 } 105 106 /** 107 * Construct a <code>UScriptRun</code> object which iterates over the given 108 * characetrs. 109 * 110 * @param chars the array of characters over which to iterate. 111 * 112 * @deprecated This API is ICU internal only. 113 * @hide draft / provisional / internal are hidden on Android 114 */ 115 @Deprecated 116 public UScriptRun(char[] chars) 117 { 118 reset(chars); 119 } 120 121 /** 122 * Construct a <code>UScriptRun</code> object which iterates over a subrange 123 * of the given characetrs. 124 * 125 * @param chars the array of characters over which to iterate. 126 * @param start the index of the first character over which to iterate 127 * @param count the number of characters over which to iterate 128 * 129 * @deprecated This API is ICU internal only. 130 * @hide draft / provisional / internal are hidden on Android 131 */ 132 @Deprecated 133 public UScriptRun(char[] chars, int start, int count) 134 { 135 reset(chars, start, count); 136 } 137 138 139 /** 140 * Reset the iterator to the start of the text. 141 * 142 * @deprecated This API is ICU internal only. 143 * @hide draft / provisional / internal are hidden on Android 144 */ 145 @Deprecated 146 public final void reset() 147 { 148 // empty any old parenStack contents. 149 // NOTE: this is not the most efficient way 150 // to do this, but it's the easiest to write... 151 while (stackIsNotEmpty()) { 152 pop(); 153 } 154 155 scriptStart = textStart; 156 scriptLimit = textStart; 157 scriptCode = UScript.INVALID_CODE; 158 parenSP = -1; 159 pushCount = 0; 160 fixupCount = 0; 161 162 textIndex = textStart; 163 } 164 165 /** 166 * Reset the iterator to iterate over the given range of the text. Throws 167 * IllegalArgumentException if the range is outside of the bounds of the 168 * character array. 169 * 170 * @param start the index of the new first character over which to iterate 171 * @param count the new number of characters over which to iterate. 172 * @exception IllegalArgumentException If invalid arguments are passed. 173 * 174 * @deprecated This API is ICU internal only. 175 * @hide draft / provisional / internal are hidden on Android 176 */ 177 @Deprecated 178 public final void reset(int start, int count) 179 throws IllegalArgumentException 180 { 181 int len = 0; 182 183 if (text != null) { 184 len = text.length; 185 } 186 187 if (start < 0 || count < 0 || start > len - count) { 188 throw new IllegalArgumentException(); 189 } 190 191 textStart = start; 192 textLimit = start + count; 193 194 reset(); 195 } 196 197 /** 198 * Reset the iterator to iterate over <code>count</code> characters 199 * in <code>chars</code> starting at <code>start</code>. This allows 200 * clients to reuse an iterator. 201 * 202 * @param chars the new array of characters over which to iterate. 203 * @param start the index of the first character over which to iterate. 204 * @param count the number of characters over which to iterate. 205 * 206 * @deprecated This API is ICU internal only. 207 * @hide draft / provisional / internal are hidden on Android 208 */ 209 @Deprecated 210 public final void reset(char[] chars, int start, int count) 211 { 212 if (chars == null) { 213 chars = emptyCharArray; 214 } 215 216 text = chars; 217 218 reset(start, count); 219 } 220 221 /** 222 * Reset the iterator to iterate over the characters 223 * in <code>chars</code>. This allows clients to reuse an iterator. 224 * 225 * @param chars the new array of characters over which to iterate. 226 * 227 * @deprecated This API is ICU internal only. 228 * @hide draft / provisional / internal are hidden on Android 229 */ 230 @Deprecated 231 public final void reset(char[] chars) 232 { 233 int length = 0; 234 235 if (chars != null) { 236 length = chars.length; 237 } 238 239 reset(chars, 0, length); 240 } 241 242 /** 243 * Reset the iterator to iterate over <code>count</code> characters 244 * in <code>text</code> starting at <code>start</code>. This allows 245 * clients to reuse an iterator. 246 * 247 * @param str the new string of characters over which to iterate. 248 * @param start the index of the first character over which to iterate. 249 * @param count the nuber of characters over which to iterate. 250 * 251 * @deprecated This API is ICU internal only. 252 * @hide draft / provisional / internal are hidden on Android 253 */ 254 @Deprecated 255 public final void reset(String str, int start, int count) 256 { 257 char[] chars = null; 258 259 if (str != null) { 260 chars = str.toCharArray(); 261 } 262 263 reset(chars, start, count); 264 } 265 266 /** 267 * Reset the iterator to iterate over the characters 268 * in <code>text</code>. This allows clients to reuse an iterator. 269 * 270 * @param str the new string of characters over which to iterate. 271 * 272 * @deprecated This API is ICU internal only. 273 * @hide draft / provisional / internal are hidden on Android 274 */ 275 @Deprecated 276 public final void reset(String str) 277 { 278 int length = 0; 279 280 if (str != null) { 281 length = str.length(); 282 } 283 284 reset(str, 0, length); 285 } 286 287 288 289 /** 290 * Get the starting index of the current script run. 291 * 292 * @return the index of the first character in the current script run. 293 * 294 * @deprecated This API is ICU internal only. 295 * @hide draft / provisional / internal are hidden on Android 296 */ 297 @Deprecated 298 public final int getScriptStart() 299 { 300 return scriptStart; 301 } 302 303 /** 304 * Get the index of the first character after the current script run. 305 * 306 * @return the index of the first character after the current script run. 307 * 308 * @deprecated This API is ICU internal only. 309 * @hide draft / provisional / internal are hidden on Android 310 */ 311 @Deprecated 312 public final int getScriptLimit() 313 { 314 return scriptLimit; 315 } 316 317 /** 318 * Get the script code for the script of the current script run. 319 * 320 * @return the script code for the script of the current script run. 321 * @see android.icu.lang.UScript 322 * 323 * @deprecated This API is ICU internal only. 324 * @hide draft / provisional / internal are hidden on Android 325 */ 326 @Deprecated 327 public final int getScriptCode() 328 { 329 return scriptCode; 330 } 331 332 /** 333 * Find the next script run. Returns <code>false</code> if there 334 * isn't another run, returns <code>true</code> if there is. 335 * 336 * @return <code>false</code> if there isn't another run, <code>true</code> if there is. 337 * 338 * @deprecated This API is ICU internal only. 339 * @hide draft / provisional / internal are hidden on Android 340 */ 341 @Deprecated 342 public final boolean next() 343 { 344 // if we've fallen off the end of the text, we're done 345 if (scriptLimit >= textLimit) { 346 return false; 347 } 348 349 scriptCode = UScript.COMMON; 350 scriptStart = scriptLimit; 351 352 syncFixup(); 353 354 while (textIndex < textLimit) { 355 int ch = UTF16.charAt(text, textStart, textLimit, textIndex - textStart); 356 int codePointCount = UTF16.getCharCount(ch); 357 int sc = UScript.getScript(ch); 358 int pairIndex = getPairIndex(ch); 359 360 textIndex += codePointCount; 361 362 // Paired character handling: 363 // 364 // if it's an open character, push it onto the stack. 365 // if it's a close character, find the matching open on the 366 // stack, and use that script code. Any non-matching open 367 // characters above it on the stack will be poped. 368 if (pairIndex >= 0) { 369 if ((pairIndex & 1) == 0) { 370 push(pairIndex, scriptCode); 371 } else { 372 int pi = pairIndex & ~1; 373 374 while (stackIsNotEmpty() && top().pairIndex != pi) { 375 pop(); 376 } 377 378 if (stackIsNotEmpty()) { 379 sc = top().scriptCode; 380 } 381 } 382 } 383 384 if (sameScript(scriptCode, sc)) { 385 if (scriptCode <= UScript.INHERITED && sc > UScript.INHERITED) { 386 scriptCode = sc; 387 388 fixup(scriptCode); 389 } 390 391 // if this character is a close paired character, 392 // pop the matching open character from the stack 393 if (pairIndex >= 0 && (pairIndex & 1) != 0) { 394 pop(); 395 } 396 } else { 397 // We've just seen the first character of 398 // the next run. Back over it so we'll see 399 // it again the next time. 400 textIndex -= codePointCount; 401 break; 402 } 403 } 404 405 scriptLimit = textIndex; 406 return true; 407 } 408 409 /** 410 * Compare two script codes to see if they are in the same script. If one script is 411 * a strong script, and the other is INHERITED or COMMON, it will compare equal. 412 * 413 * @param scriptOne one of the script codes. 414 * @param scriptTwo the other script code. 415 * @return <code>true</code> if the two scripts are the same. 416 * @see android.icu.lang.UScript 417 */ 418 private static boolean sameScript(int scriptOne, int scriptTwo) 419 { 420 return scriptOne <= UScript.INHERITED || scriptTwo <= UScript.INHERITED || scriptOne == scriptTwo; 421 } 422 423 /* 424 * An internal class which holds entries on the paren stack. 425 */ 426 private static final class ParenStackEntry 427 { 428 int pairIndex; 429 int scriptCode; 430 431 public ParenStackEntry(int thePairIndex, int theScriptCode) 432 { 433 pairIndex = thePairIndex; 434 scriptCode = theScriptCode; 435 } 436 } 437 438 private static final int mod(int sp) 439 { 440 return sp % PAREN_STACK_DEPTH; 441 } 442 443 private static final int inc(int sp, int count) 444 { 445 return mod(sp + count); 446 } 447 448 private static final int inc(int sp) 449 { 450 return inc(sp, 1); 451 } 452 453 private static final int dec(int sp, int count) 454 { 455 return mod(sp + PAREN_STACK_DEPTH - count); 456 } 457 458 private static final int dec(int sp) 459 { 460 return dec(sp, 1); 461 } 462 463 private static final int limitInc(int count) 464 { 465 if (count < PAREN_STACK_DEPTH) { 466 count += 1; 467 } 468 469 return count; 470 } 471 472 private final boolean stackIsEmpty() 473 { 474 return pushCount <= 0; 475 } 476 477 private final boolean stackIsNotEmpty() 478 { 479 return ! stackIsEmpty(); 480 } 481 482 private final void push(int pairIndex, int scrptCode) 483 { 484 pushCount = limitInc(pushCount); 485 fixupCount = limitInc(fixupCount); 486 487 parenSP = inc(parenSP); 488 parenStack[parenSP] = new ParenStackEntry(pairIndex, scrptCode); 489 } 490 491 private final void pop() 492 { 493 494 if (stackIsEmpty()) { 495 return; 496 } 497 498 parenStack[parenSP] = null; 499 500 if (fixupCount > 0) { 501 fixupCount -= 1; 502 } 503 504 pushCount -= 1; 505 parenSP = dec(parenSP); 506 507 // If the stack is now empty, reset the stack 508 // pointers to their initial values. 509 if (stackIsEmpty()) { 510 parenSP = -1; 511 } 512 } 513 514 private final ParenStackEntry top() 515 { 516 return parenStack[parenSP]; 517 } 518 519 private final void syncFixup() 520 { 521 fixupCount = 0; 522 } 523 524 private final void fixup(int scrptCode) 525 { 526 int fixupSP = dec(parenSP, fixupCount); 527 528 while (fixupCount-- > 0) { 529 fixupSP = inc(fixupSP); 530 parenStack[fixupSP].scriptCode = scrptCode; 531 } 532 } 533 534 private char[] emptyCharArray = {}; 535 536 private char[] text; 537 538 private int textIndex; 539 private int textStart; 540 private int textLimit; 541 542 private int scriptStart; 543 private int scriptLimit; 544 private int scriptCode; 545 546 private static int PAREN_STACK_DEPTH = 32; 547 private static ParenStackEntry parenStack[] = new ParenStackEntry[PAREN_STACK_DEPTH]; 548 private int parenSP = -1; 549 private int pushCount = 0; 550 private int fixupCount = 0; 551 552 /** 553 * Find the highest bit that's set in a word. Uses a binary search through 554 * the bits. 555 * 556 * @param n the word in which to find the highest bit that's set. 557 * @return the bit number (counting from the low order bit) of the highest bit. 558 */ 559 private static final byte highBit(int n) 560 { 561 if (n <= 0) { 562 return -32; 563 } 564 565 byte bit = 0; 566 567 if (n >= 1 << 16) { 568 n >>= 16; 569 bit += 16; 570 } 571 572 if (n >= 1 << 8) { 573 n >>= 8; 574 bit += 8; 575 } 576 577 if (n >= 1 << 4) { 578 n >>= 4; 579 bit += 4; 580 } 581 582 if (n >= 1 << 2) { 583 n >>= 2; 584 bit += 2; 585 } 586 587 if (n >= 1 << 1) { 588 n >>= 1; 589 bit += 1; 590 } 591 592 return bit; 593 } 594 595 /** 596 * Search the pairedChars array for the given character. 597 * 598 * @param ch the character for which to search. 599 * @return the index of the character in the table, or -1 if it's not there. 600 */ 601 private static int getPairIndex(int ch) 602 { 603 int probe = pairedCharPower; 604 int index = 0; 605 606 if (ch >= pairedChars[pairedCharExtra]) { 607 index = pairedCharExtra; 608 } 609 610 while (probe > (1 << 0)) { 611 probe >>= 1; 612 613 if (ch >= pairedChars[index + probe]) { 614 index += probe; 615 } 616 } 617 618 if (pairedChars[index] != ch) { 619 index = -1; 620 } 621 622 return index; 623 } 624 625 private static int pairedChars[] = { 626 0x0028, 0x0029, // ascii paired punctuation 627 0x003c, 0x003e, 628 0x005b, 0x005d, 629 0x007b, 0x007d, 630 0x00ab, 0x00bb, // guillemets 631 0x2018, 0x2019, // general punctuation 632 0x201c, 0x201d, 633 0x2039, 0x203a, 634 0x3008, 0x3009, // chinese paired punctuation 635 0x300a, 0x300b, 636 0x300c, 0x300d, 637 0x300e, 0x300f, 638 0x3010, 0x3011, 639 0x3014, 0x3015, 640 0x3016, 0x3017, 641 0x3018, 0x3019, 642 0x301a, 0x301b 643 }; 644 645 private static int pairedCharPower = 1 << highBit(pairedChars.length); 646 private static int pairedCharExtra = pairedChars.length - pairedCharPower; 647 } 648 649