1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.text; 19 20 import java.awt.font.NumericShaper; 21 import java.awt.font.TextAttribute; 22 import java.util.ArrayList; 23 import java.util.Arrays; 24 25 /** 26 * Implements the <a href="http://unicode.org/reports/tr9/">Unicode Bidirectional Algorithm</a>. 27 * 28 * <p>Use a {@code Bidi} object to get the information on the position reordering of a 29 * bidirectional text, such as Arabic or Hebrew. The natural display ordering of 30 * horizontal text in these languages is from right to left, while they order 31 * numbers from left to right. 32 * 33 * <p>If the text contains multiple runs, the information of each run can be 34 * obtained from the run index. The level of any particular run indicates the 35 * direction of the text as well as the nesting level. Left-to-right runs have 36 * even levels while right-to-left runs have odd levels. 37 */ 38 public final class Bidi { 39 /** 40 * Constant that indicates the default base level. If there is no strong 41 * character, then set the paragraph level to 0 (left-to-right). 42 */ 43 public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2; 44 45 /** 46 * Constant that indicates the default base level. If there is no strong 47 * character, then set the paragraph level to 1 (right-to-left). 48 */ 49 public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1; 50 51 /** 52 * Constant that specifies the default base level as 0 (left-to-right). 53 */ 54 public static final int DIRECTION_LEFT_TO_RIGHT = 0; 55 56 /** 57 * Constant that specifies the default base level as 1 (right-to-left). 58 */ 59 public static final int DIRECTION_RIGHT_TO_LEFT = 1; 60 61 /** 62 * TODO: if we care about performance, we might just want to use an int[] instead of a Run[]. 63 */ 64 static class Run { 65 private final int start; 66 private final int limit; 67 private final int level; 68 69 public Run(int start, int limit, int level) { 70 this.start = start; 71 this.limit = limit; 72 this.level = level; 73 } 74 75 public int getLevel() { 76 return level; 77 } 78 79 public int getLimit() { 80 return limit; 81 } 82 83 public int getStart() { 84 return start; 85 } 86 } 87 88 /** 89 * Creates a {@code Bidi} object from the {@code 90 * AttributedCharacterIterator} of a paragraph text. The RUN_DIRECTION 91 * attribute determines the base direction of the bidirectional text. If it 92 * is not specified explicitly, the algorithm uses 93 * DIRECTION_DEFAULT_LEFT_TO_RIGHT by default. The BIDI_EMBEDDING attribute 94 * specifies the level of embedding for each character. Values between -1 95 * and -62 denote overrides at the level's absolute value, values from 1 to 96 * 62 indicate embeddings, and the 0 value indicates the level is calculated 97 * by the algorithm automatically. For the character with no BIDI_EMBEDDING 98 * attribute or with a improper attribute value, such as a {@code null} 99 * value, the algorithm treats its embedding level as 0. The NUMERIC_SHAPING 100 * attribute specifies the instance of NumericShaper used to convert 101 * European digits to other decimal digits before performing the bidi 102 * algorithm. 103 * 104 * @param paragraph 105 * the String containing the paragraph text to perform the 106 * algorithm. 107 * @throws IllegalArgumentException if {@code paragraph == null} 108 * @see java.awt.font.TextAttribute#BIDI_EMBEDDING 109 * @see java.awt.font.TextAttribute#NUMERIC_SHAPING 110 * @see java.awt.font.TextAttribute#RUN_DIRECTION 111 */ 112 public Bidi(AttributedCharacterIterator paragraph) { 113 if (paragraph == null) { 114 throw new IllegalArgumentException("paragraph is null"); 115 } 116 117 int begin = paragraph.getBeginIndex(); 118 int end = paragraph.getEndIndex(); 119 int length = end - begin; 120 char[] text = new char[length + 1]; // One more char for AttributedCharacterIterator.DONE 121 122 if (length != 0) { 123 text[0] = paragraph.first(); 124 } else { 125 paragraph.first(); 126 } 127 128 // First check the RUN_DIRECTION attribute. 129 int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT; 130 Object direction = paragraph.getAttribute(TextAttribute.RUN_DIRECTION); 131 if (direction != null && direction instanceof Boolean) { 132 if (direction.equals(TextAttribute.RUN_DIRECTION_LTR)) { 133 flags = DIRECTION_LEFT_TO_RIGHT; 134 } else { 135 flags = DIRECTION_RIGHT_TO_LEFT; 136 } 137 } 138 139 // Retrieve the text and gather BIDI_EMBEDDINGS 140 byte[] embeddings = null; 141 for (int textLimit = 1, i = 1; i < length; textLimit = paragraph 142 .getRunLimit(TextAttribute.BIDI_EMBEDDING) 143 - begin + 1) { 144 Object embedding = paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING); 145 if (embedding != null && embedding instanceof Integer) { 146 int embLevel = ((Integer) embedding).intValue(); 147 148 if (embeddings == null) { 149 embeddings = new byte[length]; 150 } 151 152 for (; i < textLimit; i++) { 153 text[i] = paragraph.next(); 154 embeddings[i - 1] = (byte) embLevel; 155 } 156 } else { 157 for (; i < textLimit; i++) { 158 text[i] = paragraph.next(); 159 } 160 } 161 } 162 163 // Apply NumericShaper to the text 164 Object numericShaper = paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING); 165 if (numericShaper != null && numericShaper instanceof NumericShaper) { 166 ((NumericShaper) numericShaper).shape(text, 0, length); 167 } 168 169 long bidi = 0; 170 try { 171 bidi = createUBiDi(text, 0, embeddings, 0, length, flags); 172 readBidiInfo(bidi); 173 } finally { 174 ubidi_close(bidi); 175 } 176 } 177 178 /** 179 * Creates a {@code Bidi} object. 180 * 181 * @param text 182 * the char array of the paragraph text that is processed. 183 * @param textStart 184 * the index in {@code text} of the start of the paragraph. 185 * @param embeddings 186 * the embedding level array of the paragraph text, specifying 187 * the embedding level information for each character. Values 188 * between -1 and -61 denote overrides at the level's absolute 189 * value, values from 1 to 61 indicate embeddings, and the 0 190 * value indicates the level is calculated by the algorithm 191 * automatically. 192 * @param embStart 193 * the index in {@code embeddings} of the start of the paragraph. 194 * @param paragraphLength 195 * the length of the text to perform the algorithm. 196 * @param flags 197 * indicates the base direction of the bidirectional text. It is 198 * expected that this will be one of the direction constant 199 * values defined in this class. An unknown value is treated as 200 * DIRECTION_DEFAULT_LEFT_TO_RIGHT. 201 * @throws IllegalArgumentException 202 * if {@code textStart}, {@code embStart}, or {@code 203 * paragraphLength} is negative; if 204 * {@code text.length < textStart + paragraphLength} or 205 * {@code embeddings.length < embStart + paragraphLength}. 206 * @see #DIRECTION_LEFT_TO_RIGHT 207 * @see #DIRECTION_RIGHT_TO_LEFT 208 * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT 209 * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT 210 */ 211 public Bidi(char[] text, int textStart, byte[] embeddings, int embStart, 212 int paragraphLength, int flags) { 213 214 if (text == null || text.length - textStart < paragraphLength) { 215 throw new IllegalArgumentException(); 216 } 217 218 if (embeddings != null) { 219 if (embeddings.length - embStart < paragraphLength) { 220 throw new IllegalArgumentException(); 221 } 222 } 223 224 if (textStart < 0) { 225 throw new IllegalArgumentException("Negative textStart value " + textStart); 226 } 227 if (embStart < 0) { 228 throw new IllegalArgumentException("Negative embStart value " + embStart); 229 } 230 if (paragraphLength < 0) { 231 throw new IllegalArgumentException("Negative paragraph length " + paragraphLength); 232 } 233 234 long bidi = 0; 235 try { 236 bidi = createUBiDi(text, textStart, embeddings, embStart, paragraphLength, flags); 237 readBidiInfo(bidi); 238 } finally { 239 ubidi_close(bidi); 240 } 241 } 242 243 /** 244 * Creates a {@code Bidi} object. 245 * 246 * @param paragraph 247 * the string containing the paragraph text to perform the 248 * algorithm on. 249 * @param flags 250 * indicates the base direction of the bidirectional text. It is 251 * expected that this will be one of the direction constant 252 * values defined in this class. An unknown value is treated as 253 * DIRECTION_DEFAULT_LEFT_TO_RIGHT. 254 * @see #DIRECTION_LEFT_TO_RIGHT 255 * @see #DIRECTION_RIGHT_TO_LEFT 256 * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT 257 * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT 258 */ 259 public Bidi(String paragraph, int flags) { 260 this((paragraph == null ? null : paragraph.toCharArray()), 0, null, 0, 261 (paragraph == null ? 0 : paragraph.length()), flags); 262 } 263 264 // create the native UBiDi struct, need to be closed with ubidi_close(). 265 private static long createUBiDi(char[] text, int textStart, 266 byte[] embeddings, int embStart, int paragraphLength, int flags) { 267 char[] realText = null; 268 269 byte[] realEmbeddings = null; 270 271 if (text == null || text.length - textStart < paragraphLength) { 272 throw new IllegalArgumentException(); 273 } 274 realText = new char[paragraphLength]; 275 System.arraycopy(text, textStart, realText, 0, paragraphLength); 276 277 if (embeddings != null) { 278 if (embeddings.length - embStart < paragraphLength) { 279 throw new IllegalArgumentException(); 280 } 281 if (paragraphLength > 0) { 282 Bidi temp = new Bidi(text, textStart, null, 0, paragraphLength, flags); 283 realEmbeddings = new byte[paragraphLength]; 284 System.arraycopy(temp.offsetLevel, 0, realEmbeddings, 0, paragraphLength); 285 for (int i = 0; i < paragraphLength; i++) { 286 byte e = embeddings[i]; 287 if (e < 0) { 288 realEmbeddings[i] = (byte) (UBIDI_LEVEL_OVERRIDE - e); 289 } else if (e > 0) { 290 realEmbeddings[i] = e; 291 } else { 292 realEmbeddings[i] |= (byte) UBIDI_LEVEL_OVERRIDE; 293 } 294 } 295 } 296 } 297 298 if (flags > 1 || flags < -2) { 299 flags = 0; 300 } 301 302 long bidi = 0; 303 boolean needsDeletion = true; 304 try { 305 bidi = ubidi_open(); 306 ubidi_setPara(bidi, realText, paragraphLength, flags, realEmbeddings); 307 needsDeletion = false; 308 } finally { 309 if (needsDeletion) { 310 ubidi_close(bidi); 311 } 312 } 313 return bidi; 314 } 315 316 /* private constructor used by createLineBidi() */ 317 private Bidi(long pBidi) { 318 readBidiInfo(pBidi); 319 } 320 321 // read info from the native UBiDi struct 322 private void readBidiInfo(long pBidi) { 323 length = ubidi_getLength(pBidi); 324 325 offsetLevel = (length == 0) ? null : ubidi_getLevels(pBidi); 326 327 baseLevel = ubidi_getParaLevel(pBidi); 328 329 int runCount = ubidi_countRuns(pBidi); 330 if (runCount == 0) { 331 unidirectional = true; 332 runs = null; 333 } else if (runCount < 0) { 334 runs = null; 335 } else { 336 runs = ubidi_getRuns(pBidi); 337 338 // Simplified case for one run which has the base level 339 if (runCount == 1 && runs[0].getLevel() == baseLevel) { 340 unidirectional = true; 341 runs = null; 342 } 343 } 344 345 direction = ubidi_getDirection(pBidi); 346 } 347 348 private int baseLevel; 349 350 private int length; 351 352 private byte[] offsetLevel; 353 354 private Run[] runs; 355 356 private int direction; 357 358 private boolean unidirectional; 359 360 /** 361 * Returns whether the base level is from left to right. 362 * 363 * @return true if the base level is from left to right. 364 */ 365 public boolean baseIsLeftToRight() { 366 return baseLevel % 2 == 0 ? true : false; 367 } 368 369 /** 370 * Creates a new {@code Bidi} object containing the information of one line 371 * from this object. 372 * 373 * @param lineStart 374 * the start offset of the line. 375 * @param lineLimit 376 * the limit of the line. 377 * @return the new line Bidi object. In this new object, the indices will 378 * range from 0 to (limit - start - 1). 379 * @throws IllegalArgumentException 380 * if {@code lineStart < 0}, {@code lineLimit < 0}, {@code 381 * lineStart > lineLimit} or if {@code lineStart} is greater 382 * than the length of this object's paragraph text. 383 */ 384 public Bidi createLineBidi(int lineStart, int lineLimit) { 385 if (lineStart < 0 || lineLimit < 0 || lineLimit > length || lineStart > lineLimit) { 386 throw new IllegalArgumentException("Invalid ranges (start=" + lineStart + ", " + 387 "limit=" + lineLimit + ", length=" + length + ")"); 388 } 389 390 char[] text = new char[this.length]; 391 Arrays.fill(text, 'a'); 392 byte[] embeddings = new byte[this.length]; 393 for (int i = 0; i < embeddings.length; i++) { 394 embeddings[i] = (byte) -this.offsetLevel[i]; 395 } 396 397 int dir = this.baseIsLeftToRight() 398 ? Bidi.DIRECTION_LEFT_TO_RIGHT 399 : Bidi.DIRECTION_RIGHT_TO_LEFT; 400 long parent = 0; 401 try { 402 parent = createUBiDi(text, 0, embeddings, 0, this.length, dir); 403 if (lineStart == lineLimit) { 404 return createEmptyLineBidi(parent); 405 } 406 return new Bidi(ubidi_setLine(parent, lineStart, lineLimit)); 407 } finally { 408 ubidi_close(parent); 409 } 410 } 411 412 private Bidi createEmptyLineBidi(long parent) { 413 // ICU4C doesn't allow this case, but the RI does. 414 Bidi result = new Bidi(parent); 415 result.length = 0; 416 result.offsetLevel = null; 417 result.runs = null; 418 result.unidirectional = true; 419 return result; 420 } 421 422 /** 423 * Returns the base level. 424 * 425 * @return the base level. 426 */ 427 public int getBaseLevel() { 428 return baseLevel; 429 } 430 431 /** 432 * Returns the length of the text in the {@code Bidi} object. 433 * 434 * @return the length. 435 */ 436 public int getLength() { 437 return length; 438 } 439 440 /** 441 * Returns the level of a specified character. 442 * 443 * @param offset 444 * the offset of the character. 445 * @return the level. 446 */ 447 public int getLevelAt(int offset) { 448 try { 449 return offsetLevel[offset] & ~UBIDI_LEVEL_OVERRIDE; 450 } catch (RuntimeException e) { 451 return baseLevel; 452 } 453 } 454 455 /** 456 * Returns the number of runs in the bidirectional text. 457 * 458 * @return the number of runs, at least 1. 459 */ 460 public int getRunCount() { 461 return unidirectional ? 1 : runs.length; 462 } 463 464 /** 465 * Returns the level of the specified run. 466 * 467 * @param run 468 * the index of the run. 469 * @return the level of the run. 470 */ 471 public int getRunLevel(int run) { 472 return unidirectional ? baseLevel : runs[run].getLevel(); 473 } 474 475 /** 476 * Returns the limit offset of the specified run. 477 * 478 * @param run 479 * the index of the run. 480 * @return the limit offset of the run. 481 */ 482 public int getRunLimit(int run) { 483 return unidirectional ? length : runs[run].getLimit(); 484 } 485 486 /** 487 * Returns the start offset of the specified run. 488 * 489 * @param run 490 * the index of the run. 491 * @return the start offset of the run. 492 */ 493 public int getRunStart(int run) { 494 return unidirectional ? 0 : runs[run].getStart(); 495 } 496 497 /** 498 * Indicates whether the text is from left to right, that is, both the base 499 * direction and the text direction is from left to right. 500 * 501 * @return {@code true} if the text is from left to right; {@code false} 502 * otherwise. 503 */ 504 public boolean isLeftToRight() { 505 return direction == UBiDiDirection_UBIDI_LTR; 506 } 507 508 /** 509 * Indicates whether the text direction is mixed. 510 * 511 * @return {@code true} if the text direction is mixed; {@code false} 512 * otherwise. 513 */ 514 public boolean isMixed() { 515 return direction == UBiDiDirection_UBIDI_MIXED; 516 } 517 518 /** 519 * Indicates whether the text is from right to left, that is, both the base 520 * direction and the text direction is from right to left. 521 * 522 * @return {@code true} if the text is from right to left; {@code false} 523 * otherwise. 524 */ 525 public boolean isRightToLeft() { 526 return direction == UBiDiDirection_UBIDI_RTL; 527 } 528 529 /** 530 * Reorders a range of objects according to their specified levels. This is 531 * a convenience function that does not use a {@code Bidi} object. The range 532 * of objects at {@code index} from {@code objectStart} to {@code 533 * objectStart + count} will be reordered according to the range of levels 534 * at {@code index} from {@code levelStart} to {@code levelStart + count}. 535 * 536 * @param levels 537 * the level array, which is already determined. 538 * @param levelStart 539 * the start offset of the range of the levels. 540 * @param objects 541 * the object array to reorder. 542 * @param objectStart 543 * the start offset of the range of objects. 544 * @param count 545 * the count of the range of objects to reorder. 546 * @throws IllegalArgumentException 547 * if {@code count}, {@code levelStart} or {@code objectStart} 548 * is negative; if {@code count > levels.length - levelStart} or 549 * if {@code count > objects.length - objectStart}. 550 */ 551 public static void reorderVisually(byte[] levels, int levelStart, 552 Object[] objects, int objectStart, int count) { 553 if (count < 0 || levelStart < 0 || objectStart < 0 554 || count > levels.length - levelStart 555 || count > objects.length - objectStart) { 556 throw new IllegalArgumentException("Invalid ranges (levels=" + levels.length + 557 ", levelStart=" + levelStart + ", objects=" + objects.length + 558 ", objectStart=" + objectStart + ", count=" + count + ")"); 559 } 560 561 byte[] realLevels = new byte[count]; 562 System.arraycopy(levels, levelStart, realLevels, 0, count); 563 564 int[] indices = ubidi_reorderVisual(realLevels, count); 565 566 ArrayList<Object> result = new ArrayList<Object>(count); 567 for (int i = 0; i < count; i++) { 568 result.add(objects[objectStart + indices[i]]); 569 } 570 571 System.arraycopy(result.toArray(), 0, objects, objectStart, count); 572 } 573 574 /** 575 * Indicates whether a range of characters of a text requires a {@code Bidi} 576 * object to display properly. 577 * 578 * @param text 579 * the char array of the text. 580 * @param start 581 * the start offset of the range of characters. 582 * @param limit 583 * the limit offset of the range of characters. 584 * @return {@code true} if the range of characters requires a {@code Bidi} 585 * object; {@code false} otherwise. 586 * @throws IllegalArgumentException 587 * if {@code start} or {@code limit} is negative; {@code start > 588 * limit} or {@code limit} is greater than the length of this 589 * object's paragraph text. 590 */ 591 public static boolean requiresBidi(char[] text, int start, int limit) { 592 if (limit < 0 || start < 0 || start > limit || limit > text.length) { 593 throw new IllegalArgumentException(); 594 } 595 596 Bidi bidi = new Bidi(text, start, null, 0, limit - start, 0); 597 return !bidi.isLeftToRight(); 598 } 599 600 @Override 601 public String toString() { 602 return getClass().getName() 603 + "[direction: " + direction + " baseLevel: " + baseLevel 604 + " length: " + length + " runs: " + Arrays.toString(runs) + "]"; 605 } 606 607 // ICU4C constants. 608 private static final int UBIDI_LEVEL_OVERRIDE = 0x80; 609 private static final int UBiDiDirection_UBIDI_LTR = 0; 610 private static final int UBiDiDirection_UBIDI_RTL = 1; 611 private static final int UBiDiDirection_UBIDI_MIXED = 2; 612 613 // ICU4C functions. 614 private static native long ubidi_open(); 615 private static native void ubidi_close(long pBiDi); 616 private static native void ubidi_setPara(long pBiDi, char[] text, int length, int paraLevel, byte[] embeddingLevels); 617 private static native long ubidi_setLine(final long pParaBiDi, int start, int limit); 618 private static native int ubidi_getDirection(final long pBiDi); 619 private static native int ubidi_getLength(final long pBiDi); 620 private static native byte ubidi_getParaLevel(final long pBiDi); 621 private static native byte[] ubidi_getLevels(long pBiDi); 622 private static native int ubidi_countRuns(long pBiDi); 623 private static native Bidi.Run[] ubidi_getRuns(long pBidi); 624 private static native int[] ubidi_reorderVisual(byte[] levels, int length); 625 } 626