/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package java.text;

import java.awt.font.NumericShaper;
import java.awt.font.TextAttribute;
import java.util.ArrayList;
import java.util.Arrays;
import org.apache.harmony.text.BidiRun;
import org.apache.harmony.text.NativeBidi;

/**
 * Provides the Unicode Bidirectional Algorithm. The algorithm is
 * defined in the Unicode Standard Annex #9, version 13, also described in The
 * Unicode Standard, Version 4.0.
 *
 * Use a {@code Bidi} object to get the information on the position reordering of a
 * bidirectional text, such as Arabic or Hebrew. The natural display ordering of
 * horizontal text in these languages is from right to left, while they order
 * numbers from left to right.
 *
 * If the text contains multiple runs, the information of each run can be
 * obtained from the run index. The level of any particular run indicates the
 * direction of the text as well as the nesting level. Left-to-right runs have
 * even levels while right-to-left runs have odd levels.
 */
public final class Bidi {
    /**
     * Constant that indicates the default base level. If there is no strong
     * character, then set the paragraph level to 0 (left-to-right).
     */
    public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;

    /**
     * Constant that indicates the default base level. If there is no strong
     * character, then set the paragraph level to 1 (right-to-left).
     */
    public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;

    /**
     * Constant that specifies the default base level as 0 (left-to-right).
     */
    public static final int DIRECTION_LEFT_TO_RIGHT = 0;

    /**
     * Constant that specifies the default base level as 1 (right-to-left).
     */
    public static final int DIRECTION_RIGHT_TO_LEFT = 1;

    /**
     * Creates a {@code Bidi} object from the {@code
     * AttributedCharacterIterator} of a paragraph text. The RUN_DIRECTION
     * attribute determines the base direction of the bidirectional text. If it
     * is not specified explicitly, the algorithm uses
     * DIRECTION_DEFAULT_LEFT_TO_RIGHT by default. The BIDI_EMBEDDING attribute
     * specifies the level of embedding for each character. Values between -1
     * and -62 denote overrides at the level's absolute value, values from 1 to
     * 62 indicate embeddings, and the 0 value indicates the level is calculated
     * by the algorithm automatically. For the character with no BIDI_EMBEDDING
     * attribute or with an improper attribute value, such as a {@code null}
     * value, the algorithm treats its embedding level as 0. The NUMERIC_SHAPING
     * attribute specifies the instance of NumericShaper used to convert
     * European digits to other decimal digits before performing the bidi
     * algorithm.
     *
     * <p>RUN_DIRECTION and NUMERIC_SHAPING are read at the first character of
     * the paragraph; BIDI_EMBEDDING is read at every character.
     *
     * @param paragraph
     *            the {@code AttributedCharacterIterator} over the paragraph
     *            text to perform the algorithm on.
     * @throws IllegalArgumentException if {@code paragraph == null}
     * @see java.awt.font.TextAttribute#BIDI_EMBEDDING
     * @see java.awt.font.TextAttribute#NUMERIC_SHAPING
     * @see java.awt.font.TextAttribute#RUN_DIRECTION
     */
    public Bidi(AttributedCharacterIterator paragraph) {
        if (paragraph == null) {
            throw new IllegalArgumentException("paragraph is null");
        }

        int begin = paragraph.getBeginIndex();
        int end = paragraph.getEndIndex();
        int length = end - begin;
        char[] text = new char[length];

        // Position the iterator on the first character. The paragraph-wide
        // attributes below must be queried here, before the iterator is
        // advanced by the text-gathering loop.
        char current = paragraph.first();

        // First check the RUN_DIRECTION attribute.
        int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
        Object direction = paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
        if (direction instanceof Boolean) {
            flags = direction.equals(TextAttribute.RUN_DIRECTION_LTR)
                    ? DIRECTION_LEFT_TO_RIGHT
                    : DIRECTION_RIGHT_TO_LEFT;
        }
        Object numericShaper = paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);

        // Retrieve the text and gather BIDI_EMBEDDINGS, one character at a
        // time so that every position (including a single-character paragraph
        // or a trailing one-character run) gets its own embedding level.
        byte[] embeddings = null;
        for (int i = 0; i < length; i++) {
            text[i] = current;
            Object embedding = paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
            if (embedding instanceof Integer) {
                // Allocated lazily: stays null when no character carries an embedding.
                if (embeddings == null) {
                    embeddings = new byte[length];
                }
                embeddings[i] = (byte) ((Integer) embedding).intValue();
            }
            current = paragraph.next();
        }

        // Apply NumericShaper to the text
        if (numericShaper instanceof NumericShaper) {
            ((NumericShaper) numericShaper).shape(text, 0, length);
        }

        long bidi = 0;
        try {
            bidi = createUBiDi(text, 0, embeddings, 0, length, flags);
            readBidiInfo(bidi);
        } finally {
            NativeBidi.ubidi_close(bidi);
        }
    }

    /**
     * Creates a {@code Bidi} object.
     *
     * @param text
     *            the char array of the paragraph text that is processed.
     * @param textStart
     *            the index in {@code text} of the start of the paragraph.
     * @param embeddings
     *            the embedding level array of the paragraph text, specifying
     *            the embedding level information for each character. Values
     *            between -1 and -61 denote overrides at the level's absolute
     *            value, values from 1 to 61 indicate embeddings, and the 0
     *            value indicates the level is calculated by the algorithm
     *            automatically. May be {@code null}.
     * @param embStart
     *            the index in {@code embeddings} of the start of the paragraph.
     * @param paragraphLength
     *            the length of the text to perform the algorithm.
     * @param flags
     *            indicates the base direction of the bidirectional text. It is
     *            expected that this will be one of the direction constant
     *            values defined in this class. A value outside the range of
     *            those constants is treated as DIRECTION_LEFT_TO_RIGHT.
     * @throws IllegalArgumentException
     *             if {@code text} is {@code null}; if {@code textStart},
     *             {@code embStart}, or {@code paragraphLength} is negative; if
     *             {@code text.length < textStart + paragraphLength} or
     *             {@code embeddings.length < embStart + paragraphLength}.
     * @see #DIRECTION_LEFT_TO_RIGHT
     * @see #DIRECTION_RIGHT_TO_LEFT
     * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
     * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
     */
    public Bidi(char[] text, int textStart, byte[] embeddings, int embStart,
            int paragraphLength, int flags) {

        // Reject negative arguments first so the subtraction-based range
        // checks below only ever see non-negative offsets.
        if (textStart < 0) {
            throw new IllegalArgumentException("Negative textStart value " + textStart);
        }
        if (embStart < 0) {
            throw new IllegalArgumentException("Negative embStart value " + embStart);
        }
        if (paragraphLength < 0) {
            throw new IllegalArgumentException("Negative paragraph length " + paragraphLength);
        }

        if (text == null || text.length - textStart < paragraphLength) {
            throw new IllegalArgumentException();
        }

        if (embeddings != null && embeddings.length - embStart < paragraphLength) {
            throw new IllegalArgumentException();
        }

        long bidi = 0;
        try {
            bidi = createUBiDi(text, textStart, embeddings, embStart, paragraphLength, flags);
            readBidiInfo(bidi);
        } finally {
            NativeBidi.ubidi_close(bidi);
        }
    }

    /**
     * Creates a {@code Bidi} object.
     *
     * @param paragraph
     *            the string containing the paragraph text to perform the
     *            algorithm on.
     * @param flags
     *            indicates the base direction of the bidirectional text. It is
     *            expected that this will be one of the direction constant
     *            values defined in this class. A value outside the range of
     *            those constants is treated as DIRECTION_LEFT_TO_RIGHT.
     * @throws IllegalArgumentException if {@code paragraph == null}
     * @see #DIRECTION_LEFT_TO_RIGHT
     * @see #DIRECTION_RIGHT_TO_LEFT
     * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
     * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
     */
    public Bidi(String paragraph, int flags) {
        this((paragraph == null ? null : paragraph.toCharArray()), 0, null, 0,
                (paragraph == null ? 0 : paragraph.length()), flags);
    }

    // Creates the native UBiDi struct for the given paragraph; the returned
    // handle needs to be closed with ubidi_close().
    private static long createUBiDi(char[] text, int textStart,
            byte[] embeddings, int embStart, int paragraphLength, int flags) {
        if (text == null || text.length - textStart < paragraphLength) {
            throw new IllegalArgumentException();
        }
        char[] realText = new char[paragraphLength];
        System.arraycopy(text, textStart, realText, 0, paragraphLength);

        byte[] realEmbeddings = null;
        if (embeddings != null) {
            if (embeddings.length - embStart < paragraphLength) {
                throw new IllegalArgumentException();
            }
            if (paragraphLength > 0) {
                // Resolve the levels without any embeddings first; they are the
                // starting values for positions whose embedding is 0.
                Bidi temp = new Bidi(text, textStart, null, 0, paragraphLength, flags);
                realEmbeddings = new byte[paragraphLength];
                System.arraycopy(temp.offsetLevel, 0, realEmbeddings, 0, paragraphLength);
                for (int i = 0; i < paragraphLength; i++) {
                    // The paragraph's embeddings start at embStart, not at 0.
                    byte e = embeddings[embStart + i];
                    if (e < 0) {
                        // Negative value: override at the absolute level.
                        realEmbeddings[i] = (byte) (NativeBidi.UBIDI_LEVEL_OVERRIDE - e);
                    } else if (e > 0) {
                        realEmbeddings[i] = e;
                    } else {
                        // Zero: keep the resolved level and set the override bit on it.
                        realEmbeddings[i] |= (byte) NativeBidi.UBIDI_LEVEL_OVERRIDE;
                    }
                }
            }
        }

        // Values outside [-2, 1] are not direction constants; fall back to 0.
        if (flags > 1 || flags < -2) {
            flags = 0;
        }

        long bidi = 0;
        boolean needsDeletion = true;
        try {
            bidi = NativeBidi.ubidi_open();
            NativeBidi.ubidi_setPara(bidi, realText, paragraphLength, flags, realEmbeddings);
            needsDeletion = false;
        } finally {
            // Only release the handle here when setup failed; on success the
            // caller owns it.
            if (needsDeletion) {
                NativeBidi.ubidi_close(bidi);
            }
        }
        return bidi;
    }

    /* private constructor used by createLineBidi() */
    private Bidi(long pBidi) {
        readBidiInfo(pBidi);
    }

    // Reads length, levels, base level, runs and direction from the native
    // UBiDi struct into this object's fields.
    private void readBidiInfo(long pBidi) {
        length = NativeBidi.ubidi_getLength(pBidi);

        offsetLevel = (length == 0) ? null : NativeBidi.ubidi_getLevels(pBidi);

        baseLevel = NativeBidi.ubidi_getParaLevel(pBidi);

        int runCount = NativeBidi.ubidi_countRuns(pBidi);
        if (runCount == 0) {
            unidirectional = true;
            runs = null;
        } else if (runCount < 0) {
            runs = null;
        } else {
            runs = NativeBidi.ubidi_getRuns(pBidi);

            // Simplified case for one run which has the base level
            if (runCount == 1 && runs[0].getLevel() == baseLevel) {
                unidirectional = true;
                runs = null;
            }
        }

        direction = NativeBidi.ubidi_getDirection(pBidi);
    }

    // Paragraph (base) embedding level.
    private int baseLevel;

    // Number of characters covered by this object.
    private int length;

    // Per-character levels; null when length is 0.
    private byte[] offsetLevel;

    // Directional runs; null when unidirectional is true.
    private BidiRun[] runs;

    // One of the NativeBidi.UBiDiDirection_* values.
    private int direction;

    // True when the text is reported as a single run at the base level.
    private boolean unidirectional;

    /**
     * Returns whether the base level is from left to right.
     *
     * @return true if the base level is from left to right.
     */
    public boolean baseIsLeftToRight() {
        return baseLevel % 2 == 0;
    }

    /**
     * Creates a new {@code Bidi} object containing the information of one line
     * from this object.
     *
     * @param lineStart
     *            the start offset of the line.
     * @param lineLimit
     *            the limit of the line.
     * @return the new line Bidi object. In this new object, the indices will
     *         range from 0 to (limit - start - 1).
     * @throws IllegalArgumentException
     *             if {@code lineStart < 0}, {@code lineLimit < 0}, {@code
     *             lineStart > lineLimit} or if {@code lineLimit} is greater
     *             than the length of this object's paragraph text.
     */
    public Bidi createLineBidi(int lineStart, int lineLimit) {
        if (lineStart < 0 || lineLimit < 0 || lineLimit > length || lineStart > lineLimit) {
            throw new IllegalArgumentException("Invalid ranges (start=" + lineStart + ", " +
                    "limit=" + lineLimit + ", length=" + length + ")");
        }

        // The original text is not retained, so rebuild a native paragraph from
        // placeholder characters and force every position to its already
        // resolved level by passing the levels as overrides (negative values).
        char[] text = new char[this.length];
        Arrays.fill(text, 'a');
        byte[] embeddings = new byte[this.length];
        for (int i = 0; i < embeddings.length; i++) {
            embeddings[i] = (byte) -this.offsetLevel[i];
        }

        int dir = this.baseIsLeftToRight()
                ? Bidi.DIRECTION_LEFT_TO_RIGHT
                : Bidi.DIRECTION_RIGHT_TO_LEFT;
        long parent = 0;
        try {
            parent = createUBiDi(text, 0, embeddings, 0, this.length, dir);
            if (lineStart == lineLimit) {
                return createEmptyLineBidi(parent);
            }
            // NOTE(review): the handle returned by ubidi_setLine is never passed
            // to ubidi_close. If it is a separately allocated native object this
            // leaks it - confirm against NativeBidi.
            return new Bidi(NativeBidi.ubidi_setLine(parent, lineStart, lineLimit));
        } finally {
            NativeBidi.ubidi_close(parent);
        }
    }

    // Builds the result for a zero-length line: base level and direction are
    // taken from the parent, everything else is reset to the empty state.
    private Bidi createEmptyLineBidi(long parent) {
        // ICU4C doesn't allow this case, but the RI does.
        Bidi result = new Bidi(parent);
        result.length = 0;
        result.offsetLevel = null;
        result.runs = null;
        result.unidirectional = true;
        return result;
    }

    /**
     * Returns the base level.
     *
     * @return the base level.
     */
    public int getBaseLevel() {
        return baseLevel;
    }

    /**
     * Returns the length of the text in the {@code Bidi} object.
     *
     * @return the length.
     */
    public int getLength() {
        return length;
    }

    /**
     * Returns the level of a specified character.
     *
     * @param offset
     *            the offset of the character.
     * @return the level, or the base level if {@code offset} is outside the
     *         text of this object.
     */
    public int getLevelAt(int offset) {
        if (offsetLevel == null || offset < 0 || offset >= offsetLevel.length) {
            return baseLevel;
        }
        // Strip the override bit so only the numeric level is reported.
        return offsetLevel[offset] & ~NativeBidi.UBIDI_LEVEL_OVERRIDE;
    }

    /**
     * Returns the number of runs in the bidirectional text.
     *
     * @return the number of runs, at least 1.
     */
    public int getRunCount() {
        return unidirectional ? 1 : runs.length;
    }

    /**
     * Returns the level of the specified run.
     *
     * @param run
     *            the index of the run.
     * @return the level of the run.
     */
    public int getRunLevel(int run) {
        return unidirectional ? baseLevel : runs[run].getLevel();
    }

    /**
     * Returns the limit offset of the specified run.
     *
     * @param run
     *            the index of the run.
     * @return the limit offset of the run.
     */
    public int getRunLimit(int run) {
        return unidirectional ? length : runs[run].getLimit();
    }

    /**
     * Returns the start offset of the specified run.
     *
     * @param run
     *            the index of the run.
     * @return the start offset of the run.
     */
    public int getRunStart(int run) {
        return unidirectional ? 0 : runs[run].getStart();
    }

    /**
     * Indicates whether the text is from left to right, that is, both the base
     * direction and the text direction is from left to right.
     *
     * @return {@code true} if the text is from left to right; {@code false}
     *         otherwise.
     */
    public boolean isLeftToRight() {
        return direction == NativeBidi.UBiDiDirection_UBIDI_LTR;
    }

    /**
     * Indicates whether the text direction is mixed.
     *
     * @return {@code true} if the text direction is mixed; {@code false}
     *         otherwise.
     */
    public boolean isMixed() {
        return direction == NativeBidi.UBiDiDirection_UBIDI_MIXED;
    }

    /**
     * Indicates whether the text is from right to left, that is, both the base
     * direction and the text direction is from right to left.
     *
     * @return {@code true} if the text is from right to left; {@code false}
     *         otherwise.
     */
    public boolean isRightToLeft() {
        return direction == NativeBidi.UBiDiDirection_UBIDI_RTL;
    }

    /**
     * Reorders a range of objects according to their specified levels. This is
     * a convenience function that does not use a {@code Bidi} object. The range
     * of objects at {@code index} from {@code objectStart} to {@code
     * objectStart + count} will be reordered according to the range of levels
     * at {@code index} from {@code levelStart} to {@code levelStart + count}.
     *
     * @param levels
     *            the level array, which is already determined.
     * @param levelStart
     *            the start offset of the range of the levels.
     * @param objects
     *            the object array to reorder.
     * @param objectStart
     *            the start offset of the range of objects.
     * @param count
     *            the count of the range of objects to reorder.
     * @throws IllegalArgumentException
     *             if {@code count}, {@code levelStart} or {@code objectStart}
     *             is negative; if {@code count > levels.length - levelStart} or
     *             if {@code count > objects.length - objectStart}.
     */
    public static void reorderVisually(byte[] levels, int levelStart,
            Object[] objects, int objectStart, int count) {
        if (count < 0 || levelStart < 0 || objectStart < 0
                || count > levels.length - levelStart
                || count > objects.length - objectStart) {
            throw new IllegalArgumentException("Invalid ranges (levels=" + levels.length +
                    ", levelStart=" + levelStart + ", objects=" + objects.length +
                    ", objectStart=" + objectStart + ", count=" + count + ")");
        }

        byte[] realLevels = new byte[count];
        System.arraycopy(levels, levelStart, realLevels, 0, count);

        int[] indices = NativeBidi.ubidi_reorderVisual(realLevels, count);

        // Collect into a scratch list first: the source slots are overwritten
        // by the final copy, so the permutation cannot be applied in place.
        ArrayList<Object> result = new ArrayList<Object>(count);
        for (int i = 0; i < count; i++) {
            result.add(objects[objectStart + indices[i]]);
        }

        System.arraycopy(result.toArray(), 0, objects, objectStart, count);
    }

    /**
     * Indicates whether a range of characters of a text requires a {@code Bidi}
     * object to display properly.
     *
     * @param text
     *            the char array of the text.
     * @param start
     *            the start offset of the range of characters.
     * @param limit
     *            the limit offset of the range of characters.
     * @return {@code true} if the range of characters requires a {@code Bidi}
     *         object; {@code false} otherwise.
     * @throws IllegalArgumentException
     *             if {@code start} or {@code limit} is negative; {@code start >
     *             limit} or {@code limit} is greater than the length of
     *             {@code text}.
     */
    public static boolean requiresBidi(char[] text, int start, int limit) {
        if (limit < 0 || start < 0 || start > limit || limit > text.length) {
            throw new IllegalArgumentException();
        }

        Bidi bidi = new Bidi(text, start, null, 0, limit - start, 0);
        return !bidi.isLeftToRight();
    }

    /**
     * Returns the internal message of the {@code Bidi} object, used in
     * debugging.
     *
     * @return a string containing the internal message.
     */
    @Override
    public String toString() {
        return getClass().getName()
                + "[direction: " + direction + " baseLevel: " + baseLevel
                + " length: " + length + " runs: " + Arrays.toString(runs) + "]";
    }
}