Home | History | Annotate | Download | only in text
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package java.text;
     19 
     20 import java.awt.font.NumericShaper;
     21 import java.awt.font.TextAttribute;
     22 import java.util.ArrayList;
     23 import java.util.Arrays;
     24 
     25 /**
     26  * Implements the <a href="http://unicode.org/reports/tr9/">Unicode Bidirectional Algorithm</a>.
     27  *
     28  * <p>Use a {@code Bidi} object to get the information on the position reordering of a
     29  * bidirectional text, such as Arabic or Hebrew. The natural display ordering of
     30  * horizontal text in these languages is from right to left, while they order
     31  * numbers from left to right.
     32  *
     33  * <p>If the text contains multiple runs, the information of each run can be
     34  * obtained from the run index. The level of any particular run indicates the
     35  * direction of the text as well as the nesting level. Left-to-right runs have
     36  * even levels while right-to-left runs have odd levels.
     37  */
     38 public final class Bidi {
     39     /**
     40      * Constant that indicates the default base level. If there is no strong
     41      * character, then set the paragraph level to 0 (left-to-right).
     42      */
     43     public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
     44 
     45     /**
     46      * Constant that indicates the default base level. If there is no strong
     47      * character, then set the paragraph level to 1 (right-to-left).
     48      */
     49     public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
     50 
     51     /**
     52      * Constant that specifies the default base level as 0 (left-to-right).
     53      */
     54     public static final int DIRECTION_LEFT_TO_RIGHT = 0;
     55 
     56     /**
     57      * Constant that specifies the default base level as 1 (right-to-left).
     58      */
     59     public static final int DIRECTION_RIGHT_TO_LEFT = 1;
     60 
     61     /**
     62      * TODO: if we care about performance, we might just want to use an int[] instead of a Run[].
     63      */
     64     static class Run {
     65         private final int start;
     66         private final int limit;
     67         private final int level;
     68 
     69         public Run(int start, int limit, int level) {
     70             this.start = start;
     71             this.limit = limit;
     72             this.level = level;
     73         }
     74 
     75         public int getLevel() {
     76             return level;
     77         }
     78 
     79         public int getLimit() {
     80             return limit;
     81         }
     82 
     83         public int getStart() {
     84             return start;
     85         }
     86     }
     87 
     88     /**
     89      * Creates a {@code Bidi} object from the {@code
     90      * AttributedCharacterIterator} of a paragraph text. The RUN_DIRECTION
     91      * attribute determines the base direction of the bidirectional text. If it
     92      * is not specified explicitly, the algorithm uses
     93      * DIRECTION_DEFAULT_LEFT_TO_RIGHT by default. The BIDI_EMBEDDING attribute
     94      * specifies the level of embedding for each character. Values between -1
     95      * and -62 denote overrides at the level's absolute value, values from 1 to
     96      * 62 indicate embeddings, and the 0 value indicates the level is calculated
     97      * by the algorithm automatically. For the character with no BIDI_EMBEDDING
     98      * attribute or with a improper attribute value, such as a {@code null}
     99      * value, the algorithm treats its embedding level as 0. The NUMERIC_SHAPING
    100      * attribute specifies the instance of NumericShaper used to convert
    101      * European digits to other decimal digits before performing the bidi
    102      * algorithm.
    103      *
    104      * @param paragraph
    105      *            the String containing the paragraph text to perform the
    106      *            algorithm.
    107      * @throws IllegalArgumentException if {@code paragraph == null}
    108      * @see java.awt.font.TextAttribute#BIDI_EMBEDDING
    109      * @see java.awt.font.TextAttribute#NUMERIC_SHAPING
    110      * @see java.awt.font.TextAttribute#RUN_DIRECTION
    111      */
    112     public Bidi(AttributedCharacterIterator paragraph) {
    113         if (paragraph == null) {
    114             throw new IllegalArgumentException("paragraph is null");
    115         }
    116 
    117         int begin = paragraph.getBeginIndex();
    118         int end = paragraph.getEndIndex();
    119         int length = end - begin;
    120         char[] text = new char[length + 1]; // One more char for AttributedCharacterIterator.DONE
    121 
    122         if (length != 0) {
    123             text[0] = paragraph.first();
    124         } else {
    125             paragraph.first();
    126         }
    127 
    128         // First check the RUN_DIRECTION attribute.
    129         int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
    130         Object direction = paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
    131         if (direction != null && direction instanceof Boolean) {
    132             if (direction.equals(TextAttribute.RUN_DIRECTION_LTR)) {
    133                 flags = DIRECTION_LEFT_TO_RIGHT;
    134             } else {
    135                 flags = DIRECTION_RIGHT_TO_LEFT;
    136             }
    137         }
    138 
    139         // Retrieve the text and gather BIDI_EMBEDDINGS
    140         byte[] embeddings = null;
    141         for (int textLimit = 1, i = 1; i < length; textLimit = paragraph
    142                 .getRunLimit(TextAttribute.BIDI_EMBEDDING)
    143                 - begin + 1) {
    144             Object embedding = paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
    145             if (embedding != null && embedding instanceof Integer) {
    146                 int embLevel = ((Integer) embedding).intValue();
    147 
    148                 if (embeddings == null) {
    149                     embeddings = new byte[length];
    150                 }
    151 
    152                 for (; i < textLimit; i++) {
    153                     text[i] = paragraph.next();
    154                     embeddings[i - 1] = (byte) embLevel;
    155                 }
    156             } else {
    157                 for (; i < textLimit; i++) {
    158                     text[i] = paragraph.next();
    159                 }
    160             }
    161         }
    162 
    163         // Apply NumericShaper to the text
    164         Object numericShaper = paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);
    165         if (numericShaper != null && numericShaper instanceof NumericShaper) {
    166             ((NumericShaper) numericShaper).shape(text, 0, length);
    167         }
    168 
    169         long bidi = 0;
    170         try {
    171             bidi = createUBiDi(text, 0, embeddings, 0, length, flags);
    172             readBidiInfo(bidi);
    173         } finally {
    174             ubidi_close(bidi);
    175         }
    176     }
    177 
    178     /**
    179      * Creates a {@code Bidi} object.
    180      *
    181      * @param text
    182      *            the char array of the paragraph text that is processed.
    183      * @param textStart
    184      *            the index in {@code text} of the start of the paragraph.
    185      * @param embeddings
    186      *            the embedding level array of the paragraph text, specifying
    187      *            the embedding level information for each character. Values
    188      *            between -1 and -61 denote overrides at the level's absolute
    189      *            value, values from 1 to 61 indicate embeddings, and the 0
    190      *            value indicates the level is calculated by the algorithm
    191      *            automatically.
    192      * @param embStart
    193      *            the index in {@code embeddings} of the start of the paragraph.
    194      * @param paragraphLength
    195      *            the length of the text to perform the algorithm.
    196      * @param flags
    197      *            indicates the base direction of the bidirectional text. It is
    198      *            expected that this will be one of the direction constant
    199      *            values defined in this class. An unknown value is treated as
    200      *            DIRECTION_DEFAULT_LEFT_TO_RIGHT.
    201      * @throws IllegalArgumentException
    202      *             if {@code textStart}, {@code embStart}, or {@code
    203      *             paragraphLength} is negative; if
    204      *             {@code text.length < textStart + paragraphLength} or
    205      *             {@code embeddings.length < embStart + paragraphLength}.
    206      * @see #DIRECTION_LEFT_TO_RIGHT
    207      * @see #DIRECTION_RIGHT_TO_LEFT
    208      * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
    209      * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
    210      */
    211     public Bidi(char[] text, int textStart, byte[] embeddings, int embStart,
    212             int paragraphLength, int flags) {
    213 
    214         if (text == null || text.length - textStart < paragraphLength) {
    215             throw new IllegalArgumentException();
    216         }
    217 
    218         if (embeddings != null) {
    219             if (embeddings.length - embStart < paragraphLength) {
    220                 throw new IllegalArgumentException();
    221             }
    222         }
    223 
    224         if (textStart < 0) {
    225             throw new IllegalArgumentException("Negative textStart value " + textStart);
    226         }
    227         if (embStart < 0) {
    228             throw new IllegalArgumentException("Negative embStart value " + embStart);
    229         }
    230         if (paragraphLength < 0) {
    231             throw new IllegalArgumentException("Negative paragraph length " + paragraphLength);
    232         }
    233 
    234         long bidi = 0;
    235         try {
    236             bidi = createUBiDi(text, textStart, embeddings, embStart, paragraphLength, flags);
    237             readBidiInfo(bidi);
    238         } finally {
    239             ubidi_close(bidi);
    240         }
    241     }
    242 
    243     /**
    244      * Creates a {@code Bidi} object.
    245      *
    246      * @param paragraph
    247      *            the string containing the paragraph text to perform the
    248      *            algorithm on.
    249      * @param flags
    250      *            indicates the base direction of the bidirectional text. It is
    251      *            expected that this will be one of the direction constant
    252      *            values defined in this class. An unknown value is treated as
    253      *            DIRECTION_DEFAULT_LEFT_TO_RIGHT.
    254      * @see #DIRECTION_LEFT_TO_RIGHT
    255      * @see #DIRECTION_RIGHT_TO_LEFT
    256      * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
    257      * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
    258      */
    259     public Bidi(String paragraph, int flags) {
    260         this((paragraph == null ? null : paragraph.toCharArray()), 0, null, 0,
    261                 (paragraph == null ? 0 : paragraph.length()), flags);
    262     }
    263 
    264     // create the native UBiDi struct, need to be closed with ubidi_close().
    265     private static long createUBiDi(char[] text, int textStart,
    266             byte[] embeddings, int embStart, int paragraphLength, int flags) {
    267         char[] realText = null;
    268 
    269         byte[] realEmbeddings = null;
    270 
    271         if (text == null || text.length - textStart < paragraphLength) {
    272             throw new IllegalArgumentException();
    273         }
    274         realText = new char[paragraphLength];
    275         System.arraycopy(text, textStart, realText, 0, paragraphLength);
    276 
    277         if (embeddings != null) {
    278             if (embeddings.length - embStart < paragraphLength) {
    279                 throw new IllegalArgumentException();
    280             }
    281             if (paragraphLength > 0) {
    282                 Bidi temp = new Bidi(text, textStart, null, 0, paragraphLength, flags);
    283                 realEmbeddings = new byte[paragraphLength];
    284                 System.arraycopy(temp.offsetLevel, 0, realEmbeddings, 0, paragraphLength);
    285                 for (int i = 0; i < paragraphLength; i++) {
    286                     byte e = embeddings[i];
    287                     if (e < 0) {
    288                         realEmbeddings[i] = (byte) (UBIDI_LEVEL_OVERRIDE - e);
    289                     } else if (e > 0) {
    290                         realEmbeddings[i] = e;
    291                     } else {
    292                         realEmbeddings[i] |= (byte) UBIDI_LEVEL_OVERRIDE;
    293                     }
    294                 }
    295             }
    296         }
    297 
    298         if (flags > 1 || flags < -2) {
    299             flags = 0;
    300         }
    301 
    302         long bidi = 0;
    303         boolean needsDeletion = true;
    304         try {
    305             bidi = ubidi_open();
    306             ubidi_setPara(bidi, realText, paragraphLength, flags, realEmbeddings);
    307             needsDeletion = false;
    308         } finally {
    309             if (needsDeletion) {
    310                 ubidi_close(bidi);
    311             }
    312         }
    313         return bidi;
    314     }
    315 
    316     /* private constructor used by createLineBidi() */
    317     private Bidi(long pBidi) {
    318         readBidiInfo(pBidi);
    319     }
    320 
    321     // read info from the native UBiDi struct
    322     private void readBidiInfo(long pBidi) {
    323         length = ubidi_getLength(pBidi);
    324 
    325         offsetLevel = (length == 0) ? null : ubidi_getLevels(pBidi);
    326 
    327         baseLevel = ubidi_getParaLevel(pBidi);
    328 
    329         int runCount = ubidi_countRuns(pBidi);
    330         if (runCount == 0) {
    331             unidirectional = true;
    332             runs = null;
    333         } else if (runCount < 0) {
    334             runs = null;
    335         } else {
    336             runs = ubidi_getRuns(pBidi);
    337 
    338             // Simplified case for one run which has the base level
    339             if (runCount == 1 && runs[0].getLevel() == baseLevel) {
    340                 unidirectional = true;
    341                 runs = null;
    342             }
    343         }
    344 
    345         direction = ubidi_getDirection(pBidi);
    346     }
    347 
    348     private int baseLevel;
    349 
    350     private int length;
    351 
    352     private byte[] offsetLevel;
    353 
    354     private Run[] runs;
    355 
    356     private int direction;
    357 
    358     private boolean unidirectional;
    359 
    360     /**
    361      * Returns whether the base level is from left to right.
    362      *
    363      * @return true if the base level is from left to right.
    364      */
    365     public boolean baseIsLeftToRight() {
    366         return baseLevel % 2 == 0 ? true : false;
    367     }
    368 
    369     /**
    370      * Creates a new {@code Bidi} object containing the information of one line
    371      * from this object.
    372      *
    373      * @param lineStart
    374      *            the start offset of the line.
    375      * @param lineLimit
    376      *            the limit of the line.
    377      * @return the new line Bidi object. In this new object, the indices will
    378      *         range from 0 to (limit - start - 1).
    379      * @throws IllegalArgumentException
    380      *             if {@code lineStart < 0}, {@code lineLimit < 0}, {@code
    381      *             lineStart > lineLimit} or if {@code lineStart} is greater
    382      *             than the length of this object's paragraph text.
    383      */
    384     public Bidi createLineBidi(int lineStart, int lineLimit) {
    385         if (lineStart < 0 || lineLimit < 0 || lineLimit > length || lineStart > lineLimit) {
    386             throw new IllegalArgumentException("Invalid ranges (start=" + lineStart + ", " +
    387                     "limit=" + lineLimit + ", length=" + length + ")");
    388         }
    389 
    390         char[] text = new char[this.length];
    391         Arrays.fill(text, 'a');
    392         byte[] embeddings = new byte[this.length];
    393         for (int i = 0; i < embeddings.length; i++) {
    394             embeddings[i] = (byte) -this.offsetLevel[i];
    395         }
    396 
    397         int dir = this.baseIsLeftToRight()
    398                 ? Bidi.DIRECTION_LEFT_TO_RIGHT
    399                 : Bidi.DIRECTION_RIGHT_TO_LEFT;
    400         long parent = 0;
    401         try {
    402             parent = createUBiDi(text, 0, embeddings, 0, this.length, dir);
    403             if (lineStart == lineLimit) {
    404                 return createEmptyLineBidi(parent);
    405             }
    406             return new Bidi(ubidi_setLine(parent, lineStart, lineLimit));
    407         } finally {
    408             ubidi_close(parent);
    409         }
    410     }
    411 
    412     private Bidi createEmptyLineBidi(long parent) {
    413         // ICU4C doesn't allow this case, but the RI does.
    414         Bidi result = new Bidi(parent);
    415         result.length = 0;
    416         result.offsetLevel = null;
    417         result.runs = null;
    418         result.unidirectional = true;
    419         return result;
    420     }
    421 
    422     /**
    423      * Returns the base level.
    424      *
    425      * @return the base level.
    426      */
    427     public int getBaseLevel() {
    428         return baseLevel;
    429     }
    430 
    431     /**
    432      * Returns the length of the text in the {@code Bidi} object.
    433      *
    434      * @return the length.
    435      */
    436     public int getLength() {
    437         return length;
    438     }
    439 
    440     /**
    441      * Returns the level of a specified character.
    442      *
    443      * @param offset
    444      *            the offset of the character.
    445      * @return the level.
    446      */
    447     public int getLevelAt(int offset) {
    448         try {
    449             return offsetLevel[offset] & ~UBIDI_LEVEL_OVERRIDE;
    450         } catch (RuntimeException e) {
    451             return baseLevel;
    452         }
    453     }
    454 
    455     /**
    456      * Returns the number of runs in the bidirectional text.
    457      *
    458      * @return the number of runs, at least 1.
    459      */
    460     public int getRunCount() {
    461         return unidirectional ? 1 : runs.length;
    462     }
    463 
    464     /**
    465      * Returns the level of the specified run.
    466      *
    467      * @param run
    468      *            the index of the run.
    469      * @return the level of the run.
    470      */
    471     public int getRunLevel(int run) {
    472         return unidirectional ? baseLevel : runs[run].getLevel();
    473     }
    474 
    475     /**
    476      * Returns the limit offset of the specified run.
    477      *
    478      * @param run
    479      *            the index of the run.
    480      * @return the limit offset of the run.
    481      */
    482     public int getRunLimit(int run) {
    483         return unidirectional ? length : runs[run].getLimit();
    484     }
    485 
    486     /**
    487      * Returns the start offset of the specified run.
    488      *
    489      * @param run
    490      *            the index of the run.
    491      * @return the start offset of the run.
    492      */
    493     public int getRunStart(int run) {
    494         return unidirectional ? 0 : runs[run].getStart();
    495     }
    496 
    497     /**
    498      * Indicates whether the text is from left to right, that is, both the base
    499      * direction and the text direction is from left to right.
    500      *
    501      * @return {@code true} if the text is from left to right; {@code false}
    502      *         otherwise.
    503      */
    504     public boolean isLeftToRight() {
    505         return direction == UBiDiDirection_UBIDI_LTR;
    506     }
    507 
    508     /**
    509      * Indicates whether the text direction is mixed.
    510      *
    511      * @return {@code true} if the text direction is mixed; {@code false}
    512      *         otherwise.
    513      */
    514     public boolean isMixed() {
    515         return direction == UBiDiDirection_UBIDI_MIXED;
    516     }
    517 
    518     /**
    519      * Indicates whether the text is from right to left, that is, both the base
    520      * direction and the text direction is from right to left.
    521      *
    522      * @return {@code true} if the text is from right to left; {@code false}
    523      *         otherwise.
    524      */
    525     public boolean isRightToLeft() {
    526         return direction == UBiDiDirection_UBIDI_RTL;
    527     }
    528 
    529     /**
    530      * Reorders a range of objects according to their specified levels. This is
    531      * a convenience function that does not use a {@code Bidi} object. The range
    532      * of objects at {@code index} from {@code objectStart} to {@code
    533      * objectStart + count} will be reordered according to the range of levels
    534      * at {@code index} from {@code levelStart} to {@code levelStart + count}.
    535      *
    536      * @param levels
    537      *            the level array, which is already determined.
    538      * @param levelStart
    539      *            the start offset of the range of the levels.
    540      * @param objects
    541      *            the object array to reorder.
    542      * @param objectStart
    543      *            the start offset of the range of objects.
    544      * @param count
    545      *            the count of the range of objects to reorder.
    546      * @throws IllegalArgumentException
    547      *             if {@code count}, {@code levelStart} or {@code objectStart}
    548      *             is negative; if {@code count > levels.length - levelStart} or
    549      *             if {@code count > objects.length - objectStart}.
    550      */
    551     public static void reorderVisually(byte[] levels, int levelStart,
    552             Object[] objects, int objectStart, int count) {
    553         if (count < 0 || levelStart < 0 || objectStart < 0
    554                 || count > levels.length - levelStart
    555                 || count > objects.length - objectStart) {
    556             throw new IllegalArgumentException("Invalid ranges (levels=" + levels.length +
    557                     ", levelStart=" + levelStart + ", objects=" + objects.length +
    558                     ", objectStart=" + objectStart + ", count=" + count + ")");
    559         }
    560 
    561         byte[] realLevels = new byte[count];
    562         System.arraycopy(levels, levelStart, realLevels, 0, count);
    563 
    564         int[] indices = ubidi_reorderVisual(realLevels, count);
    565 
    566         ArrayList<Object> result = new ArrayList<Object>(count);
    567         for (int i = 0; i < count; i++) {
    568             result.add(objects[objectStart + indices[i]]);
    569         }
    570 
    571         System.arraycopy(result.toArray(), 0, objects, objectStart, count);
    572     }
    573 
    574     /**
    575      * Indicates whether a range of characters of a text requires a {@code Bidi}
    576      * object to display properly.
    577      *
    578      * @param text
    579      *            the char array of the text.
    580      * @param start
    581      *            the start offset of the range of characters.
    582      * @param limit
    583      *            the limit offset of the range of characters.
    584      * @return {@code true} if the range of characters requires a {@code Bidi}
    585      *         object; {@code false} otherwise.
    586      * @throws IllegalArgumentException
    587      *             if {@code start} or {@code limit} is negative; {@code start >
    588      *             limit} or {@code limit} is greater than the length of this
    589      *             object's paragraph text.
    590      */
    591     public static boolean requiresBidi(char[] text, int start, int limit) {
    592         if (limit < 0 || start < 0 || start > limit || limit > text.length) {
    593             throw new IllegalArgumentException();
    594         }
    595 
    596         Bidi bidi = new Bidi(text, start, null, 0, limit - start, 0);
    597         return !bidi.isLeftToRight();
    598     }
    599 
    600     @Override
    601     public String toString() {
    602         return getClass().getName()
    603                 + "[direction: " + direction + " baseLevel: " + baseLevel
    604                 + " length: " + length + " runs: " + Arrays.toString(runs) + "]";
    605     }
    606 
    607     // ICU4C constants.
    608     private static final int UBIDI_LEVEL_OVERRIDE = 0x80;
    609     private static final int UBiDiDirection_UBIDI_LTR = 0;
    610     private static final int UBiDiDirection_UBIDI_RTL = 1;
    611     private static final int UBiDiDirection_UBIDI_MIXED = 2;
    612 
    613     // ICU4C functions.
    614     private static native long ubidi_open();
    615     private static native void ubidi_close(long pBiDi);
    616     private static native void ubidi_setPara(long pBiDi, char[] text, int length, int paraLevel, byte[] embeddingLevels);
    617     private static native long ubidi_setLine(final long pParaBiDi, int start, int limit);
    618     private static native int ubidi_getDirection(final long pBiDi);
    619     private static native int ubidi_getLength(final long pBiDi);
    620     private static native byte ubidi_getParaLevel(final long pBiDi);
    621     private static native byte[] ubidi_getLevels(long pBiDi);
    622     private static native int ubidi_countRuns(long pBiDi);
    623     private static native Bidi.Run[] ubidi_getRuns(long pBidi);
    624     private static native int[] ubidi_reorderVisual(byte[] levels, int length);
    625 }
    626