Home | History | Annotate | Download | only in text
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package java.text;
     19 
     20 import java.awt.font.NumericShaper;
     21 import java.awt.font.TextAttribute;
     22 import java.util.ArrayList;
     23 import java.util.Arrays;
     24 import org.apache.harmony.text.BidiRun;
     25 import org.apache.harmony.text.NativeBidi;
     26 
/**
 * Provides the Unicode Bidirectional Algorithm. The algorithm is
 * defined in the Unicode Standard Annex #9, version 13, and is also described
 * in The Unicode Standard, Version 4.0.
 *
 * Use a {@code Bidi} object to get information on the position reordering of
 * bidirectional text, such as Arabic or Hebrew. The natural display ordering of
 * horizontal text in these languages is from right to left, while numbers are
 * ordered from left to right.
 *
 * If the text contains multiple runs, the information of each run can be
 * obtained from the run index. The level of any particular run indicates the
 * direction of the text as well as the nesting level. Left-to-right runs have
 * even levels while right-to-left runs have odd levels.
 */
     42 public final class Bidi {
     43     /**
     44      * Constant that indicates the default base level. If there is no strong
     45      * character, then set the paragraph level to 0 (left-to-right).
     46      */
     47     public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
     48 
     49     /**
     50      * Constant that indicates the default base level. If there is no strong
     51      * character, then set the paragraph level to 1 (right-to-left).
     52      */
     53     public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
     54 
     55     /**
     56      * Constant that specifies the default base level as 0 (left-to-right).
     57      */
     58     public static final int DIRECTION_LEFT_TO_RIGHT = 0;
     59 
     60     /**
     61      * Constant that specifies the default base level as 1 (right-to-left).
     62      */
     63     public static final int DIRECTION_RIGHT_TO_LEFT = 1;
     64 
     65     /**
     66      * Creates a {@code Bidi} object from the {@code
     67      * AttributedCharacterIterator} of a paragraph text. The RUN_DIRECTION
     68      * attribute determines the base direction of the bidirectional text. If it
     69      * is not specified explicitly, the algorithm uses
     70      * DIRECTION_DEFAULT_LEFT_TO_RIGHT by default. The BIDI_EMBEDDING attribute
     71      * specifies the level of embedding for each character. Values between -1
     72      * and -62 denote overrides at the level's absolute value, values from 1 to
     73      * 62 indicate embeddings, and the 0 value indicates the level is calculated
     74      * by the algorithm automatically. For the character with no BIDI_EMBEDDING
     75      * attribute or with a improper attribute value, such as a {@code null}
     76      * value, the algorithm treats its embedding level as 0. The NUMERIC_SHAPING
     77      * attribute specifies the instance of NumericShaper used to convert
     78      * European digits to other decimal digits before performing the bidi
     79      * algorithm.
     80      *
     81      * @param paragraph
     82      *            the String containing the paragraph text to perform the
     83      *            algorithm.
     84      * @throws IllegalArgumentException if {@code paragraph == null}
     85      * @see java.awt.font.TextAttribute#BIDI_EMBEDDING
     86      * @see java.awt.font.TextAttribute#NUMERIC_SHAPING
     87      * @see java.awt.font.TextAttribute#RUN_DIRECTION
     88      */
     89     public Bidi(AttributedCharacterIterator paragraph) {
     90         if (paragraph == null) {
     91             throw new IllegalArgumentException("paragraph is null");
     92         }
     93 
     94         int begin = paragraph.getBeginIndex();
     95         int end = paragraph.getEndIndex();
     96         int length = end - begin;
     97         char[] text = new char[length + 1]; // One more char for AttributedCharacterIterator.DONE
     98 
     99         if (length != 0) {
    100             text[0] = paragraph.first();
    101         } else {
    102             paragraph.first();
    103         }
    104 
    105         // First check the RUN_DIRECTION attribute.
    106         int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
    107         Object direction = paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
    108         if (direction != null && direction instanceof Boolean) {
    109             if (direction.equals(TextAttribute.RUN_DIRECTION_LTR)) {
    110                 flags = DIRECTION_LEFT_TO_RIGHT;
    111             } else {
    112                 flags = DIRECTION_RIGHT_TO_LEFT;
    113             }
    114         }
    115 
    116         // Retrieve the text and gather BIDI_EMBEDDINGS
    117         byte[] embeddings = null;
    118         for (int textLimit = 1, i = 1; i < length; textLimit = paragraph
    119                 .getRunLimit(TextAttribute.BIDI_EMBEDDING)
    120                 - begin + 1) {
    121             Object embedding = paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
    122             if (embedding != null && embedding instanceof Integer) {
    123                 int embLevel = ((Integer) embedding).intValue();
    124 
    125                 if (embeddings == null) {
    126                     embeddings = new byte[length];
    127                 }
    128 
    129                 for (; i < textLimit; i++) {
    130                     text[i] = paragraph.next();
    131                     embeddings[i - 1] = (byte) embLevel;
    132                 }
    133             } else {
    134                 for (; i < textLimit; i++) {
    135                     text[i] = paragraph.next();
    136                 }
    137             }
    138         }
    139 
    140         // Apply NumericShaper to the text
    141         Object numericShaper = paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);
    142         if (numericShaper != null && numericShaper instanceof NumericShaper) {
    143             ((NumericShaper) numericShaper).shape(text, 0, length);
    144         }
    145 
    146         long bidi = 0;
    147         try {
    148             bidi = createUBiDi(text, 0, embeddings, 0, length, flags);
    149             readBidiInfo(bidi);
    150         } finally {
    151             NativeBidi.ubidi_close(bidi);
    152         }
    153     }
    154 
    /**
     * Creates a {@code Bidi} object.
     *
     * @param text
     *            the char array of the paragraph text that is processed.
     * @param textStart
     *            the index in {@code text} of the start of the paragraph.
     * @param embeddings
     *            the embedding level array of the paragraph text, specifying
     *            the embedding level information for each character, or
     *            {@code null} if there is none. Values between -1 and -61
     *            denote overrides at the level's absolute value, values from
     *            1 to 61 indicate embeddings, and the 0 value indicates the
     *            level is calculated by the algorithm automatically.
     * @param embStart
     *            the index in {@code embeddings} of the start of the paragraph.
     * @param paragraphLength
     *            the length of the text to perform the algorithm.
     * @param flags
     *            indicates the base direction of the bidirectional text. It is
     *            expected that this will be one of the direction constant
     *            values defined in this class. A value outside the range of
     *            those constants is replaced by 0
     *            ({@link #DIRECTION_LEFT_TO_RIGHT}) before the text is
     *            processed.
     * @throws IllegalArgumentException
     *             if {@code text} is {@code null}; if {@code textStart},
     *             {@code embStart}, or {@code paragraphLength} is negative; if
     *             {@code text.length < textStart + paragraphLength} or
     *             {@code embeddings.length < embStart + paragraphLength}.
     * @see #DIRECTION_LEFT_TO_RIGHT
     * @see #DIRECTION_RIGHT_TO_LEFT
     * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
     * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
     */
    public Bidi(char[] text, int textStart, byte[] embeddings, int embStart,
            int paragraphLength, int flags) {
        if (text == null) {
            throw new IllegalArgumentException("text is null");
        }

        // Reject negative values first so the range subtractions below cannot
        // overflow and every failure carries a descriptive message.
        if (textStart < 0) {
            throw new IllegalArgumentException("Negative textStart value " + textStart);
        }
        if (embStart < 0) {
            throw new IllegalArgumentException("Negative embStart value " + embStart);
        }
        if (paragraphLength < 0) {
            throw new IllegalArgumentException("Negative paragraph length " + paragraphLength);
        }

        if (text.length - textStart < paragraphLength) {
            throw new IllegalArgumentException("Invalid text range (text=" + text.length
                    + ", textStart=" + textStart
                    + ", paragraphLength=" + paragraphLength + ")");
        }
        if (embeddings != null && embeddings.length - embStart < paragraphLength) {
            throw new IllegalArgumentException("Invalid embeddings range (embeddings="
                    + embeddings.length + ", embStart=" + embStart
                    + ", paragraphLength=" + paragraphLength + ")");
        }

        long bidi = 0;
        try {
            bidi = createUBiDi(text, textStart, embeddings, embStart, paragraphLength, flags);
            readBidiInfo(bidi);
        } finally {
            // The native struct is only needed while its results are copied.
            NativeBidi.ubidi_close(bidi);
        }
    }
    219 
    /**
     * Creates a {@code Bidi} object for the given paragraph string by
     * delegating to the char-array constructor with no embeddings.
     *
     * @param paragraph
     *            the string containing the paragraph text to perform the
     *            algorithm on.
     * @param flags
     *            indicates the base direction of the bidirectional text. It is
     *            expected that this will be one of the direction constant
     *            values defined in this class.
     * @throws IllegalArgumentException
     *             if {@code paragraph} is {@code null} (a {@code null} text
     *             array is rejected by the delegated constructor).
     * @see #DIRECTION_LEFT_TO_RIGHT
     * @see #DIRECTION_RIGHT_TO_LEFT
     * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
     * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
     */
    public Bidi(String paragraph, int flags) {
        this(paragraph != null ? paragraph.toCharArray() : null, 0, null, 0,
                paragraph != null ? paragraph.length() : 0, flags);
    }
    240 
    241     // create the native UBiDi struct, need to be closed with ubidi_close().
    242     private static long createUBiDi(char[] text, int textStart,
    243             byte[] embeddings, int embStart, int paragraphLength, int flags) {
    244         char[] realText = null;
    245 
    246         byte[] realEmbeddings = null;
    247 
    248         if (text == null || text.length - textStart < paragraphLength) {
    249             throw new IllegalArgumentException();
    250         }
    251         realText = new char[paragraphLength];
    252         System.arraycopy(text, textStart, realText, 0, paragraphLength);
    253 
    254         if (embeddings != null) {
    255             if (embeddings.length - embStart < paragraphLength) {
    256                 throw new IllegalArgumentException();
    257             }
    258             if (paragraphLength > 0) {
    259                 Bidi temp = new Bidi(text, textStart, null, 0, paragraphLength, flags);
    260                 realEmbeddings = new byte[paragraphLength];
    261                 System.arraycopy(temp.offsetLevel, 0, realEmbeddings, 0, paragraphLength);
    262                 for (int i = 0; i < paragraphLength; i++) {
    263                     byte e = embeddings[i];
    264                     if (e < 0) {
    265                         realEmbeddings[i] = (byte) (NativeBidi.UBIDI_LEVEL_OVERRIDE - e);
    266                     } else if (e > 0) {
    267                         realEmbeddings[i] = e;
    268                     } else {
    269                         realEmbeddings[i] |= (byte) NativeBidi.UBIDI_LEVEL_OVERRIDE;
    270                     }
    271                 }
    272             }
    273         }
    274 
    275         if (flags > 1 || flags < -2) {
    276             flags = 0;
    277         }
    278 
    279         long bidi = 0;
    280         boolean needsDeletion = true;
    281         try {
    282             bidi = NativeBidi.ubidi_open();
    283             NativeBidi.ubidi_setPara(bidi, realText, paragraphLength, flags, realEmbeddings);
    284             needsDeletion = false;
    285         } finally {
    286             if (needsDeletion) {
    287                 NativeBidi.ubidi_close(bidi);
    288             }
    289         }
    290         return bidi;
    291     }
    292 
    /*
     * Private constructor used by createLineBidi(): fills this object from an
     * existing native UBiDi struct. The handle is not closed here.
     */
    private Bidi(long pBidi) {
        readBidiInfo(pBidi);
    }
    297 
    298     // read info from the native UBiDi struct
    299     private void readBidiInfo(long pBidi) {
    300         length = NativeBidi.ubidi_getLength(pBidi);
    301 
    302         offsetLevel = (length == 0) ? null : NativeBidi.ubidi_getLevels(pBidi);
    303 
    304         baseLevel = NativeBidi.ubidi_getParaLevel(pBidi);
    305 
    306         int runCount = NativeBidi.ubidi_countRuns(pBidi);
    307         if (runCount == 0) {
    308             unidirectional = true;
    309             runs = null;
    310         } else if (runCount < 0) {
    311             runs = null;
    312         } else {
    313             runs = NativeBidi.ubidi_getRuns(pBidi);
    314 
    315             // Simplified case for one run which has the base level
    316             if (runCount == 1 && runs[0].getLevel() == baseLevel) {
    317                 unidirectional = true;
    318                 runs = null;
    319             }
    320         }
    321 
    322         direction = NativeBidi.ubidi_getDirection(pBidi);
    323     }
    324 
    325     private int baseLevel;
    326 
    327     private int length;
    328 
    329     private byte[] offsetLevel;
    330 
    331     private BidiRun[] runs;
    332 
    333     private int direction;
    334 
    335     private boolean unidirectional;
    336 
    337     /**
    338      * Returns whether the base level is from left to right.
    339      *
    340      * @return true if the base level is from left to right.
    341      */
    342     public boolean baseIsLeftToRight() {
    343         return baseLevel % 2 == 0 ? true : false;
    344     }
    345 
    346     /**
    347      * Creates a new {@code Bidi} object containing the information of one line
    348      * from this object.
    349      *
    350      * @param lineStart
    351      *            the start offset of the line.
    352      * @param lineLimit
    353      *            the limit of the line.
    354      * @return the new line Bidi object. In this new object, the indices will
    355      *         range from 0 to (limit - start - 1).
    356      * @throws IllegalArgumentException
    357      *             if {@code lineStart < 0}, {@code lineLimit < 0}, {@code
    358      *             lineStart > lineLimit} or if {@code lineStart} is greater
    359      *             than the length of this object's paragraph text.
    360      */
    361     public Bidi createLineBidi(int lineStart, int lineLimit) {
    362         if (lineStart < 0 || lineLimit < 0 || lineLimit > length || lineStart > lineLimit) {
    363             throw new IllegalArgumentException("Invalid ranges (start=" + lineStart + ", " +
    364                     "limit=" + lineLimit + ", length=" + length + ")");
    365         }
    366 
    367         char[] text = new char[this.length];
    368         Arrays.fill(text, 'a');
    369         byte[] embeddings = new byte[this.length];
    370         for (int i = 0; i < embeddings.length; i++) {
    371             embeddings[i] = (byte) -this.offsetLevel[i];
    372         }
    373 
    374         int dir = this.baseIsLeftToRight()
    375                 ? Bidi.DIRECTION_LEFT_TO_RIGHT
    376                 : Bidi.DIRECTION_RIGHT_TO_LEFT;
    377         long parent = 0;
    378         try {
    379             parent = createUBiDi(text, 0, embeddings, 0, this.length, dir);
    380             if (lineStart == lineLimit) {
    381                 return createEmptyLineBidi(parent);
    382             }
    383             return new Bidi(NativeBidi.ubidi_setLine(parent, lineStart, lineLimit));
    384         } finally {
    385             NativeBidi.ubidi_close(parent);
    386         }
    387     }
    388 
    389     private Bidi createEmptyLineBidi(long parent) {
    390         // ICU4C doesn't allow this case, but the RI does.
    391         Bidi result = new Bidi(parent);
    392         result.length = 0;
    393         result.offsetLevel = null;
    394         result.runs = null;
    395         result.unidirectional = true;
    396         return result;
    397     }
    398 
    399     /**
    400      * Returns the base level.
    401      *
    402      * @return the base level.
    403      */
    404     public int getBaseLevel() {
    405         return baseLevel;
    406     }
    407 
    408     /**
    409      * Returns the length of the text in the {@code Bidi} object.
    410      *
    411      * @return the length.
    412      */
    413     public int getLength() {
    414         return length;
    415     }
    416 
    417     /**
    418      * Returns the level of a specified character.
    419      *
    420      * @param offset
    421      *            the offset of the character.
    422      * @return the level.
    423      */
    424     public int getLevelAt(int offset) {
    425         try {
    426             return offsetLevel[offset] & ~NativeBidi.UBIDI_LEVEL_OVERRIDE;
    427         } catch (RuntimeException e) {
    428             return baseLevel;
    429         }
    430     }
    431 
    432     /**
    433      * Returns the number of runs in the bidirectional text.
    434      *
    435      * @return the number of runs, at least 1.
    436      */
    437     public int getRunCount() {
    438         return unidirectional ? 1 : runs.length;
    439     }
    440 
    441     /**
    442      * Returns the level of the specified run.
    443      *
    444      * @param run
    445      *            the index of the run.
    446      * @return the level of the run.
    447      */
    448     public int getRunLevel(int run) {
    449         return unidirectional ? baseLevel : runs[run].getLevel();
    450     }
    451 
    452     /**
    453      * Returns the limit offset of the specified run.
    454      *
    455      * @param run
    456      *            the index of the run.
    457      * @return the limit offset of the run.
    458      */
    459     public int getRunLimit(int run) {
    460         return unidirectional ? length : runs[run].getLimit();
    461     }
    462 
    463     /**
    464      * Returns the start offset of the specified run.
    465      *
    466      * @param run
    467      *            the index of the run.
    468      * @return the start offset of the run.
    469      */
    470     public int getRunStart(int run) {
    471         return unidirectional ? 0 : runs[run].getStart();
    472     }
    473 
    474     /**
    475      * Indicates whether the text is from left to right, that is, both the base
    476      * direction and the text direction is from left to right.
    477      *
    478      * @return {@code true} if the text is from left to right; {@code false}
    479      *         otherwise.
    480      */
    481     public boolean isLeftToRight() {
    482         return direction == NativeBidi.UBiDiDirection_UBIDI_LTR;
    483     }
    484 
    485     /**
    486      * Indicates whether the text direction is mixed.
    487      *
    488      * @return {@code true} if the text direction is mixed; {@code false}
    489      *         otherwise.
    490      */
    491     public boolean isMixed() {
    492         return direction == NativeBidi.UBiDiDirection_UBIDI_MIXED;
    493     }
    494 
    495     /**
    496      * Indicates whether the text is from right to left, that is, both the base
    497      * direction and the text direction is from right to left.
    498      *
    499      * @return {@code true} if the text is from right to left; {@code false}
    500      *         otherwise.
    501      */
    502     public boolean isRightToLeft() {
    503         return direction == NativeBidi.UBiDiDirection_UBIDI_RTL;
    504     }
    505 
    506     /**
    507      * Reorders a range of objects according to their specified levels. This is
    508      * a convenience function that does not use a {@code Bidi} object. The range
    509      * of objects at {@code index} from {@code objectStart} to {@code
    510      * objectStart + count} will be reordered according to the range of levels
    511      * at {@code index} from {@code levelStart} to {@code levelStart + count}.
    512      *
    513      * @param levels
    514      *            the level array, which is already determined.
    515      * @param levelStart
    516      *            the start offset of the range of the levels.
    517      * @param objects
    518      *            the object array to reorder.
    519      * @param objectStart
    520      *            the start offset of the range of objects.
    521      * @param count
    522      *            the count of the range of objects to reorder.
    523      * @throws IllegalArgumentException
    524      *             if {@code count}, {@code levelStart} or {@code objectStart}
    525      *             is negative; if {@code count > levels.length - levelStart} or
    526      *             if {@code count > objects.length - objectStart}.
    527      */
    528     public static void reorderVisually(byte[] levels, int levelStart,
    529             Object[] objects, int objectStart, int count) {
    530         if (count < 0 || levelStart < 0 || objectStart < 0
    531                 || count > levels.length - levelStart
    532                 || count > objects.length - objectStart) {
    533             throw new IllegalArgumentException("Invalid ranges (levels=" + levels.length +
    534                     ", levelStart=" + levelStart + ", objects=" + objects.length +
    535                     ", objectStart=" + objectStart + ", count=" + count + ")");
    536         }
    537 
    538         byte[] realLevels = new byte[count];
    539         System.arraycopy(levels, levelStart, realLevels, 0, count);
    540 
    541         int[] indices = NativeBidi.ubidi_reorderVisual(realLevels, count);
    542 
    543         ArrayList<Object> result = new ArrayList<Object>(count);
    544         for (int i = 0; i < count; i++) {
    545             result.add(objects[objectStart + indices[i]]);
    546         }
    547 
    548         System.arraycopy(result.toArray(), 0, objects, objectStart, count);
    549     }
    550 
    551     /**
    552      * Indicates whether a range of characters of a text requires a {@code Bidi}
    553      * object to display properly.
    554      *
    555      * @param text
    556      *            the char array of the text.
    557      * @param start
    558      *            the start offset of the range of characters.
    559      * @param limit
    560      *            the limit offset of the range of characters.
    561      * @return {@code true} if the range of characters requires a {@code Bidi}
    562      *         object; {@code false} otherwise.
    563      * @throws IllegalArgumentException
    564      *             if {@code start} or {@code limit} is negative; {@code start >
    565      *             limit} or {@code limit} is greater than the length of this
    566      *             object's paragraph text.
    567      */
    568     public static boolean requiresBidi(char[] text, int start, int limit) {
    569         if (limit < 0 || start < 0 || start > limit || limit > text.length) {
    570             throw new IllegalArgumentException();
    571         }
    572 
    573         Bidi bidi = new Bidi(text, start, null, 0, limit - start, 0);
    574         return !bidi.isLeftToRight();
    575     }
    576 
    577     /**
    578      * Returns the internal message of the {@code Bidi} object, used in
    579      * debugging.
    580      *
    581      * @return a string containing the internal message.
    582      */
    583     @Override
    584     public String toString() {
    585         return getClass().getName()
    586                 + "[direction: " + direction + " baseLevel: " + baseLevel
    587                 + " length: " + length + " runs: " + Arrays.toString(runs) + "]";
    588     }
    589 }
    590