Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
      4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      5  *
      6  * This code is free software; you can redistribute it and/or modify it
      7  * under the terms of the GNU General Public License version 2 only, as
      8  * published by the Free Software Foundation.  Oracle designates this
      9  * particular file as subject to the "Classpath" exception as provided
     10  * by Oracle in the LICENSE file that accompanied this code.
     11  *
     12  * This code is distributed in the hope that it will be useful, but WITHOUT
     13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15  * version 2 for more details (a copy is included in the LICENSE file that
     16  * accompanied this code).
     17  *
     18  * You should have received a copy of the GNU General Public License version
     19  * 2 along with this work; if not, write to the Free Software Foundation,
     20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     21  *
     22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     23  * or visit www.oracle.com if you need additional information or have any
     24  * questions.
     25  */
     26 
     27 /*
     28  * (C) Copyright IBM Corp. 1999-2003 - All Rights Reserved
     29  *
     30  * The original version of this source code and documentation is
     31  * copyrighted and owned by IBM. These materials are provided
     32  * under terms of a License Agreement between IBM and Sun.
     33  * This technology is protected by multiple US and International
     34  * patents. This notice and attribution to IBM may not be removed.
     35  */
     36 
     37 package java.text;
     38 
     39 /**
     40  * This class implements the Unicode Bidirectional Algorithm.
     41  * <p>
     42  * A Bidi object provides information on the bidirectional reordering of the text
     43  * used to create it.  This is required, for example, to properly display Arabic
     44  * or Hebrew text.  These languages are inherently mixed directional, as they order
     45  * numbers from left-to-right while ordering most other text from right-to-left.
     46  * <p>
     47  * Once created, a Bidi object can be queried to see if the text it represents is
     48  * all left-to-right or all right-to-left.  Such objects are very lightweight and
     49  * this text is relatively easy to process.
     50  * <p>
     51  * If there are multiple runs of text, information about the runs can be accessed
     52  * by indexing to get the start, limit, and level of a run.  The level represents
     53  * both the direction and the 'nesting level' of a directional run.  Odd levels
     54  * are right-to-left, while even levels are left-to-right.  So for example level
     55  * 0 represents left-to-right text, while level 1 represents right-to-left text, and
     56  * level 2 represents left-to-right text embedded in a right-to-left run.
     57  *
     58  * @since 1.4
     59  */
     60 public final class Bidi {
     61 
     62     /** Constant indicating base direction is left-to-right. */
     63     public static final int DIRECTION_LEFT_TO_RIGHT = 0;
     64 
     65     /** Constant indicating base direction is right-to-left. */
     66     public static final int DIRECTION_RIGHT_TO_LEFT = 1;
     67 
     68     /**
     69      * Constant indicating that the base direction depends on the first strong
     70      * directional character in the text according to the Unicode
     71      * Bidirectional Algorithm.  If no strong directional character is present,
     72      * the base direction is left-to-right.
     73      */
     74     public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
     75 
     76     /**
     77      * Constant indicating that the base direction depends on the first strong
     78      * directional character in the text according to the Unicode
     79      * Bidirectional Algorithm.  If no strong directional character is present,
     80      * the base direction is right-to-left.
     81      */
     82     public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
     83 
     84     private static int translateConstToIcu(int javaInt) {
     85         switch (javaInt) {
     86             case DIRECTION_DEFAULT_LEFT_TO_RIGHT:
     87                 return android.icu.text.Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT;
     88             case DIRECTION_DEFAULT_RIGHT_TO_LEFT:
     89                 return android.icu.text.Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT;
     90             case DIRECTION_LEFT_TO_RIGHT:
     91                 return android.icu.text.Bidi.DIRECTION_LEFT_TO_RIGHT;
     92             case DIRECTION_RIGHT_TO_LEFT:
     93                 return android.icu.text.Bidi.DIRECTION_RIGHT_TO_LEFT;
     94             // If the parameter was unrecognized use LEFT_TO_RIGHT.
     95             default:
     96                 return android.icu.text.Bidi.DIRECTION_LEFT_TO_RIGHT;
     97         }
     98     }
     99 
    100     private android.icu.text.Bidi bidiBase;
    101 
    102     /**
    103      * Create Bidi from the given paragraph of text and base direction.
    104      * @param paragraph a paragraph of text
    105      * @param flags a collection of flags that control the algorithm.  The
    106      * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, DIRECTION_RIGHT_TO_LEFT,
    107      * DIRECTION_DEFAULT_LEFT_TO_RIGHT, and DIRECTION_DEFAULT_RIGHT_TO_LEFT.
    108      * Other values are reserved.
    109      */
    110     public Bidi(String paragraph, int flags) {
    111         this((paragraph == null ? null : paragraph.toCharArray()), 0, null, 0,
    112                 (paragraph == null ? 0 : paragraph.length()), flags);
    113     }
    114 
    115     /**
    116      * Create Bidi from the given paragraph of text.
    117      * <p>
    118      * The RUN_DIRECTION attribute in the text, if present, determines the base
    119      * direction (left-to-right or right-to-left).  If not present, the base
    120      * direction is computes using the Unicode Bidirectional Algorithm, defaulting to left-to-right
    121      * if there are no strong directional characters in the text.  This attribute, if
    122      * present, must be applied to all the text in the paragraph.
    123      * <p>
    124      * The BIDI_EMBEDDING attribute in the text, if present, represents embedding level
    125      * information.  Negative values from -1 to -62 indicate overrides at the absolute value
    126      * of the level.  Positive values from 1 to 62 indicate embeddings.  Where values are
    127      * zero or not defined, the base embedding level as determined by the base direction
    128      * is assumed.
    129      * <p>
    130      * The NUMERIC_SHAPING attribute in the text, if present, converts European digits to
    131      * other decimal digits before running the bidi algorithm.  This attribute, if present,
    132      * must be applied to all the text in the paragraph.
    133      *
    134      * @param paragraph a paragraph of text with optional character and paragraph attribute information
    135      *
    136      * @see java.awt.font.TextAttribute#BIDI_EMBEDDING
    137      * @see java.awt.font.TextAttribute#NUMERIC_SHAPING
    138      * @see java.awt.font.TextAttribute#RUN_DIRECTION
    139      */
    140     public Bidi(AttributedCharacterIterator paragraph) {
    141         if (paragraph == null) {
    142             throw new IllegalArgumentException("paragraph is null");
    143         }
    144 
    145         this.bidiBase = new android.icu.text.Bidi(paragraph);
    146     }
    147 
    148     /**
    149      * Create Bidi from the given text, embedding, and direction information.
    150      * The embeddings array may be null.  If present, the values represent embedding level
    151      * information.  Negative values from -1 to -61 indicate overrides at the absolute value
    152      * of the level.  Positive values from 1 to 61 indicate embeddings.  Where values are
    153      * zero, the base embedding level as determined by the base direction is assumed.
    154      * @param text an array containing the paragraph of text to process.
    155      * @param textStart the index into the text array of the start of the paragraph.
    156      * @param embeddings an array containing embedding values for each character in the paragraph.
    157      * This can be null, in which case it is assumed that there is no external embedding information.
    158      * @param embStart the index into the embedding array of the start of the paragraph.
    159      * @param paragraphLength the length of the paragraph in the text and embeddings arrays.
    160      * @param flags a collection of flags that control the algorithm.  The
    161      * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, DIRECTION_RIGHT_TO_LEFT,
    162      * DIRECTION_DEFAULT_LEFT_TO_RIGHT, and DIRECTION_DEFAULT_RIGHT_TO_LEFT.
    163      * Other values are reserved.
    164      */
    165     public Bidi(char[] text, int textStart, byte[] embeddings, int embStart, int paragraphLength, int flags) {
    166         if (text == null) {
    167             throw new IllegalArgumentException("text is null");
    168         }
    169         if (paragraphLength < 0) {
    170             throw new IllegalArgumentException("bad length: " + paragraphLength);
    171         }
    172         if (textStart < 0 || paragraphLength > text.length - textStart) {
    173             throw new IllegalArgumentException("bad range: " + textStart +
    174                                                " length: " + paragraphLength +
    175                                                " for text of length: " + text.length);
    176         }
    177         if (embeddings != null && (embStart < 0 || paragraphLength > embeddings.length - embStart)) {
    178             throw new IllegalArgumentException("bad range: " + embStart +
    179                                                " length: " + paragraphLength +
    180                                                " for embeddings of length: " + text.length);
    181         }
    182 
    183         bidiBase = new android.icu.text.Bidi(text, textStart, embeddings, embStart,
    184                                              paragraphLength, translateConstToIcu(flags));
    185     }
    186 
    187     private Bidi(android.icu.text.Bidi bidiBase) {
    188         this.bidiBase = bidiBase;
    189     }
    190 
    191     /**
    192      * Create a Bidi object representing the bidi information on a line of text within
    193      * the paragraph represented by the current Bidi.  This call is not required if the
    194      * entire paragraph fits on one line.
    195      *
    196      * @param lineStart the offset from the start of the paragraph to the start of the line.
    197      * @param lineLimit the offset from the start of the paragraph to the limit of the line.
    198      * @return a {@code Bidi} object
    199      */
    200     public Bidi createLineBidi(int lineStart, int lineLimit) {
    201         if (lineStart < 0 || lineLimit < 0 || lineStart > lineLimit || lineLimit > getLength()) {
    202             throw new IllegalArgumentException("Invalid ranges (start=" + lineStart + ", " +
    203                                                "limit=" + lineLimit + ", length=" + getLength() + ")");
    204         }
    205 
    206         // In the special case where the start and end positions are the same, we return a new bidi
    207         // instance which is empty. Note that the default constructor for an empty ICU4J bidi
    208         // instance is not the same as passing in empty values. This way allows one to call
    209         // .getLength() for example and return a correct value instead of an IllegalStateException
    210         // being thrown, which happens in the case of using the empty constructor.
    211         if (lineStart == lineLimit) {
    212             return new Bidi(new android.icu.text.Bidi(new char[] {}, 0, new byte[] {}, 0, 0,
    213                                                       translateConstToIcu(DIRECTION_LEFT_TO_RIGHT)));
    214          }
    215 
    216         return new Bidi(bidiBase.createLineBidi(lineStart, lineLimit));
    217     }
    218 
    219     /**
    220      * Return true if the line is not left-to-right or right-to-left.  This means it either has mixed runs of left-to-right
    221      * and right-to-left text, or the base direction differs from the direction of the only run of text.
    222      *
    223      * @return true if the line is not left-to-right or right-to-left.
    224      */
    225     public boolean isMixed() {
    226         return bidiBase.isMixed();
    227     }
    228 
    229     /**
    230      * Return true if the line is all left-to-right text and the base direction is left-to-right.
    231      *
    232      * @return true if the line is all left-to-right text and the base direction is left-to-right
    233      */
    234     public boolean isLeftToRight() {
    235         return bidiBase.isLeftToRight();
    236     }
    237 
    238     /**
    239      * Return true if the line is all right-to-left text, and the base direction is right-to-left.
    240      * @return true if the line is all right-to-left text, and the base direction is right-to-left
    241      */
    242     public boolean isRightToLeft() {
    243         return bidiBase.isRightToLeft();
    244     }
    245 
    246     /**
    247      * Return the length of text in the line.
    248      * @return the length of text in the line
    249      */
    250     public int getLength() {
    251         return bidiBase.getLength();
    252     }
    253 
    254     /**
    255      * Return true if the base direction is left-to-right.
    256      * @return true if the base direction is left-to-right
    257      */
    258     public boolean baseIsLeftToRight() {
    259         return bidiBase.baseIsLeftToRight();
    260     }
    261 
    262     /**
    263      * Return the base level (0 if left-to-right, 1 if right-to-left).
    264      * @return the base level
    265      */
    266     public int getBaseLevel() {
    267         return bidiBase.getParaLevel();
    268     }
    269 
    270     /**
    271      * Return the resolved level of the character at offset.  If offset is
    272      * {@literal <} 0 or &ge; the length of the line, return the base direction
    273      * level.
    274      *
    275      * @param offset the index of the character for which to return the level
    276      * @return the resolved level of the character at offset
    277      */
    278     public int getLevelAt(int offset) {
    279         try {
    280             return bidiBase.getLevelAt(offset);
    281         } catch (IllegalArgumentException e) {
    282             return getBaseLevel();
    283         }
    284     }
    285 
    286     /**
    287      * Return the number of level runs.
    288      * @return the number of level runs
    289      */
    290     public int getRunCount() {
    291         int runCount = bidiBase.countRuns();
    292         return (runCount == 0 ? 1 : runCount);
    293     }
    294 
    295     /**
    296      * Return the level of the nth logical run in this line.
    297      * @param run the index of the run, between 0 and <code>getRunCount()</code>
    298      * @return the level of the run
    299      */
    300     public int getRunLevel(int run) {
    301         // Paper over a the ICU4J behaviour of strictly enforcing run must be strictly less than
    302         // the number of runs. Done to maintain compatibility with previous C implementation.
    303         if (run == getRunCount()) {
    304             return getBaseLevel();
    305         }
    306         return (bidiBase.countRuns() == 0 ? bidiBase.getBaseLevel() : bidiBase.getRunLevel(run));
    307     }
    308 
    309     /**
    310      * Return the index of the character at the start of the nth logical run in this line, as
    311      * an offset from the start of the line.
    312      * @param run the index of the run, between 0 and <code>getRunCount()</code>
    313      * @return the start of the run
    314      */
    315     public int getRunStart(int run) {
    316         // Paper over a the ICU4J behaviour of strictly enforcing run must be strictly less than
    317         // the number of runs. Done to maintain compatibility with previous C implementation.
    318         if (run == getRunCount()) {
    319             return getBaseLevel();
    320         }
    321         return (bidiBase.countRuns() == 0 ? 0 : bidiBase.getRunStart(run));
    322     }
    323 
    324     /**
    325      * Return the index of the character past the end of the nth logical run in this line, as
    326      * an offset from the start of the line.  For example, this will return the length
    327      * of the line for the last run on the line.
    328      * @param run the index of the run, between 0 and <code>getRunCount()</code>
    329      * @return limit the limit of the run
    330      */
    331     public int getRunLimit(int run) {
    332         // Paper over a the ICU4J behaviour of strictly enforcing run must be strictly less than
    333         // the number of runs. Done to maintain compatibility with previous C implementation.
    334         if (run == getRunCount()) {
    335             return getBaseLevel();
    336         }
    337         return (bidiBase.countRuns() == 0 ? bidiBase.getLength() : bidiBase.getRunLimit(run));
    338     }
    339 
    340     /**
    341      * Return true if the specified text requires bidi analysis.  If this returns false,
    342      * the text will display left-to-right.  Clients can then avoid constructing a Bidi object.
    343      * Text in the Arabic Presentation Forms area of Unicode is presumed to already be shaped
    344      * and ordered for display, and so will not cause this function to return true.
    345      *
    346      * @param text the text containing the characters to test
    347      * @param start the start of the range of characters to test
    348      * @param limit the limit of the range of characters to test
    349      * @return true if the range of characters requires bidi analysis
    350      */
    351     public static boolean requiresBidi(char[] text, int start, int limit) {
    352         if (0 > start || start > limit || limit > text.length) {
    353             throw new IllegalArgumentException("Value start " + start +
    354                                                " is out of range 0 to " + limit);
    355         }
    356         return android.icu.text.Bidi.requiresBidi(text, start, limit);
    357     }
    358 
    359     /**
    360      * Reorder the objects in the array into visual order based on their levels.
    361      * This is a utility function to use when you have a collection of objects
    362      * representing runs of text in logical order, each run containing text
    363      * at a single level.  The elements at <code>index</code> from
    364      * <code>objectStart</code> up to <code>objectStart + count</code>
    365      * in the objects array will be reordered into visual order assuming
    366      * each run of text has the level indicated by the corresponding element
    367      * in the levels array (at <code>index - objectStart + levelStart</code>).
    368      *
    369      * @param levels an array representing the bidi level of each object
    370      * @param levelStart the start position in the levels array
    371      * @param objects the array of objects to be reordered into visual order
    372      * @param objectStart the start position in the objects array
    373      * @param count the number of objects to reorder
    374      */
    375     public static void reorderVisually(byte[] levels, int levelStart, Object[] objects, int objectStart, int count) {
    376         if (0 > levelStart || levels.length <= levelStart) {
    377             throw new IllegalArgumentException("Value levelStart " +
    378                       levelStart + " is out of range 0 to " +
    379                       (levels.length-1));
    380         }
    381         if (0 > objectStart || objects.length <= objectStart) {
    382             throw new IllegalArgumentException("Value objectStart " +
    383                       levelStart + " is out of range 0 to " +
    384                       (objects.length-1));
    385         }
    386         if (0 > count || objects.length < (objectStart+count)) {
    387             throw new IllegalArgumentException("Value count " +
    388                       levelStart + " is out of range 0 to " +
    389                       (objects.length - objectStart));
    390         }
    391         android.icu.text.Bidi.reorderVisually(levels, levelStart, objects, objectStart, count);
    392     }
    393 
    394     /**
    395      * Display the bidi internal state, used in debugging.
    396      */
    397     public String toString() {
    398         return getClass().getName()
    399             + "[direction: " + bidiBase.getDirection() + " baseLevel: " + bidiBase.getBaseLevel()
    400             + " length: " + bidiBase.getLength() + " runs: " + bidiBase.getRunCount() + "]";
    401     }
    402 }
    403