Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 *******************************************************************************
      6 *   Copyright (C) 2001-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *******************************************************************************
      9 */
     10 
     11 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
     12  * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
     13  * concept of RUNS_ONLY which is a double operation.
     14  * It could be advantageous to divide this into 3 concepts:
     15  * a) Operation: direct / inverse / RUNS_ONLY
     16  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
     17  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
     18  * This would allow combinations not possible today like RUNS_ONLY with
     19  * NUMBERS_SPECIAL.
     20  * It would also allow setting INSERT_MARKS for the direct step of RUNS_ONLY
     21  * and REMOVE_CONTROLS for the inverse step.
     22  * Not all combinations would be supported, and probably not all make sense.
     23  * This would need to document which ones are supported and what are the
     24  * fallbacks for unsupported combinations.
     25  */
     26 
     27 //TODO: make sample program do something simple but real and complete
     28 
     29 package android.icu.text;
     30 
     31 import java.awt.font.NumericShaper;
     32 import java.awt.font.TextAttribute;
     33 import java.lang.reflect.Array;
     34 import java.text.AttributedCharacterIterator;
     35 import java.util.Arrays;
     36 
     37 import android.icu.impl.UBiDiProps;
     38 import android.icu.lang.UCharacter;
     39 import android.icu.lang.UCharacterDirection;
     40 import android.icu.lang.UProperty;
     41 
     42 /**
     43  *
     44  * <h2>Bidi algorithm for ICU</h2>
     45  *
     46  * This is an implementation of the Unicode Bidirectional Algorithm. The
     47  * algorithm is defined in the <a
     48  * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.
     49  * <p>
     50  *
     51  * Note: Libraries that perform a bidirectional algorithm and reorder strings
     52  * accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
     53  * shaping (ArabicShaping) classes can be used at the core of such "Storage
     54  * Layout Engines".
     55  *
     56  * <h3>General remarks about the API:</h3>
     57  *
     58  * The &quot;limit&quot; of a sequence of characters is the position just after
     59  * their last character, i.e., one more than that position.
     60  * <p>
     61  *
     62  * Some of the API methods provide access to &quot;runs&quot;. Such a
     63  * &quot;run&quot; is defined as a sequence of characters that are at the same
     64  * embedding level after performing the Bidi algorithm.
     65  *
     66  * <h3>Basic concept: paragraph</h3>
     67  * A piece of text can be divided into several paragraphs by characters
     68  * with the Bidi class <code>Block Separator</code>. For handling of
     69  * paragraphs, see:
     70  * <ul>
     71  * <li>{@link #countParagraphs}
     72  * <li>{@link #getParaLevel}
     73  * <li>{@link #getParagraph}
     74  * <li>{@link #getParagraphByIndex}
     75  * </ul>
     76  *
     77  * <h3>Basic concept: text direction</h3>
     78  * The direction of a piece of text may be:
     79  * <ul>
     80  * <li>{@link #LTR}
     81  * <li>{@link #RTL}
     82  * <li>{@link #MIXED}
     83  * <li>{@link #NEUTRAL}
     84  * </ul>
     85  *
     86  * <h3>Basic concept: levels</h3>
     87  *
     88  * Levels in this API represent embedding levels according to the Unicode
     89  * Bidirectional Algorithm.
     90  * Their low-order bit (even/odd value) indicates the visual direction.<p>
     91  *
     92  * Levels can be abstract values when used for the
     93  * <code>paraLevel</code> and <code>embeddingLevels</code>
     94  * arguments of <code>setPara()</code>; there:
     95  * <ul>
     96  * <li>the high-order bit of an <code>embeddingLevels[]</code>
     97  * value indicates whether the using application is
     98  * specifying the level of a character to <i>override</i> whatever the
     99  * Bidi implementation would resolve it to.</li>
    100  * <li><code>paraLevel</code> can be set to the
    101  * pseudo-level values <code>LEVEL_DEFAULT_LTR</code>
    102  * and <code>LEVEL_DEFAULT_RTL</code>.</li>
    103  * </ul>
    104  *
    105  * <p>The related constants are not real, valid level values.
    106  * <code>LEVEL_DEFAULT_XXX</code> can be used to specify
    107  * a default for the paragraph level for
    108  * when the <code>setPara()</code> method
    109  * shall determine it but there is no
    110  * strongly typed character in the input.<p>
    111  *
    112  * Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even
    113  * and the one for <code>LEVEL_DEFAULT_RTL</code> is odd,
    114  * just like with normal LTR and RTL level values -
    115  * these special values are designed that way. Also, the implementation
    116  * assumes that MAX_EXPLICIT_LEVEL is odd.
    117  *
    118  * <b>See Also:</b>
    119  * <ul>
    120  * <li>{@link #LEVEL_DEFAULT_LTR}
    121  * <li>{@link #LEVEL_DEFAULT_RTL}
    122  * <li>{@link #LEVEL_OVERRIDE}
    123  * <li>{@link #MAX_EXPLICIT_LEVEL}
    124  * <li>{@link #setPara}
    125  * </ul>
    126  *
    127  * <h3>Basic concept: Reordering Mode</h3>
    128  * Reordering mode values indicate which variant of the Bidi algorithm to
    129  * use.
    130  *
    131  * <b>See Also:</b>
    132  * <ul>
    133  * <li>{@link #setReorderingMode}
    134  * <li>{@link #REORDER_DEFAULT}
    135  * <li>{@link #REORDER_NUMBERS_SPECIAL}
    136  * <li>{@link #REORDER_GROUP_NUMBERS_WITH_R}
    137  * <li>{@link #REORDER_RUNS_ONLY}
    138  * <li>{@link #REORDER_INVERSE_NUMBERS_AS_L}
    139  * <li>{@link #REORDER_INVERSE_LIKE_DIRECT}
    140  * <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}
    141  * </ul>
    142  *
    143  * <h3>Basic concept: Reordering Options</h3>
    144  * Reordering options can be applied during Bidi text transformations.
    145  *
    146  * <b>See Also:</b>
    147  * <ul>
    148  * <li>{@link #setReorderingOptions}
    149  * <li>{@link #OPTION_DEFAULT}
    150  * <li>{@link #OPTION_INSERT_MARKS}
    151  * <li>{@link #OPTION_REMOVE_CONTROLS}
    152  * <li>{@link #OPTION_STREAMING}
    153  * </ul>
    154  *
    155  * <h4> Sample code for the ICU Bidi API </h4>
    156  *
    157  * <h5>Rendering a paragraph with the ICU Bidi API</h5>
    158  *
    159  * This is (hypothetical) sample code that illustrates how the ICU Bidi API
    160  * could be used to render a paragraph of text. Rendering code depends highly on
    161  * the graphics system, therefore this sample code must make a lot of
    162  * assumptions, which may or may not match any existing graphics system's
    163  * properties.
    164  *
    165  * <p>
    166  * The basic assumptions are:
    167  *
    168  * <ul>
    169  * <li>Rendering is done from left to right on a horizontal line.</li>
    170  * <li>A run of single-style, unidirectional text can be rendered at once.
    171  * </li>
    172  * <li>Such a run of text is passed to the graphics system with characters
    173  * (code units) in logical order.</li>
    174  * <li>The line-breaking algorithm is very complicated and Locale-dependent -
    175  * and therefore its implementation omitted from this sample code.</li>
    176  * </ul>
    177  *
    178  * <pre>
    179  *
    180  *  package android.icu.dev.test.bidi;
    181  *
    182  *  import android.icu.text.Bidi;
    183  *  import android.icu.text.BidiRun;
    184  *
    185  *  public class Sample {
    186  *
    187  *      static final int styleNormal = 0;
    188  *      static final int styleSelected = 1;
    189  *      static final int styleBold = 2;
    190  *      static final int styleItalics = 4;
    191  *      static final int styleSuper=8;
    192  *      static final int styleSub = 16;
    193  *
    194  *      static class StyleRun {
    195  *          int limit;
    196  *          int style;
    197  *
    198  *          public StyleRun(int limit, int style) {
    199  *              this.limit = limit;
    200  *              this.style = style;
    201  *          }
    202  *      }
    203  *
    204  *      static class Bounds {
    205  *          int start;
    206  *          int limit;
    207  *
    208  *          public Bounds(int start, int limit) {
    209  *              this.start = start;
    210  *              this.limit = limit;
    211  *          }
    212  *      }
    213  *
    214  *      static int getTextWidth(String text, int start, int limit,
    215  *                              StyleRun[] styleRuns, int styleRunCount) {
    216  *          // simplistic way to compute the width
    217  *          return limit - start;
    218  *      }
    219  *
    220  *      // set limit and StyleRun limit for a line
    221  *      // from text[start] and from styleRuns[styleRunStart]
    222  *      // using Bidi.getLogicalRun(...)
    223  *      // returns line width
    224  *      static int getLineBreak(String text, Bounds line, Bidi para,
    225  *                              StyleRun styleRuns[], Bounds styleRun) {
    226  *          // dummy return
    227  *          return 0;
    228  *      }
    229  *
    230  *      // render runs on a line sequentially, always from left to right
    231  *
    232  *      // prepare rendering a new line
    233  *      static void startLine(byte textDirection, int lineWidth) {
    234  *          System.out.println();
    235  *      }
    236  *
    237  *      // render a run of text and advance to the right by the run width
    238  *      // the text[start..limit-1] is always in logical order
    239  *      static void renderRun(String text, int start, int limit,
    240  *                            byte textDirection, int style) {
    241  *      }
    242  *
    243  *      // We could compute a cross-product
    244  *      // from the style runs with the directional runs
    245  *      // and then reorder it.
    246  *      // Instead, here we iterate over each run type
    247  *      // and render the intersections -
    248  *      // with shortcuts in simple (and common) cases.
    249  *      // renderParagraph() is the main function.
    250  *
    251  *      // render a directional run with
    252  *      // (possibly) multiple style runs intersecting with it
    253  *      static void renderDirectionalRun(String text, int start, int limit,
    254  *                                       byte direction, StyleRun styleRuns[],
    255  *                                       int styleRunCount) {
    256  *          int i;
    257  *
    258  *          // iterate over style runs
    259  *          if (direction == Bidi.LTR) {
    260  *              int styleLimit;
    261  *              for (i = 0; i &lt; styleRunCount; ++i) {
    262  *                  styleLimit = styleRuns[i].limit;
    263  *                  if (start &lt; styleLimit) {
    264  *                      if (styleLimit &gt; limit) {
    265  *                          styleLimit = limit;
    266  *                      }
    267  *                      renderRun(text, start, styleLimit,
    268  *                                direction, styleRuns[i].style);
    269  *                      if (styleLimit == limit) {
    270  *                          break;
    271  *                      }
    272  *                      start = styleLimit;
    273  *                  }
    274  *              }
    275  *          } else {
    276  *              int styleStart;
    277  *
    278  *              for (i = styleRunCount-1; i &gt;= 0; --i) {
    279  *                  if (i &gt; 0) {
    280  *                      styleStart = styleRuns[i-1].limit;
    281  *                  } else {
    282  *                      styleStart = 0;
    283  *                  }
    284  *                  if (limit &gt;= styleStart) {
    285  *                      if (styleStart &lt; start) {
    286  *                          styleStart = start;
    287  *                      }
    288  *                      renderRun(text, styleStart, limit, direction,
    289  *                                styleRuns[i].style);
    290  *                      if (styleStart == start) {
    291  *                          break;
    292  *                      }
    293  *                      limit = styleStart;
    294  *                  }
    295  *              }
    296  *          }
    297  *      }
    298  *
    299  *      // the line object represents text[start..limit-1]
    300  *      static void renderLine(Bidi line, String text, int start, int limit,
    301  *                             StyleRun styleRuns[], int styleRunCount) {
    302  *          byte direction = line.getDirection();
    303  *          if (direction != Bidi.MIXED) {
    304  *              // unidirectional
    305  *              if (styleRunCount &lt;= 1) {
    306  *                  renderRun(text, start, limit, direction, styleRuns[0].style);
    307  *              } else {
    308  *                  renderDirectionalRun(text, start, limit, direction,
    309  *                                       styleRuns, styleRunCount);
    310  *              }
    311  *          } else {
    312  *              // mixed-directional
    313  *              int count, i;
    314  *              BidiRun run;
    315  *
    316  *              try {
    317  *                  count = line.countRuns();
    318  *              } catch (IllegalStateException e) {
    319  *                  e.printStackTrace();
    320  *                  return;
    321  *              }
    322  *              if (styleRunCount &lt;= 1) {
    323  *                  int style = styleRuns[0].style;
    324  *
    325  *                  // iterate over directional runs
    326  *                  for (i = 0; i &lt; count; ++i) {
    327  *                      run = line.getVisualRun(i);
    328  *                      renderRun(text, run.getStart(), run.getLimit(),
    329  *                                run.getDirection(), style);
    330  *                  }
    331  *              } else {
    332  *                  // iterate over both directional and style runs
    333  *                  for (i = 0; i &lt; count; ++i) {
    334  *                      run = line.getVisualRun(i);
    335  *                      renderDirectionalRun(text, run.getStart(),
    336  *                                           run.getLimit(), run.getDirection(),
    337  *                                           styleRuns, styleRunCount);
    338  *                  }
    339  *              }
    340  *          }
    341  *      }
    342  *
    343  *      static void renderParagraph(String text, byte textDirection,
    344  *                                  StyleRun styleRuns[], int styleRunCount,
    345  *                                  int lineWidth) {
    346  *          int length = text.length();
    347  *          Bidi para = new Bidi();
    348  *          try {
    349  *              para.setPara(text,
    350  *                           textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL
    351  *                                              : Bidi.LEVEL_DEFAULT_LTR,
    352  *                           null);
    353  *          } catch (Exception e) {
    354  *              e.printStackTrace();
    355  *              return;
    356  *          }
    357  *          byte paraLevel = (byte)(1 &amp; para.getParaLevel());
    358  *          StyleRun styleRun = new StyleRun(length, styleNormal);
    359  *
    360  *          if (styleRuns == null || styleRunCount &lt;= 0) {
    361  *              styleRuns = new StyleRun[1];
    362  *              styleRunCount = 1;
    363  *              styleRuns[0] = styleRun;
    364  *          }
    365  *          // assume styleRuns[styleRunCount-1].limit&gt;=length
    366  *
    367  *          int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);
    368  *          if (width &lt;= lineWidth) {
    369  *              // everything fits onto one line
    370  *
    371  *              // prepare rendering a new line from either left or right
    372  *              startLine(paraLevel, width);
    373  *
    374  *              renderLine(para, text, 0, length, styleRuns, styleRunCount);
    375  *          } else {
    376  *              // we need to render several lines
    377  *              Bidi line = new Bidi(length, 0);
    378  *              int start = 0, limit;
    379  *              int styleRunStart = 0, styleRunLimit;
    380  *
    381  *              for (;;) {
    382  *                  limit = length;
    383  *                  styleRunLimit = styleRunCount;
    384  *                  width = getLineBreak(text, new Bounds(start, limit),
    385  *                                       para, styleRuns,
    386  *                                       new Bounds(styleRunStart, styleRunLimit));
    387  *                  try {
    388  *                      line = para.setLine(start, limit);
    389  *                  } catch (Exception e) {
    390  *                      e.printStackTrace();
    391  *                      return;
    392  *                  }
    393  *                  // prepare rendering a new line
    394  *                  // from either left or right
    395  *                  startLine(paraLevel, width);
    396  *
    397  *                  if (styleRunStart &gt; 0) {
    398  *                      int newRunCount = styleRuns.length - styleRunStart;
    399  *                      StyleRun[] newRuns = new StyleRun[newRunCount];
    400  *                      System.arraycopy(styleRuns, styleRunStart, newRuns, 0,
    401  *                                       newRunCount);
    402  *                      renderLine(line, text, start, limit, newRuns,
    403  *                                 styleRunLimit - styleRunStart);
    404  *                  } else {
    405  *                      renderLine(line, text, start, limit, styleRuns,
    406  *                                 styleRunLimit - styleRunStart);
    407  *                  }
    408  *                  if (limit == length) {
    409  *                      break;
    410  *                  }
    411  *                  start = limit;
    412  *                  styleRunStart = styleRunLimit - 1;
    413  *                  if (start &gt;= styleRuns[styleRunStart].limit) {
    414  *                      ++styleRunStart;
    415  *                  }
    416  *              }
    417  *          }
    418  *      }
    419  *
    420  *      public static void main(String[] args)
    421  *      {
    422  *          renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);
    423  *          renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);
    424  *      }
    425  *  }
    426  *
    427  * </pre>
    428  *
    429  * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer)
    430  * @hide Only a subset of ICU is exposed in Android
    431  */
    432 
    433 /*
    434  * General implementation notes:
    435  *
    436  * Throughout the implementation, there are comments like (W2) that refer to
    437  * rules of the BiDi algorithm, in this example to the second rule of the
    438  * resolution of weak types.
    439  *
    440  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
    441  * character according to UTF-16, the second UChar gets the directional property of
    442  * the entire character assigned, while the first one gets a BN, a boundary
    443  * neutral, type, which is ignored by most of the algorithm according to
    444  * rule (X9) and the implementation suggestions of the BiDi algorithm.
    445  *
    446  * Later, adjustWSLevels() will set the level for each BN to that of the
    447  * following character (UChar), which results in surrogate pairs getting the
    448  * same level on each of their surrogates.
    449  *
    450  * In a UTF-8 implementation, the same thing could be done: the last byte of
    451  * a multi-byte sequence would get the "real" property, while all previous
    452  * bytes of that sequence would get BN.
    453  *
    454  * It is not possible to assign all those parts of a character the same real
    455  * property because this would fail in the resolution of weak types with rules
    456  * that look at immediately surrounding types.
    457  *
    458  * As a related topic, this implementation does not remove Boundary Neutral
    459  * types from the input, but ignores them wherever this is relevant.
    460  * For example, the loop for the resolution of the weak types reads
    461  * types until it finds a non-BN.
    462  * Also, explicit embedding codes are neither changed into BN nor removed.
    463  * They are only treated the same way real BNs are.
    464  * As stated before, adjustWSLevels() takes care of them at the end.
    465  * For the purpose of conformance, the levels of all these codes
    466  * do not matter.
    467  *
    468  * Note that this implementation modifies the dirProps
    469  * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
    470  * X6, N0 (replace paired brackets by L or R).
    471  *
    472  * In this implementation, the resolution of weak types (W1 to W6),
    473  * neutrals (N1 and N2), and the assignment of the resolved level (In)
    474  * are all done in one single loop, in resolveImplicitLevels().
    475  * Changes of dirProp values are done on the fly, without writing
    476  * them back to the dirProps array.
    477  *
    478  *
    479  * This implementation contains code that allows bypassing steps of the
    480  * algorithm that are not needed on the specific paragraph
    481  * in order to speed up the most common cases considerably,
    482  * like text that is entirely LTR, or RTL text without numbers.
    483  *
    484  * Most of this is done by setting a bit for each directional property
    485  * in a flags variable and later checking for whether there are
    486  * any LTR characters or any RTL characters, or both, whether
    487  * there are any explicit embedding codes, etc.
    488  *
    489  * If the (Xn) steps are performed, then the flags are re-evaluated,
    490  * because they will then not contain the embedding codes any more
    491  * and will be adjusted for override codes, so that subsequently
    492  * more bypassing may be possible than what the initial flags suggested.
    493  *
    494  * If the text is not mixed-directional, then the
    495  * algorithm steps for the weak type resolution are not performed,
    496  * and all levels are set to the paragraph level.
    497  *
    498  * If there are no explicit embedding codes, then the (Xn) steps
    499  * are not performed.
    500  *
    501  * If embedding levels are supplied as a parameter, then all
    502  * explicit embedding codes are ignored, and the (Xn) steps
    503  * are not performed.
    504  *
    505  * White Space types could get the level of the run they belong to,
    506  * and are checked with a test of (flags&MASK_EMBEDDING) to
    507  * consider if the paragraph direction should be considered in
    508  * the flags variable.
    509  *
    510  * If there are no White Space types in the paragraph, then
    511  * (L1) is not necessary in adjustWSLevels().
    512  */
    513 
    514 public class Bidi {
    515 
    516     static class Point {
                /* One element of InsertPoints.points: a text position paired with a flag.
                   NOTE(review): presumably marks where an LRM/RLM is to be inserted - confirm. */
    517         int pos;    /* position in text */
    518         int flag;   /* flag for LRM/RLM, before/after */
    519     }
    520 
    521     static class InsertPoints {
                /* Holds an array of Point entries (initially zero-length) plus two counters. */
    522         int size;       /* NOTE(review): presumably the number of entries of points in use - confirm */
    523         int confirmed;  /* NOTE(review): presumably how many of those entries are confirmed - confirm */
    524         Point[] points = new Point[0];  /* starts out as an empty array */
    525     }
    526 
    527     static class Opening {
                /* Bookkeeping for one opening bracket (element type of BracketData.openings). */
    528         int   position;                 /* position of opening bracket */
    529         int   match;                    /* matching char or -position of closing bracket */
    530         int   contextPos;               /* position of last strong char found before opening */
    531         short flags;                    /* bits for L or R/AL found within the pair */
    532         byte  contextDir;               /* L or R according to last strong char before opening */
    533     }
    534 
    535     static class IsoRun {
                /* Bookkeeping for one isolated sequence entry (element type of
                   BracketData.isoRuns); start/limit delimit its range of opening entries. */
    536         int   contextPos;               /* position of char determining context */
    537         short start;                    /* index of first opening entry for this run */
    538         short limit;                    /* index after last opening entry for this run */
    539         byte  level;                    /* level of this run */
    540         byte  lastStrong;               /* bidi class of last strong char found in this run */
    541         byte  lastBase;                 /* bidi class of last base char found in this run */
    542         byte  contextDir;               /* L or R to use as context for following openings */
    543     }
    544 
    545     static class BracketData {
                /* Working state for bracket handling: the recorded opening brackets and
                   the array of nested isolated sequence entries. */
    546         Opening[] openings = new Opening[SIMPLE_OPENINGS_COUNT];    /* allocated with SIMPLE_OPENINGS_COUNT slots */
    547         int   isoRunLast;               /* index of last used entry (presumably in isoRuns - confirm) */
    548         /* array of nested isolated sequence entries; its size can never exceed MAX_EXPLICIT_LEVEL
    549            + 1 for index 0, + 1 for before the first isolated sequence */
    550         IsoRun[]  isoRuns = new IsoRun[MAX_EXPLICIT_LEVEL+2];
    551         boolean   isNumbersSpecial;     /* reordering mode for NUMBERS_SPECIAL */
    552     }
    553 
    554     static class Isolate {
                /* NOTE(review): these fields carry no documentation. From the names, this
                   looks like a snapshot of two start positions and two state values kept
                   per isolate - confirm against the code that fills it in. */
    555         int   startON;
    556         int   start1;
    557         short stateImp;
    558         short state;
    559     }
    560 
    561     /** Paragraph level setting<p>
    562      *
    563      * Constant indicating that the base direction depends on the first strong
    564      * directional character in the text according to the Unicode Bidirectional
    565      * Algorithm. If no strong directional character is present,
    566      * then set the paragraph level to 0 (left-to-right).<p>
    567      *
    568      * If this value is used in conjunction with reordering modes
    569      * <code>REORDER_INVERSE_LIKE_DIRECT</code> or
    570      * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
    571      * is assumed to be visual LTR, and the text after reordering is required
    572      * to be the corresponding logical string with appropriate contextual
    573      * direction. The direction of the result string will be RTL if either
    574      * the rightmost or leftmost strong character of the source text is RTL
    575      * or Arabic Letter; the direction will be LTR otherwise.<p>
    576      *
    577      * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
    578      * be added at the beginning of the result string to ensure round trip
    579      * (that the result string, when reordered back to visual, will produce
    580      * the original source text).<p>
             *
             * This is a pseudo-level for the <code>paraLevel</code> argument of
             * <code>setPara()</code>, not a real, valid level value; its value is
             * even, just like normal LTR level values.
    581      * @see #REORDER_INVERSE_LIKE_DIRECT
    582      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
    583      */
    584     public static final byte LEVEL_DEFAULT_LTR = (byte)0x7e;
    585 
    586     /** Paragraph level setting<p>
    587      *
    588      * Constant indicating that the base direction depends on the first strong
    589      * directional character in the text according to the Unicode Bidirectional
    590      * Algorithm. If no strong directional character is present,
    591      * then set the paragraph level to 1 (right-to-left).<p>
    592      *
    593      * If this value is used in conjunction with reordering modes
    594      * <code>REORDER_INVERSE_LIKE_DIRECT</code> or
    595      * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
    596      * is assumed to be visual LTR, and the text after reordering is required
    597      * to be the corresponding logical string with appropriate contextual
    598      * direction. The direction of the result string will be RTL if either
    599      * the rightmost or leftmost strong character of the source text is RTL
    600      * or Arabic Letter, or if the text contains no strong character;
    601      * the direction will be LTR otherwise.<p>
    602      *
    603      * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
    604      * be added at the beginning of the result string to ensure round trip
    605      * (that the result string, when reordered back to visual, will produce
    606      * the original source text).<p>
             *
             * This is a pseudo-level for the <code>paraLevel</code> argument of
             * <code>setPara()</code>, not a real, valid level value; its value is
             * odd, just like normal RTL level values.
    607      * @see #REORDER_INVERSE_LIKE_DIRECT
    608      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
    609      */
    610     public static final byte LEVEL_DEFAULT_RTL = (byte)0x7f;
    611 
    612     /**
    613      * Maximum explicit embedding level.
    614      * (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).
             * The implementation assumes that this value is odd.
    615      */
    616     public static final byte MAX_EXPLICIT_LEVEL = 125;
    617 
    618     /**
    619      * Bit flag for level input.
    620      * Overrides directional properties.
             * <p>This is the high-order bit of a level byte. When it is set in an
             * <code>embeddingLevels[]</code> value passed to <code>setPara()</code>,
             * the level specified by the application overrides whatever the Bidi
             * implementation would resolve it to.
    621      */
    622     public static final byte LEVEL_OVERRIDE = (byte)0x80;
    623 
    /**
     * Special value which can be returned by the mapping methods when a
     * logical index has no corresponding visual index or vice-versa. This may
     * happen for the logical-to-visual mapping of a Bidi control when option
     * <code>OPTION_REMOVE_CONTROLS</code> is
     * specified. This can also happen for the visual-to-logical mapping of a
     * Bidi mark (LRM or RLM) inserted by option
     * <code>OPTION_INSERT_MARKS</code>.
     * <p>The value is <code>-1</code>.
     * @see #getVisualIndex
     * @see #getVisualMap
     * @see #getLogicalIndex
     * @see #getLogicalMap
     * @see #OPTION_INSERT_MARKS
     * @see #OPTION_REMOVE_CONTROLS
     */
    public static final int MAP_NOWHERE = -1;

    /**
     * Left-to-right text.
     * <ul>
     * <li>As return value for <code>getDirection()</code>, it means
     *     that the source string contains no right-to-left characters, or
     *     that the source string is empty and the paragraph level is even.</li>
     * <li>As return value for <code>getBaseDirection()</code>, it
     *     means that the first strong character of the source string has
     *     a left-to-right direction.</li>
     * </ul>
     */
    public static final byte LTR = 0;

    /**
     * Right-to-left text.
     * <ul>
     * <li>As return value for <code>getDirection()</code>, it means
     *     that the source string contains no left-to-right characters, or
     *     that the source string is empty and the paragraph level is odd.</li>
     * <li>As return value for <code>getBaseDirection()</code>, it
     *     means that the first strong character of the source string has
     *     a right-to-left direction.</li>
     * </ul>
     */
    public static final byte RTL = 1;

    /**
     * Mixed-directional text.
     * <p>As return value for <code>getDirection()</code>, it means
     *    that the source string contains both left-to-right and
     *    right-to-left characters.
     */
    public static final byte MIXED = 2;

    /**
     * No strongly directional text.
     * <p>As return value for <code>getBaseDirection()</code>, it means
     *    that the source string is missing or empty, or contains neither
     *    left-to-right nor right-to-left characters.
     */
    public static final byte NEUTRAL = 3;
    682 
    /**
     * Option bit for <code>writeReordered()</code>:
     * keep combining characters after their base characters in RTL runs.
     *
     * @see #writeReordered
     */
    public static final short KEEP_BASE_COMBINING = 1;

    /**
     * Option bit for <code>writeReordered()</code>:
     * replace characters with the "mirrored" property in RTL runs
     * by their mirror-image mappings.
     *
     * @see #writeReordered
     */
    public static final short DO_MIRRORING = 2;

    /**
     * Option bit for <code>writeReordered()</code>:
     * surround the run with LRMs if necessary;
     * this is part of the approximate "inverse Bidi" algorithm.
     *
     * <p>This option does not imply corresponding adjustment of the index
     * mappings.
     *
     * @see #setInverse
     * @see #writeReordered
     */
    public static final short INSERT_LRM_FOR_NUMERIC = 4;

    /**
     * Option bit for <code>writeReordered()</code>:
     * remove Bidi control characters
     * (this does not affect <code>INSERT_LRM_FOR_NUMERIC</code>).
     *
     * <p>This option does not imply corresponding adjustment of the index
     * mappings.
     *
     * @see #writeReordered
     * @see #INSERT_LRM_FOR_NUMERIC
     */
    public static final short REMOVE_BIDI_CONTROLS = 8;

    /**
     * Option bit for <code>writeReordered()</code>:
     * write the output in reverse order.
     *
     * <p>This has the same effect as calling <code>writeReordered()</code>
     * first without this option, and then calling
     * <code>writeReverse()</code> without mirroring.
     * Doing this in the same step is faster and avoids a temporary buffer.
     * An example for using this option is output to a character terminal that
     * is designed for RTL scripts and stores text in reverse order.
     *
     * @see #writeReordered
     */
    public static final short OUTPUT_REVERSE = 16;
    740 
    /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.
     * @see #setReorderingMode
     */
    public static final short REORDER_DEFAULT = 0;

    /** Reordering mode: Logical to Visual algorithm which handles numbers in
     * a way which mimics the behavior of Windows XP.
     * @see #setReorderingMode
     */
    public static final short REORDER_NUMBERS_SPECIAL = 1;

    /** Reordering mode: Logical to Visual algorithm grouping numbers with
     * adjacent R characters (reversible algorithm).
     * @see #setReorderingMode
     */
    public static final short REORDER_GROUP_NUMBERS_WITH_R = 2;

    /** Reordering mode: Reorder runs only to transform a Logical LTR string
     * to the logical RTL string with the same display, or vice-versa.<br>
     * If this mode is set together with option
     * <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source
     * text may be removed and other controls may be added to produce the
     * minimum combination which has the required display.
     * @see #OPTION_INSERT_MARKS
     * @see #setReorderingMode
     */
    public static final short REORDER_RUNS_ONLY = 3;

    /** Reordering mode: Visual to Logical algorithm which handles numbers
     * like L (same algorithm as selected by <code>setInverse(true)</code>).
     * @see #setInverse
     * @see #setReorderingMode
     */
    public static final short REORDER_INVERSE_NUMBERS_AS_L = 4;

    /** Reordering mode: Visual to Logical algorithm equivalent to the regular
     * Logical to Visual algorithm.
     * @see #setReorderingMode
     */
    public static final short REORDER_INVERSE_LIKE_DIRECT = 5;

    /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
     * <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
     * @see #setReorderingMode
     */
    public static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;

    /* Number of values for reordering mode (the modes above are numbered 0 to 6). */
    static final short REORDER_COUNT = 7;

    /* Reordering mode values must be ordered so that all the regular logical to
     * visual modes come first, and all inverse Bidi modes come last.
     */
    static final short REORDER_LAST_LOGICAL_TO_VISUAL =
            REORDER_NUMBERS_SPECIAL;
    796 
    /**
     * Option value for <code>setReorderingOptions</code>:
     * disable all the options which can be set with this method.
     * @see #setReorderingOptions
     */
    public static final int OPTION_DEFAULT = 0;

    /**
     * Option bit for <code>setReorderingOptions</code>:
     * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
     * a reordering to a Logical order.
     *
     * <p>This option must be set or reset before calling
     * <code>setPara</code>.
     *
     * <p>This option is significant only with reordering modes which generate
     * a result with Logical order, specifically:
     * <ul>
     *   <li><code>REORDER_RUNS_ONLY</code></li>
     *   <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li>
     *   <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li>
     *   <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
     * </ul>
     *
     * <p>If this option is set in conjunction with reordering mode
     * <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
     * <code>setInverse(true)</code>, it implies option
     * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
     * <code>writeReordered()</code>.
     *
     * <p>For other reordering modes, a minimum number of LRM or RLM characters
     * will be added to the source text after reordering it so as to ensure
     * round trip, i.e. when applying the inverse reordering mode on the
     * resulting logical text with removal of Bidi marks
     * (option <code>OPTION_REMOVE_CONTROLS</code> set before calling
     * <code>setPara()</code> or option
     * <code>REMOVE_BIDI_CONTROLS</code> in
     * <code>writeReordered</code>), the result will be identical to the
     * source text in the first transformation.
     *
     * <p>This option will be ignored if specified together with option
     * <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option
     * <code>REMOVE_BIDI_CONTROLS</code> in calls to method
     * <code>writeReordered()</code> and it implies option
     * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
     * <code>writeReordered()</code> if the reordering mode is
     * <code>REORDER_INVERSE_NUMBERS_AS_L</code>.
     *
     * @see #setReorderingMode
     * @see #setReorderingOptions
     * @see #INSERT_LRM_FOR_NUMERIC
     * @see #REMOVE_BIDI_CONTROLS
     * @see #OPTION_REMOVE_CONTROLS
     * @see #REORDER_RUNS_ONLY
     * @see #REORDER_INVERSE_NUMBERS_AS_L
     * @see #REORDER_INVERSE_LIKE_DIRECT
     * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
     */
    public static final int OPTION_INSERT_MARKS = 1;

    /**
     * Option bit for <code>setReorderingOptions</code>:
     * remove Bidi control characters.
     *
     * <p>This option must be set or reset before calling
     * <code>setPara</code>.
     *
     * <p>This option nullifies option
     * <code>OPTION_INSERT_MARKS</code>. It inhibits option
     * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
     * <code>writeReordered()</code> and it implies option
     * <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.
     *
     * @see #setReorderingMode
     * @see #setReorderingOptions
     * @see #OPTION_INSERT_MARKS
     * @see #INSERT_LRM_FOR_NUMERIC
     * @see #REMOVE_BIDI_CONTROLS
     */
    public static final int OPTION_REMOVE_CONTROLS = 2;

    /**
     * Option bit for <code>setReorderingOptions</code>:
     * process the output as part of a stream to be continued.
     *
     * <p>This option must be set or reset before calling
     * <code>setPara</code>.
     *
     * <p>This option specifies that the caller is interested in processing
     * a large text object in parts. The results of the successive calls are
     * expected to be concatenated by the caller. Only the call for the last
     * part will have this option bit off.
     *
     * <p>When this option bit is on, <code>setPara()</code> may process
     * less than the full source text in order to truncate the text at a
     * meaningful boundary. The caller should call
     * <code>getProcessedLength()</code> immediately after calling
     * <code>setPara()</code> in order to determine how much of the source
     * text has been processed. Source text beyond that length should be
     * resubmitted in following calls to <code>setPara</code>. The
     * processed length may be less than the length of the source text if a
     * character preceding the last character of the source text constitutes a
     * reasonable boundary (like a block separator) for text to be continued.<br>
     * If the last character of the source text constitutes a reasonable
     * boundary, the whole text will be processed at once.<br>
     * If there is no such reasonable boundary anywhere in the source text,
     * the processed length will be zero.<br>
     * The caller should check for such an occurrence and do one of the following:
     * <ul><li>submit a larger amount of text with a better chance to include
     *         a reasonable boundary.</li>
     *     <li>resubmit the same text after turning off option
     *         <code>OPTION_STREAMING</code>.</li></ul>
     * In all cases, this option should be turned off before processing the last
     * part of the text.
     *
     * <p>When the <code>OPTION_STREAMING</code> option is used, it is
     * recommended to call <code>orderParagraphsLTR(true)</code> before calling
     * <code>setPara()</code> so that later paragraphs may be concatenated to
     * previous paragraphs on the right.
     *
     * @see #setReorderingMode
     * @see #setReorderingOptions
     * @see #getProcessedLength
     */
    public static final int OPTION_STREAMING = 4;
    922 
    /*
     *   Comparing the description of the Bidi algorithm with this implementation
     *   is easier with the same names for the Bidi types in the code as there.
     *   See UCharacterDirection
     *
     *   The trailing comment on each line gives the expected numeric value.
     *   ENL and ENR are not taken from UCharacterDirection: they are defined
     *   here as the two values following PDI.
     */
    static final byte L   = UCharacterDirection.LEFT_TO_RIGHT;                  /*  0 */
    static final byte R   = UCharacterDirection.RIGHT_TO_LEFT;                  /*  1 */
    static final byte EN  = UCharacterDirection.EUROPEAN_NUMBER;                /*  2 */
    static final byte ES  = UCharacterDirection.EUROPEAN_NUMBER_SEPARATOR;      /*  3 */
    static final byte ET  = UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR;     /*  4 */
    static final byte AN  = UCharacterDirection.ARABIC_NUMBER;                  /*  5 */
    static final byte CS  = UCharacterDirection.COMMON_NUMBER_SEPARATOR;        /*  6 */
    static final byte B   = UCharacterDirection.BLOCK_SEPARATOR;                /*  7 */
    static final byte S   = UCharacterDirection.SEGMENT_SEPARATOR;              /*  8 */
    static final byte WS  = UCharacterDirection.WHITE_SPACE_NEUTRAL;            /*  9 */
    static final byte ON  = UCharacterDirection.OTHER_NEUTRAL;                  /* 10 */
    static final byte LRE = UCharacterDirection.LEFT_TO_RIGHT_EMBEDDING;        /* 11 */
    static final byte LRO = UCharacterDirection.LEFT_TO_RIGHT_OVERRIDE;         /* 12 */
    static final byte AL  = UCharacterDirection.RIGHT_TO_LEFT_ARABIC;           /* 13 */
    static final byte RLE = UCharacterDirection.RIGHT_TO_LEFT_EMBEDDING;        /* 14 */
    static final byte RLO = UCharacterDirection.RIGHT_TO_LEFT_OVERRIDE;         /* 15 */
    static final byte PDF = UCharacterDirection.POP_DIRECTIONAL_FORMAT;         /* 16 */
    static final byte NSM = UCharacterDirection.DIR_NON_SPACING_MARK;           /* 17 */
    static final byte BN  = UCharacterDirection.BOUNDARY_NEUTRAL;               /* 18 */
    static final byte FSI = UCharacterDirection.FIRST_STRONG_ISOLATE;           /* 19 */
    static final byte LRI = UCharacterDirection.LEFT_TO_RIGHT_ISOLATE;          /* 20 */
    static final byte RLI = UCharacterDirection.RIGHT_TO_LEFT_ISOLATE;          /* 21 */
    static final byte PDI = UCharacterDirection.POP_DIRECTIONAL_ISOLATE;        /* 22 */
    static final byte ENL = PDI + 1;    /* EN after W7 */                       /* 23 */
    static final byte ENR = ENL + 1;    /* EN not subject to W7 */              /* 24 */
    953 
    /**
     * Value returned by <code>BidiClassifier</code> when there is no need to
     * override the standard Bidi class for a given code point.
     *
     * <p>This constant is deprecated; use
     * <code>UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)+1</code> instead.
     *
     * @see BidiClassifier
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int CLASS_DEFAULT = UCharacterDirection.CHAR_DIRECTION_COUNT;

    /* number of paras entries allocated initially */
    static final int SIMPLE_PARAS_COUNT = 10;
    /* number of isolate run entries for paired brackets allocated initially */
    static final int SIMPLE_OPENINGS_COUNT = 20;

    /* carriage return and line feed characters */
    private static final char CR = '\r';
    private static final char LF = '\n';

    /* bit flags (1, 2, 4, 8) naming a mark (LRM or RLM) and a position
     * (before or after); presumably used when recording marks to insert
     * - NOTE(review): confirm against the users of insertPoints */
    static final int LRM_BEFORE = 1;
    static final int LRM_AFTER = 2;
    static final int RLM_BEFORE = 4;
    static final int RLM_AFTER = 8;

    /* flags for Opening.flags: the DirPropFlag bits of L and R */
    static final byte FOUND_L = (byte)DirPropFlag(L);
    static final byte FOUND_R = (byte)DirPropFlag(R);

    /*
     * The following bit is used for the directional isolate status.
     * Stack entries corresponding to isolate sequences are greater than ISOLATE.
     */
    static final int ISOLATE = 0x0100;
    988 
    989 
    /*
     * reference to parent paragraph object (reference to self if this object is
     * a paragraph object); set to null in a newly opened object; set to a
     * real value after a successful execution of setPara or setLine
     */
    Bidi                paraBidi;

    /* Bidi properties data; assigned UBiDiProps.INSTANCE by the constructor */
    final UBiDiProps    bdp;

    /* character array representing the current text */
    char[]              text;

    /* length of the current text */
    int                 originalLength;

    /* if the option OPTION_STREAMING is set, this is the length of
     * text actually processed by <code>setPara</code>, which may be shorter
     * than the original length. Otherwise, it is identical to the original
     * length.
     */
    int                 length;

    /* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi
     * marks are allowed to be inserted in one of the reordering modes, the
     * length of the result string may be different from the processed length.
     */
    int                 resultLength;

    /* indicators for whether memory may be allocated after construction;
     * the constructor sets each one to true when the corresponding
     * preallocation size argument (maxLength / maxRunCount) is 0 */
    boolean             mayAllocateText;
    boolean             mayAllocateRuns;

    /* arrays with one value per text-character;
     * the ...Memory arrays start out as one-element placeholders */
    byte[]              dirPropsMemory = new byte[1];
    byte[]              levelsMemory = new byte[1];
    byte[]              dirProps;
    byte[]              levels;

    /* are we performing an approximation of the "inverse Bidi" algorithm? */
    boolean             isInverse;

    /* are we using the basic algorithm or its variation? */
    int                 reorderingMode;

    /* bitmask for reordering options */
    int                 reorderingOptions;

    /* must block separators receive level 0? */
    boolean             orderParagraphsLTR;

    /* the paragraph level */
    byte                paraLevel;
    /* original paraLevel when contextual */
    /* must be one of DEFAULT_xxx or 0 if not contextual */
    byte                defaultParaLevel;

    /* context data */
    String              prologue;
    String              epilogue;

    /* the following is set in setPara, used in processPropertySeq */

    ImpTabPair          impTabPair;  /* reference to levels state table pair */
    /* the overall paragraph or line directionality */
    byte                direction;

    /* flags is a bit set for which directional properties are in the text */
    int                 flags;

    /* lastArabicPos is index to the last AL in the text, -1 if none */
    int                 lastArabicPos;

    /* characters after trailingWSStart are WS and are */
    /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
    int                 trailingWSStart;

    /* fields for paragraph handling, set in getDirProps() */
    int                 paraCount;
    int[]               paras_limit = new int[SIMPLE_PARAS_COUNT];
    byte[]              paras_level = new byte[SIMPLE_PARAS_COUNT];

    /* fields for line reordering */
    int                 runCount;     /* ==-1: runs not set up yet */
    /* starts out as an empty array */
    BidiRun[]           runsMemory = new BidiRun[0];
    BidiRun[]           runs;

    /* for non-mixed text, we only need a tiny array of runs (no allocation) */
    BidiRun[]           simpleRuns = {new BidiRun()};

    /* fields for managing isolate sequences */
    Isolate[]           isolates;
    /* maximum or current nesting depth of isolate sequences */
    /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
       nesting encountered.
       Within resolveImplicitLevels(), this is the index of the current isolates
       stack entry. */
    int                 isolateCount;

    /* mapping of runs in logical order to visual order */
    int[]               logicalToVisualRunsMap;
    /* flag to indicate that the map has been updated */
    boolean             isGoodLogicalToVisualRunsMap;

    /* customized class provider */
    BidiClassifier      customClassifier = null;

    /* for inverse Bidi with insertion of directional marks */
    InsertPoints        insertPoints = new InsertPoints();

    /* for option OPTION_REMOVE_CONTROLS */
    int                 controlCount;
   1101 
   1102     /*
   1103      * Sometimes, bit values are more appropriate
   1104      * to deal with directionality properties.
   1105      * Abbreviations in these method names refer to names
   1106      * used in the Bidi algorithm.
   1107      */
   1108     static int DirPropFlag(byte dir) {
   1109         return (1 << dir);
   1110     }
   1111 
   1112     boolean testDirPropFlagAt(int flag, int index) {
   1113         return ((DirPropFlag(dirProps[index]) & flag) != 0);
   1114     }
   1115 
   1116     static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
   1117 
   1118     /* to avoid some conditional statements, use tiny constant arrays */
   1119     static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) };
   1120     static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) };
   1121     static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
   1122 
   1123     static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
   1124     static final int DirPropFlagE(byte level)  { return DirPropFlagE[level & 1]; }
   1125     static final int DirPropFlagO(byte level)  { return DirPropFlagO[level & 1]; }
   1126     static final byte DirFromStrong(byte strong) { return strong == L ? L : R; }
   1127     static final byte NoOverride(byte level) { return (byte)(level & ~LEVEL_OVERRIDE); }
   1128 
    /*  are there any characters that are LTR or RTL? */
    static final int MASK_LTR =
        DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(ENL)|DirPropFlag(ENR)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(LRI);
    static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(RLI);

    /* right-to-left strong types (R and AL) */
    static final int MASK_R_AL = DirPropFlag(R)|DirPropFlag(AL);
    /* strong types (L, R, AL) plus the number types EN and AN */
    static final int MASK_STRONG_EN_AN = DirPropFlag(L)|DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(EN)|DirPropFlag(AN);
    /* explicit embedding codes */
    static final int MASK_EXPLICIT = DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(PDF);
    /* explicit embedding codes plus BN */
    static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;

    /* explicit isolate codes */
    static final int MASK_ISO = DirPropFlag(LRI)|DirPropFlag(RLI)|DirPropFlag(FSI)|DirPropFlag(PDI);

    /* paragraph and segment separators */
    static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S);

    /* all types that are counted as White Space or Neutral in some steps */
    static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT|MASK_ISO;

    /* types that are neutrals or could become neutrals in (Wn) */
    static final int MASK_POSSIBLE_N = DirPropFlag(ON)|DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_WS;

    /*
     * These types may be changed to "e",
     * the embedding type (L or R) of the run,
     * in the Bidi algorithm (N2)
     */
    static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N;
   1158 
   1159     /*
   1160      *  the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java
   1161      */
   1162     static byte GetLRFromLevel(byte level)
   1163     {
   1164         return (byte)(level & 1);
   1165     }
   1166 
   1167     static boolean IsDefaultLevel(byte level)
   1168     {
   1169         return ((level & LEVEL_DEFAULT_LTR) == LEVEL_DEFAULT_LTR);
   1170     }
   1171 
   1172     static boolean IsBidiControlChar(int c)
   1173     {
   1174         /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
   1175                            0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
   1176         return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e))
   1177                                              || ((c >= 0x2066) && (c <= 0x2069)));
   1178     }
   1179 
   1180     void verifyValidPara()
   1181     {
   1182         if (!(this == this.paraBidi)) {
   1183             throw new IllegalStateException();
   1184         }
   1185     }
   1186 
   1187     void verifyValidParaOrLine()
   1188     {
   1189         Bidi para = this.paraBidi;
   1190         /* verify Para */
   1191         if (this == para) {
   1192             return;
   1193         }
   1194         /* verify Line */
   1195         if ((para == null) || (para != para.paraBidi)) {
   1196             throw new IllegalStateException();
   1197         }
   1198     }
   1199 
   1200     void verifyRange(int index, int start, int limit)
   1201     {
   1202         if (index < start || index >= limit) {
   1203             throw new IllegalArgumentException("Value " + index +
   1204                       " is out of range " + start + " to " + limit);
   1205         }
   1206     }
   1207 
   1208     /**
   1209      * Allocate a <code>Bidi</code> object.
   1210      * Such an object is initially empty. It is assigned
   1211      * the Bidi properties of a piece of text containing one or more paragraphs
   1212      * by <code>setPara()</code>
   1213      * or the Bidi properties of a line within a paragraph by
   1214      * <code>setLine()</code>.<p>
   1215      * This object can be reused.<p>
   1216      * <code>setPara()</code> and <code>setLine()</code> will allocate
   1217      * additional memory for internal structures as necessary.
   1218      */
   1219     public Bidi()
   1220     {
   1221         this(0, 0);
   1222     }
   1223 
   1224     /**
   1225      * Allocate a <code>Bidi</code> object with preallocated memory
   1226      * for internal structures.
   1227      * This method provides a <code>Bidi</code> object like the default constructor
   1228      * but it also preallocates memory for internal structures
   1229      * according to the sizings supplied by the caller.<p>
   1230      * The preallocation can be limited to some of the internal memory
   1231      * by setting some values to 0 here. That means that if, e.g.,
   1232      * <code>maxRunCount</code> cannot be reasonably predetermined and should not
   1233      * be set to <code>maxLength</code> (the only failproof value) to avoid
   1234      * wasting  memory, then <code>maxRunCount</code> could be set to 0 here
   1235      * and the internal structures that are associated with it will be allocated
   1236      * on demand, just like with the default constructor.
   1237      *
   1238      * @param maxLength is the maximum text or line length that internal memory
   1239      *        will be preallocated for. An attempt to associate this object with a
   1240      *        longer text will fail, unless this value is 0, which leaves the allocation
   1241      *        up to the implementation.
   1242      *
   1243      * @param maxRunCount is the maximum anticipated number of same-level runs
   1244      *        that internal memory will be preallocated for. An attempt to access
   1245      *        visual runs on an object that was not preallocated for as many runs
   1246      *        as the text was actually resolved to will fail,
   1247      *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
   1248      *        The number of runs depends on the actual text and maybe anywhere between
   1249      *        1 and <code>maxLength</code>. It is typically small.
   1250      *
   1251      * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0
   1252      */
   1253     public Bidi(int maxLength, int maxRunCount)
   1254     {
   1255         /* check the argument values */
   1256         if (maxLength < 0 || maxRunCount < 0) {
   1257             throw new IllegalArgumentException();
   1258         }
   1259 
   1260         /* reset the object, all reference variables null, all flags false,
   1261            all sizes 0.
   1262            In fact, we don't need to do anything, since class members are
   1263            initialized as zero when an instance is created.
   1264          */
   1265         /*
   1266         mayAllocateText = false;
   1267         mayAllocateRuns = false;
   1268         orderParagraphsLTR = false;
   1269         paraCount = 0;
   1270         runCount = 0;
   1271         trailingWSStart = 0;
   1272         flags = 0;
   1273         paraLevel = 0;
   1274         defaultParaLevel = 0;
   1275         direction = 0;
   1276         */
   1277         /* get Bidi properties */
   1278         bdp = UBiDiProps.INSTANCE;
   1279 
   1280         /* allocate memory for arrays as requested */
   1281         if (maxLength > 0) {
   1282             getInitialDirPropsMemory(maxLength);
   1283             getInitialLevelsMemory(maxLength);
   1284         } else {
   1285             mayAllocateText = true;
   1286         }
   1287 
   1288         if (maxRunCount > 0) {
   1289             // if maxRunCount == 1, use simpleRuns[]
   1290             if (maxRunCount > 1) {
   1291                 getInitialRunsMemory(maxRunCount);
   1292             }
   1293         } else {
   1294             mayAllocateRuns = true;
   1295         }
   1296     }
   1297 
   1298     /*
   1299      * We are allowed to allocate memory if object==null or
   1300      * mayAllocate==true for each array that we need.
   1301      *
   1302      * Assume sizeNeeded>0.
   1303      * If object != null, then assume size > 0.
   1304      */
   1305     private Object getMemory(String label, Object array, Class<?> arrayClass,
   1306             boolean mayAllocate, int sizeNeeded)
   1307     {
   1308         int len = Array.getLength(array);
   1309 
   1310         /* we have at least enough memory and must not allocate */
   1311         if (sizeNeeded == len) {
   1312             return array;
   1313         }
   1314         if (!mayAllocate) {
   1315             /* we must not allocate */
   1316             if (sizeNeeded <= len) {
   1317                 return array;
   1318             }
   1319             throw new OutOfMemoryError("Failed to allocate memory for "
   1320                                        + label);
   1321         }
   1322         /* we may try to grow or shrink */
   1323         /* FOOD FOR THOUGHT: when shrinking it should be possible to avoid
   1324            the allocation altogether and rely on this.length */
   1325         try {
   1326             return Array.newInstance(arrayClass, sizeNeeded);
   1327         } catch (Exception e) {
   1328             throw new OutOfMemoryError("Failed to allocate memory for "
   1329                                        + label);
   1330         }
   1331     }
   1332 
   1333     /* helper methods for each allocated array */
   1334     private void getDirPropsMemory(boolean mayAllocate, int len)
   1335     {
   1336         Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len);
   1337         dirPropsMemory = (byte[]) array;
   1338     }
   1339 
   1340     void getDirPropsMemory(int len)
   1341     {
   1342         getDirPropsMemory(mayAllocateText, len);
   1343     }
   1344 
   1345     private void getLevelsMemory(boolean mayAllocate, int len)
   1346     {
   1347         Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len);
   1348         levelsMemory = (byte[]) array;
   1349     }
   1350 
   1351     void getLevelsMemory(int len)
   1352     {
   1353         getLevelsMemory(mayAllocateText, len);
   1354     }
   1355 
   1356     private void getRunsMemory(boolean mayAllocate, int len)
   1357     {
   1358         Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len);
   1359         runsMemory = (BidiRun[]) array;
   1360     }
   1361 
   1362     void getRunsMemory(int len)
   1363     {
   1364         getRunsMemory(mayAllocateRuns, len);
   1365     }
   1366 
   1367     /* additional methods used by constructor - always allow allocation */
   1368     private void getInitialDirPropsMemory(int len)
   1369     {
   1370         getDirPropsMemory(true, len);
   1371     }
   1372 
   1373     private void getInitialLevelsMemory(int len)
   1374     {
   1375         getLevelsMemory(true, len);
   1376     }
   1377 
   1378     private void getInitialRunsMemory(int len)
   1379     {
   1380         getRunsMemory(true, len);
   1381     }
   1382 
   1383     /**
   1384      * Modify the operation of the Bidi algorithm such that it
   1385      * approximates an "inverse Bidi" algorithm. This method
   1386      * must be called before <code>setPara()</code>.
   1387      *
   1388      * <p>The normal operation of the Bidi algorithm as described
   1389      * in the Unicode Technical Report is to take text stored in logical
   1390      * (keyboard, typing) order and to determine the reordering of it for visual
   1391      * rendering.
   1392      * Some legacy systems store text in visual order, and for operations
   1393      * with standard, Unicode-based algorithms, the text needs to be transformed
   1394      * to logical order. This is effectively the inverse algorithm of the
   1395      * described Bidi algorithm. Note that there is no standard algorithm for
   1396      * this "inverse Bidi" and that the current implementation provides only an
   1397      * approximation of "inverse Bidi".
   1398      *
   1399      * <p>With <code>isInverse</code> set to <code>true</code>,
   1400      * this method changes the behavior of some of the subsequent methods
   1401      * in a way that they can be used for the inverse Bidi algorithm.
   1402      * Specifically, runs of text with numeric characters will be treated in a
   1403      * special way and may need to be surrounded with LRM characters when they are
   1404      * written in reordered sequence.
   1405      *
   1406      * <p>Output runs should be retrieved using <code>getVisualRun()</code>.
   1407      * Since the actual input for "inverse Bidi" is visually ordered text and
   1408      * <code>getVisualRun()</code> gets the reordered runs, these are actually
   1409      * the runs of the logically ordered output.
   1410      *
   1411      * <p>Calling this method with argument <code>isInverse</code> set to
   1412      * <code>true</code> is equivalent to calling <code>setReorderingMode</code>
   1413      * with argument <code>reorderingMode</code>
   1414      * set to <code>REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
   1415      * Calling this method with argument <code>isInverse</code> set to
   1416      * <code>false</code> is equivalent to calling <code>setReorderingMode</code>
   1417      * with argument <code>reorderingMode</code>
   1418      * set to <code>REORDER_DEFAULT</code>.
   1419      *
   1420      * @param isInverse specifies "forward" or "inverse" Bidi operation.
   1421      *
   1422      * @see #setPara
   1423      * @see #writeReordered
   1424      * @see #setReorderingMode
   1425      * @see #REORDER_INVERSE_NUMBERS_AS_L
   1426      * @see #REORDER_DEFAULT
   1427      */
   1428     public void setInverse(boolean isInverse) {
   1429         this.isInverse = (isInverse);
   1430         this.reorderingMode = isInverse ? REORDER_INVERSE_NUMBERS_AS_L
   1431                 : REORDER_DEFAULT;
   1432     }
   1433 
   1434     /**
   1435      * Is this <code>Bidi</code> object set to perform the inverse Bidi
   1436      * algorithm?
   1437      * <p>Note: calling this method after setting the reordering mode with
   1438      * <code>setReorderingMode</code> will return <code>true</code> if the
   1439      * reordering mode was set to
   1440      * <code>REORDER_INVERSE_NUMBERS_AS_L</code>, <code>false</code>
   1441      * for all other values.
   1442      *
   1443      * @return <code>true</code> if the <code>Bidi</code> object is set to
   1444      * perform the inverse Bidi algorithm by handling numbers as L.
   1445      *
   1446      * @see #setInverse
   1447      * @see #setReorderingMode
   1448      * @see #REORDER_INVERSE_NUMBERS_AS_L
   1449      */
   1450     public boolean isInverse() {
   1451         return isInverse;
   1452     }
   1453 
   1454     /**
   1455      * Modify the operation of the Bidi algorithm such that it implements some
   1456      * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
   1457      * algorithm, depending on different values of the "reordering mode".
   1458      * This method must be called before <code>setPara()</code>, and stays in
   1459      * effect until called again with a different argument.
   1460      *
   1461      * <p>The normal operation of the Bidi algorithm as described in the Unicode
   1462      * Standard Annex #9 is to take text stored in logical (keyboard, typing)
   1463      * order and to determine how to reorder it for visual rendering.
   1464      *
   1465      * <p>With the reordering mode set to a value other than
   1466      * <code>REORDER_DEFAULT</code>, this method changes the behavior of some of
   1467      * the subsequent methods in a way such that they implement an inverse Bidi
   1468      * algorithm or some other algorithm variants.
   1469      *
   1470      * <p>Some legacy systems store text in visual order, and for operations
   1471      * with standard, Unicode-based algorithms, the text needs to be transformed
   1472      * into logical order. This is effectively the inverse algorithm of the
   1473      * described Bidi algorithm. Note that there is no standard algorithm for
   1474      * this "inverse Bidi", so a number of variants are implemented here.
   1475      *
   1476      * <p>In other cases, it may be desirable to emulate some variant of the
   1477      * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
   1478      * Logical to Logical transformation.
   1479      *
   1480      * <ul>
   1481      * <li>When the Reordering Mode is set to
   1482      * <code>REORDER_DEFAULT</code>,
   1483      * the standard Bidi Logical to Visual algorithm is applied.</li>
   1484      *
   1485      * <li>When the reordering mode is set to
   1486      * <code>REORDER_NUMBERS_SPECIAL</code>,
   1487      * the algorithm used to perform Bidi transformations when calling
   1488      * <code>setPara</code> should approximate the algorithm used in Microsoft
   1489      * Windows XP rather than strictly conform to the Unicode Bidi algorithm.
   1490      * <br>
   1491      * The differences between the basic algorithm and the algorithm addressed
   1492      * by this option are as follows:
   1493      * <ul>
   1494      *   <li>Within text at an even embedding level, the sequence "123AB"
   1495      *   (where AB represent R or AL letters) is transformed to "123BA" by the
   1496      *   Unicode algorithm and to "BA123" by the Windows algorithm.</li>
   1497      *
   1498      *   <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
   1499      *   like regular numbers (EN).</li>
   1500      * </ul></li>
   1501      *
   1502      * <li>When the reordering mode is set to
   1503      * <code>REORDER_GROUP_NUMBERS_WITH_R</code>,
   1504      * numbers located between LTR text and RTL text are associated with the RTL
   1505      * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
   1506      * upper case letters represent RTL characters) will be transformed to
   1507      * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
   1508      * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
   1509      * This makes the algorithm reversible and makes it useful when round trip
   1510      * (from visual to logical and back to visual) must be achieved without
   1511      * adding LRM characters. However, this is a variation from the standard
   1512      * Unicode Bidi algorithm.<br>
   1513      * The source text should not contain Bidi control characters other than LRM
   1514      * or RLM.</li>
   1515      *
   1516      * <li>When the reordering mode is set to
   1517      * <code>REORDER_RUNS_ONLY</code>,
   1518      * a "Logical to Logical" transformation must be performed:
   1519      * <ul>
   1520      * <li>If the default text level of the source text (argument
   1521      * <code>paraLevel</code> in <code>setPara</code>) is even, the source text
   1522      * will be handled as LTR logical text and will be transformed to the RTL
   1523      * logical text which has the same LTR visual display.</li>
   1524      * <li>If the default level of the source text is odd, the source text
   1525      * will be handled as RTL logical text and will be transformed to the
   1526      * LTR logical text which has the same LTR visual display.</li>
   1527      * </ul>
   1528      * This mode may be needed when logical text which is basically Arabic or
   1529      * Hebrew, with possible included numbers or phrases in English, has to be
   1530      * displayed as if it had an even embedding level (this can happen if the
   1531      * displaying application treats all text as if it was basically LTR).
   1532      * <br>
   1533      * This mode may also be needed in the reverse case, when logical text which
   1534      * is basically English, with possible included phrases in Arabic or Hebrew,
   1535      * has to be displayed as if it had an odd embedding level.
   1536      * <br>
   1537      * Both cases could be handled by adding LRE or RLE at the head of the
   1538      * text, if the display subsystem supports these formatting controls. If it
   1539      * does not, the problem may be handled by transforming the source text in
   1540      * this mode before displaying it, so that it will be displayed properly.
   1541      * <br>
   1542      * The source text should not contain Bidi control characters other than LRM
   1543      * or RLM.</li>
   1544      *
   1545      * <li>When the reordering mode is set to
   1546      * <code>REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi"
   1547      * algorithm is applied.
   1548      * Runs of text with numeric characters will be treated like LTR letters and
   1549      * may need to be surrounded with LRM characters when they are written in
   1550      * reordered sequence (the option <code>INSERT_LRM_FOR_NUMERIC</code> can
   1551      * be used with method <code>writeReordered</code> to this end). This mode
   1552      * is equivalent to calling <code>setInverse()</code> with
   1553      * argument <code>isInverse</code> set to <code>true</code>.</li>
   1554      *
   1555      * <li>When the reordering mode is set to
   1556      * <code>REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to
   1557      * Visual Bidi algorithm is used as an approximation of an "inverse Bidi"
   1558      * algorithm. This mode is similar to mode
   1559      * <code>REORDER_INVERSE_NUMBERS_AS_L</code> but is closer to the
   1560      * regular Bidi algorithm.
   1561      * <br>
   1562      * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
   1563      * upper case represents RTL characters) will be transformed to
   1564      * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
   1565      * with mode <code>REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
   1566      * When used in conjunction with option
   1567      * <code>OPTION_INSERT_MARKS</code>, this mode generally
   1568      * adds Bidi marks to the output significantly more sparingly than mode
   1569      * <code>REORDER_INVERSE_NUMBERS_AS_L</code> with option
   1570      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to
   1571      * <code>writeReordered</code>.</li>
   1572      *
   1573      * <li>When the reordering mode is set to
   1574      * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
   1575      * Bidi algorithm used in Windows XP is used as an approximation of an "inverse
   1576      * Bidi" algorithm.
   1577      * <br>
   1578      * For example, an LTR paragraph with the content "abc FED123" (where
   1579      * upper case represents RTL characters) will be transformed to
   1580      * "abc 123DEF".</li>
   1581      * </ul>
   1582      *
   1583      * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
   1584      * (i.e. those with a name starting with <code>REORDER_INVERSE</code>),
   1585      * output runs should be retrieved using <code>getVisualRun()</code>, and
   1586      * the output text with <code>writeReordered()</code>. The caller should
   1587      * keep in mind that in "inverse Bidi" modes the input is actually visually
   1588      * ordered text and reordered output returned by <code>getVisualRun()</code>
   1589      * or <code>writeReordered()</code> are actually runs or character string
   1590      * of logically ordered output.<br>
   1591      * For all the "inverse Bidi" modes, the source text should not contain
   1592      * Bidi control characters other than LRM or RLM.
   1593      *
   1594      * <p>Note that option <code>OUTPUT_REVERSE</code> of
   1595      * <code>writeReordered</code> has no useful meaning and should not be used
   1596      * in conjunction with any value of the reordering mode specifying "inverse
   1597      * Bidi" or with value <code>REORDER_RUNS_ONLY</code>.
   1598      *
   1599      * @param reorderingMode specifies the required variant of the Bidi
   1600      *                       algorithm.
   1601      *
   1602      * @see #setInverse
   1603      * @see #setPara
   1604      * @see #writeReordered
   1605      * @see #INSERT_LRM_FOR_NUMERIC
   1606      * @see #OUTPUT_REVERSE
   1607      * @see #REORDER_DEFAULT
   1608      * @see #REORDER_NUMBERS_SPECIAL
   1609      * @see #REORDER_GROUP_NUMBERS_WITH_R
   1610      * @see #REORDER_RUNS_ONLY
   1611      * @see #REORDER_INVERSE_NUMBERS_AS_L
   1612      * @see #REORDER_INVERSE_LIKE_DIRECT
   1613      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
   1614      */
   1615     public void setReorderingMode(int reorderingMode) {
   1616         if ((reorderingMode < REORDER_DEFAULT) ||
   1617             (reorderingMode >= REORDER_COUNT))
   1618             return;                     /* don't accept a wrong value */
   1619         this.reorderingMode = reorderingMode;
   1620         this.isInverse =
   1621             reorderingMode == REORDER_INVERSE_NUMBERS_AS_L;
   1622     }
   1623 
   1624     /**
   1625      * What is the requested reordering mode for a given Bidi object?
   1626      *
   1627      * @return the current reordering mode of the Bidi object
   1628      *
   1629      * @see #setReorderingMode
   1630      */
   1631     public int getReorderingMode() {
   1632         return this.reorderingMode;
   1633     }
   1634 
   1635     /**
   1636      * Specify which of the reordering options should be applied during Bidi
   1637      * transformations.
   1638      *
   1639      * @param options A combination of zero or more of the following
   1640      * reordering options:
   1641      * <code>OPTION_DEFAULT</code>, <code>OPTION_INSERT_MARKS</code>,
   1642      * <code>OPTION_REMOVE_CONTROLS</code>, <code>OPTION_STREAMING</code>.
   1643      *
   1644      * @see #getReorderingOptions
   1645      * @see #OPTION_DEFAULT
   1646      * @see #OPTION_INSERT_MARKS
   1647      * @see #OPTION_REMOVE_CONTROLS
   1648      * @see #OPTION_STREAMING
   1649      */
   1650     public void setReorderingOptions(int options) {
   1651         if ((options & OPTION_REMOVE_CONTROLS) != 0) {
   1652             this.reorderingOptions = options & ~OPTION_INSERT_MARKS;
   1653         } else {
   1654             this.reorderingOptions = options;
   1655         }
   1656     }
   1657 
   1658     /**
   1659      * What are the reordering options applied to a given Bidi object?
   1660      *
   1661      * @return the current reordering options of the Bidi object
   1662      *
   1663      * @see #setReorderingOptions
   1664      */
   1665     public int getReorderingOptions() {
   1666         return this.reorderingOptions;
   1667     }
   1668 
   1669     /**
   1670      * Get the base direction of the text provided according to the Unicode
   1671      * Bidirectional Algorithm. The base direction is derived from the first
   1672      * character in the string with bidirectional character type L, R, or AL.
   1673      * If the first such character has type L, LTR is returned. If the first
   1674      * such character has type R or AL, RTL is returned. If the string does
   1675      * not contain any character of these types, then NEUTRAL is returned.
   1676      * This is a lightweight function for use when only the base direction is
   1677      * needed and no further bidi processing of the text is needed.
   1678      * @param paragraph the text whose paragraph level direction is needed.
   1679      * @return LTR, RTL, NEUTRAL
   1680      * @see #LTR
   1681      * @see #RTL
   1682      * @see #NEUTRAL
   1683      */
   1684     public static byte getBaseDirection(CharSequence paragraph) {
   1685         if (paragraph == null || paragraph.length() == 0) {
   1686             return NEUTRAL;
   1687         }
   1688 
   1689         int length = paragraph.length();
   1690         int c;// codepoint
   1691         byte direction;
   1692 
   1693         for (int i = 0; i < length; ) {
   1694             // U16_NEXT(paragraph, i, length, c) for C++
   1695             c = UCharacter.codePointAt(paragraph, i);
   1696             direction = UCharacter.getDirectionality(c);
   1697             if (direction == UCharacterDirection.LEFT_TO_RIGHT) {
   1698                 return LTR;
   1699             } else if (direction == UCharacterDirection.RIGHT_TO_LEFT
   1700                 || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
   1701                 return RTL;
   1702             }
   1703 
   1704             i = UCharacter.offsetByCodePoints(paragraph, i, 1);// set i to the head index of next codepoint
   1705         }
   1706         return NEUTRAL;
   1707     }
   1708 
   1709 /* perform (P2)..(P3) ------------------------------------------------------- */
   1710 
   1711     /**
   1712      * Returns the directionality of the first strong character
   1713      * after the last B in prologue, if any.
   1714      * Requires prologue!=null.
   1715      */
   1716     private byte firstL_R_AL() {
   1717         byte result = ON;
   1718         for (int i = 0; i < prologue.length(); ) {
   1719             int uchar = prologue.codePointAt(i);
   1720             i += Character.charCount(uchar);
   1721             byte dirProp = (byte)getCustomizedClass(uchar);
   1722             if (result == ON) {
   1723                 if (dirProp == L || dirProp == R || dirProp == AL) {
   1724                     result = dirProp;
   1725                 }
   1726             } else {
   1727                 if (dirProp == B) {
   1728                     result = ON;
   1729                 }
   1730             }
   1731         }
   1732         return result;
   1733     }
   1734 
   1735     /*
   1736      * Check that there are enough entries in the arrays paras_limit and paras_level
   1737      */
   1738     private void checkParaCount() {
   1739         int[] saveLimits;
   1740         byte[] saveLevels;
   1741         int count = paraCount;
   1742         if (count <= paras_level.length)
   1743             return;
   1744         int oldLength = paras_level.length;
   1745         saveLimits = paras_limit;
   1746         saveLevels = paras_level;
   1747         try {
   1748             paras_limit = new int[count * 2];
   1749             paras_level = new byte[count * 2];
   1750         } catch (Exception e) {
   1751             throw new OutOfMemoryError("Failed to allocate memory for paras");
   1752         }
   1753         System.arraycopy(saveLimits, 0, paras_limit, 0, oldLength);
   1754         System.arraycopy(saveLevels, 0, paras_level, 0, oldLength);
   1755     }
   1756 
   1757     /*
   1758      * Get the directional properties for the text, calculate the flags bit-set, and
   1759      * determine the paragraph level if necessary (in paras_level[i]).
   1760      * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
   1761      * When encountering an FSI, it is initially replaced with an LRI, which is the
   1762      * default. Only if a strong R or AL is found within its scope will the LRI be
   1763      * replaced by an RLI.
   1764      */
   1765     static final int NOT_SEEKING_STRONG = 0;        /* 0: not contextual paraLevel, not after FSI */
   1766     static final int SEEKING_STRONG_FOR_PARA = 1;   /* 1: looking for first strong char in para */
   1767     static final int SEEKING_STRONG_FOR_FSI = 2;    /* 2: looking for first strong after FSI */
   1768     static final int LOOKING_FOR_PDI = 3;           /* 3: found strong after FSI, looking for PDI */
   1769 
   1770     private void getDirProps()
   1771     {
   1772         int i = 0, i0, i1;
   1773         flags = 0;          /* collect all directionalities in the text */
   1774         int uchar;
   1775         byte dirProp;
   1776         byte defaultParaLevel = 0;   /* initialize to avoid compiler warnings */
   1777         boolean isDefaultLevel = IsDefaultLevel(paraLevel);
   1778         /* for inverse Bidi, the default para level is set to RTL if there is a
   1779            strong R or AL character at either end of the text                */
   1780         boolean isDefaultLevelInverse=isDefaultLevel &&
   1781                 (reorderingMode == REORDER_INVERSE_LIKE_DIRECT ||
   1782                  reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
   1783         lastArabicPos = -1;
   1784         int controlCount = 0;
   1785         boolean removeBidiControls = (reorderingOptions & OPTION_REMOVE_CONTROLS) != 0;
   1786 
   1787         byte state;
   1788         byte lastStrong = ON;           /* for default level & inverse Bidi */
   1789     /* The following stacks are used to manage isolate sequences. Those
   1790        sequences may be nested, but obviously never more deeply than the
   1791        maximum explicit embedding level.
   1792        lastStack is the index of the last used entry in the stack. A value of -1
   1793        means that there is no open isolate sequence.
   1794        lastStack is reset to -1 on paragraph boundaries. */
   1795     /* The following stack contains the position of the initiator of
   1796        each open isolate sequence */
   1797         int[] isolateStartStack= new int[MAX_EXPLICIT_LEVEL+1];
   1798     /* The following stack contains the last known state before
   1799        encountering the initiator of an isolate sequence */
   1800         byte[] previousStateStack = new byte[MAX_EXPLICIT_LEVEL+1];
   1801         int  stackLast=-1;
   1802 
   1803         if ((reorderingOptions & OPTION_STREAMING) != 0)
   1804             length = 0;
   1805         defaultParaLevel = (byte)(paraLevel & 1);
   1806 
   1807         if (isDefaultLevel) {
   1808             paras_level[0] = defaultParaLevel;
   1809             lastStrong = defaultParaLevel;
   1810             if (prologue != null &&                        /* there is a prologue */
   1811                 (dirProp = firstL_R_AL()) != ON) {     /* with a strong character */
   1812                 if (dirProp == L)
   1813                     paras_level[0] = 0;             /* set the default para level */
   1814                 else
   1815                     paras_level[0] = 1;             /* set the default para level */
   1816                 state = NOT_SEEKING_STRONG;
   1817             } else {
   1818                 state = SEEKING_STRONG_FOR_PARA;
   1819             }
   1820         } else {
   1821             paras_level[0] = paraLevel;
   1822             state = NOT_SEEKING_STRONG;
   1823         }
   1824         /* count paragraphs and determine the paragraph level (P2..P3) */
   1825         /*
   1826          * see comment on constant fields:
   1827          * the LEVEL_DEFAULT_XXX values are designed so that
   1828          * their low-order bit alone yields the intended default
   1829          */
   1830 
   1831         for (i = 0; i < originalLength; /* i is incremented in the loop */) {
   1832             i0 = i;                     /* index of first code unit */
   1833             uchar = UTF16.charAt(text, 0, originalLength, i);
   1834             i += UTF16.getCharCount(uchar);
   1835             i1 = i - 1; /* index of last code unit, gets the directional property */
   1836 
   1837             dirProp = (byte)getCustomizedClass(uchar);
   1838             flags |= DirPropFlag(dirProp);
   1839             dirProps[i1] = dirProp;
   1840             if (i1 > i0) {     /* set previous code units' properties to BN */
   1841                 flags |= DirPropFlag(BN);
   1842                 do {
   1843                     dirProps[--i1] = BN;
   1844                 } while (i1 > i0);
   1845             }
   1846             if (removeBidiControls && IsBidiControlChar(uchar)) {
   1847                 controlCount++;
   1848             }
   1849             if (dirProp == L) {
   1850                 if (state == SEEKING_STRONG_FOR_PARA) {
   1851                     paras_level[paraCount - 1] = 0;
   1852                     state = NOT_SEEKING_STRONG;
   1853                 }
   1854                 else if (state == SEEKING_STRONG_FOR_FSI) {
   1855                     if (stackLast <= MAX_EXPLICIT_LEVEL) {
   1856                         /* no need for next statement, already set by default */
   1857                         /* dirProps[isolateStartStack[stackLast]] = LRI; */
   1858                         flags |= DirPropFlag(LRI);
   1859                     }
   1860                     state = LOOKING_FOR_PDI;
   1861                 }
   1862                 lastStrong = L;
   1863                 continue;
   1864             }
   1865             if (dirProp == R || dirProp == AL) {
   1866                 if (state == SEEKING_STRONG_FOR_PARA) {
   1867                     paras_level[paraCount - 1] = 1;
   1868                     state = NOT_SEEKING_STRONG;
   1869                 }
   1870                 else if (state == SEEKING_STRONG_FOR_FSI) {
   1871                     if (stackLast <= MAX_EXPLICIT_LEVEL) {
   1872                         dirProps[isolateStartStack[stackLast]] = RLI;
   1873                         flags |= DirPropFlag(RLI);
   1874                     }
   1875                     state = LOOKING_FOR_PDI;
   1876                 }
   1877                 lastStrong = R;
   1878                 if (dirProp == AL)
   1879                     lastArabicPos = i - 1;
   1880                 continue;
   1881             }
   1882             if (dirProp >= FSI && dirProp <= RLI) { /* FSI, LRI or RLI */
   1883                 stackLast++;
   1884                 if (stackLast <= MAX_EXPLICIT_LEVEL) {
   1885                     isolateStartStack[stackLast] = i - 1;
   1886                     previousStateStack[stackLast] = state;
   1887                 }
   1888                 if (dirProp == FSI) {
   1889                     dirProps[i-1] = LRI;    /* default if no strong char */
   1890                     state = SEEKING_STRONG_FOR_FSI;
   1891                 }
   1892                 else
   1893                     state = LOOKING_FOR_PDI;
   1894                 continue;
   1895             }
   1896             if (dirProp == PDI) {
   1897                 if (state == SEEKING_STRONG_FOR_FSI) {
   1898                     if (stackLast <= MAX_EXPLICIT_LEVEL) {
   1899                         /* no need for next statement, already set by default */
   1900                         /* dirProps[isolateStartStack[stackLast]] = LRI; */
   1901                         flags |= DirPropFlag(LRI);
   1902                     }
   1903                 }
   1904                 if (stackLast >= 0) {
   1905                     if (stackLast <= MAX_EXPLICIT_LEVEL)
   1906                         state = previousStateStack[stackLast];
   1907                     stackLast--;
   1908                 }
   1909                 continue;
   1910             }
   1911             if (dirProp == B) {
   1912                 if (i < originalLength && uchar == CR && text[i] == LF) /* do nothing on the CR */
   1913                     continue;
   1914                 paras_limit[paraCount - 1] = i;
   1915                 if (isDefaultLevelInverse && lastStrong == R)
   1916                     paras_level[paraCount - 1] = 1;
   1917                 if ((reorderingOptions & OPTION_STREAMING) != 0) {
   1918                 /* When streaming, we only process whole paragraphs
   1919                    thus some updates are only done on paragraph boundaries */
   1920                    length = i;          /* i is index to next character */
   1921                    this.controlCount = controlCount;
   1922                 }
   1923                 if (i < originalLength) {       /* B not last char in text */
   1924                     paraCount++;
   1925                     checkParaCount();   /* check that there is enough memory for a new para entry */
   1926                     if (isDefaultLevel) {
   1927                         paras_level[paraCount - 1] = defaultParaLevel;
   1928                         state = SEEKING_STRONG_FOR_PARA;
   1929                         lastStrong = defaultParaLevel;
   1930                     } else {
   1931                         paras_level[paraCount - 1] = paraLevel;
   1932                         state = NOT_SEEKING_STRONG;
   1933                     }
   1934                     stackLast = -1;
   1935                 }
   1936                 continue;
   1937             }
   1938         }
   1939         /* +Ignore still open isolate sequences with overflow */
   1940         if (stackLast > MAX_EXPLICIT_LEVEL) {
   1941             stackLast = MAX_EXPLICIT_LEVEL;
   1942             state=SEEKING_STRONG_FOR_FSI;   /* to be on the safe side */
   1943         }
   1944         /* Resolve direction of still unresolved open FSI sequences */
   1945         while (stackLast >= 0) {
   1946             if (state == SEEKING_STRONG_FOR_FSI) {
   1947                 /* no need for next statement, already set by default */
   1948                 /* dirProps[isolateStartStack[stackLast]] = LRI; */
   1949                 flags |= DirPropFlag(LRI);
   1950                 break;
   1951             }
   1952             state = previousStateStack[stackLast];
   1953             stackLast--;
   1954         }
   1955         /* When streaming, ignore text after the last paragraph separator */
   1956         if ((reorderingOptions & OPTION_STREAMING) != 0) {
   1957             if (length < originalLength)
   1958                 paraCount--;
   1959         } else {
   1960             paras_limit[paraCount - 1] = originalLength;
   1961             this.controlCount = controlCount;
   1962         }
   1963         /* For inverse bidi, default para direction is RTL if there is
   1964            a strong R or AL at either end of the paragraph */
   1965         if (isDefaultLevelInverse && lastStrong == R) {
   1966             paras_level[paraCount - 1] = 1;
   1967         }
   1968         if (isDefaultLevel) {
   1969             paraLevel = paras_level[0];
   1970         }
   1971         /* The following is needed to resolve the text direction for default level
   1972            paragraphs containing no strong character */
   1973         for (i = 0; i < paraCount; i++)
   1974             flags |= DirPropFlagLR(paras_level[i]);
   1975 
   1976         if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
   1977             flags |= DirPropFlag(L);
   1978         }
   1979     }
   1980 
   1981     /* determine the paragraph level at position index */
   1982     byte GetParaLevelAt(int pindex)
   1983     {
   1984         if (defaultParaLevel == 0 || pindex < paras_limit[0])
   1985             return paraLevel;
   1986         int i;
   1987         for (i = 1; i < paraCount; i++)
   1988             if (pindex < paras_limit[i])
   1989                 break;
   1990         if (i >= paraCount)
   1991             i = paraCount - 1;
   1992         return paras_level[i];
   1993     }
   1994 
   1995     /* Functions for handling paired brackets ----------------------------------- */
   1996 
   1997     /* In the isoRuns array, the first entry is used for text outside of any
   1998        isolate sequence.  Higher entries are used for each more deeply nested
   1999        isolate sequence. isoRunLast is the index of the last used entry.  The
   2000        openings array is used to note the data of opening brackets not yet
   2001        matched by a closing bracket, or matched but still susceptible to change
   2002        level.
   2003        Each isoRun entry contains the index of the first and
   2004        one-after-last openings entries for pending opening brackets it
   2005        contains.  The next openings entry to use is the one-after-last of the
   2006        most deeply nested isoRun entry.
   2007        isoRun entries also contain their current embedding level and the last
   2008        encountered strong character, since these will be needed to resolve
   2009        the level of paired brackets.  */
   2010 
   2011     private void bracketInit(BracketData bd) {
   2012         bd.isoRunLast = 0;
   2013         bd.isoRuns[0] = new IsoRun();
   2014         bd.isoRuns[0].start = 0;
   2015         bd.isoRuns[0].limit = 0;
   2016         bd.isoRuns[0].level = GetParaLevelAt(0);
   2017         bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(GetParaLevelAt(0) & 1);
   2018         bd.isoRuns[0].contextPos = 0;
   2019         bd.openings = new Opening[SIMPLE_OPENINGS_COUNT];
   2020         bd.isNumbersSpecial = reorderingMode == REORDER_NUMBERS_SPECIAL ||
   2021                               reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
   2022     }
   2023 
   2024     /* paragraph boundary */
   2025     private void bracketProcessB(BracketData bd, byte level) {
   2026         bd.isoRunLast = 0;
   2027         bd.isoRuns[0].limit = 0;
   2028         bd.isoRuns[0].level = level;
   2029         bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(level & 1);
   2030         bd.isoRuns[0].contextPos = 0;
   2031     }
   2032 
   2033     /* LRE, LRO, RLE, RLO, PDF */
   2034     private void bracketProcessBoundary(BracketData bd, int lastCcPos,
   2035                                         byte contextLevel, byte embeddingLevel) {
   2036         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
   2037         if ((DirPropFlag(dirProps[lastCcPos]) & MASK_ISO) != 0) /* after an isolate */
   2038             return;
   2039         if (NoOverride(embeddingLevel) > NoOverride(contextLevel))  /* not a PDF */
   2040             contextLevel = embeddingLevel;
   2041         pLastIsoRun.limit = pLastIsoRun.start;
   2042         pLastIsoRun.level = embeddingLevel;
   2043         pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(contextLevel & 1);
   2044         pLastIsoRun.contextPos = lastCcPos;
   2045     }
   2046 
   2047     /* LRI or RLI */
   2048     private void bracketProcessLRI_RLI(BracketData bd, byte level) {
   2049         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
   2050         short lastLimit;
   2051         pLastIsoRun.lastBase = ON;
   2052         lastLimit = pLastIsoRun.limit;
   2053         bd.isoRunLast++;
   2054         pLastIsoRun = bd.isoRuns[bd.isoRunLast];
   2055         if (pLastIsoRun == null)
   2056             pLastIsoRun = bd.isoRuns[bd.isoRunLast] = new IsoRun();
   2057         pLastIsoRun.start = pLastIsoRun.limit = lastLimit;
   2058         pLastIsoRun.level = level;
   2059         pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(level & 1);
   2060         pLastIsoRun.contextPos = 0;
   2061     }
   2062 
   2063     /* PDI */
   2064     private void bracketProcessPDI(BracketData bd) {
   2065         IsoRun pLastIsoRun;
   2066         bd.isoRunLast--;
   2067         pLastIsoRun = bd.isoRuns[bd.isoRunLast];
   2068         pLastIsoRun.lastBase = ON;
   2069     }
   2070 
   2071     /* newly found opening bracket: create an openings entry */
   2072     private void bracketAddOpening(BracketData bd, char match, int position) {
   2073         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
   2074         Opening pOpening;
   2075         if (pLastIsoRun.limit >= bd.openings.length) {  /* no available new entry */
   2076             Opening[] saveOpenings = bd.openings;
   2077             int count;
   2078             try {
   2079                 count = bd.openings.length;
   2080                 bd.openings = new Opening[count * 2];
   2081             } catch (Exception e) {
   2082                 throw new OutOfMemoryError("Failed to allocate memory for openings");
   2083             }
   2084             System.arraycopy(saveOpenings, 0, bd.openings, 0, count);
   2085         }
   2086         pOpening = bd.openings[pLastIsoRun.limit];
   2087         if (pOpening == null)
   2088             pOpening = bd.openings[pLastIsoRun.limit]= new Opening();
   2089         pOpening.position = position;
   2090         pOpening.match = match;
   2091         pOpening.contextDir = pLastIsoRun.contextDir;
   2092         pOpening.contextPos = pLastIsoRun.contextPos;
   2093         pOpening.flags = 0;
   2094         pLastIsoRun.limit++;
   2095     }
   2096 
   2097     /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
   2098     private void fixN0c(BracketData bd, int openingIndex, int newPropPosition, byte newProp) {
   2099         /* This function calls itself recursively */
   2100         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
   2101         Opening qOpening;
   2102         int k, openingPosition, closingPosition;
   2103         for (k = openingIndex+1; k < pLastIsoRun.limit; k++) {
   2104             qOpening = bd.openings[k];
   2105             if (qOpening.match >= 0)    /* not an N0c match */
   2106                 continue;
   2107             if (newPropPosition < qOpening.contextPos)
   2108                 break;
   2109             if (newPropPosition >= qOpening.position)
   2110                 continue;
   2111             if (newProp == qOpening.contextDir)
   2112                 break;
   2113             openingPosition = qOpening.position;
   2114             dirProps[openingPosition] = newProp;
   2115             closingPosition = -(qOpening.match);
   2116             dirProps[closingPosition] = newProp;
   2117             qOpening.match = 0;                                 /* prevent further changes */
   2118             fixN0c(bd, k, openingPosition, newProp);
   2119             fixN0c(bd, k, closingPosition, newProp);
   2120         }
   2121     }
   2122 
   2123     /* process closing bracket; return L or R if N0b or N0c, ON if N0d */
   2124     private byte bracketProcessClosing(BracketData bd, int openIdx, int position) {
   2125         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
   2126         Opening pOpening, qOpening;
   2127         byte direction;
   2128         boolean stable;
   2129         byte newProp;
   2130         pOpening = bd.openings[openIdx];
   2131         direction = (byte)(pLastIsoRun.level & 1);
   2132         stable = true;          /* assume stable until proved otherwise */
   2133 
   2134         /* The stable flag is set when brackets are paired and their
   2135            level is resolved and cannot be changed by what will be
   2136            found later in the source string.
   2137            An unstable match can occur only when applying N0c, where
   2138            the resolved level depends on the preceding context, and
   2139            this context may be affected by text occurring later.
   2140            Example: RTL paragraph containing:  abc[(latin) HEBREW]
   2141            When the closing parenthesis is encountered, it appears
   2142            that N0c1 must be applied since 'abc' sets an opposite
   2143            direction context and both parentheses receive level 2.
   2144            However, when the closing square bracket is processed,
   2145            N0b applies because of 'HEBREW' being included within the
   2146            brackets, thus the square brackets are treated like R and
   2147            receive level 1. However, this changes the preceding
   2148            context of the opening parenthesis, and it now appears
   2149            that N0c2 must be applied to the parentheses rather than
   2150            N0c1. */
   2151 
   2152             if ((direction == 0 && (pOpening.flags & FOUND_L) > 0) ||
   2153                 (direction == 1 && (pOpening.flags & FOUND_R) > 0)) {   /* N0b */
   2154                 newProp = direction;
   2155             }
   2156             else if ((pOpening.flags & (FOUND_L | FOUND_R)) != 0) {     /* N0c */
   2157                     /* it is stable if there is no preceding text or in
   2158                        conditions too complicated and not worth checking */
   2159                     stable = (openIdx == pLastIsoRun.start);
   2160                 if (direction != pOpening.contextDir)
   2161                     newProp = pOpening.contextDir;                      /* N0c1 */
   2162                 else
   2163                     newProp = direction;                                /* N0c2 */
   2164             } else {
   2165             /* forget this and any brackets nested within this pair */
   2166             pLastIsoRun.limit = (short)openIdx;
   2167             return ON;                                                  /* N0d */
   2168         }
   2169         dirProps[pOpening.position] = newProp;
   2170         dirProps[position] = newProp;
   2171         /* Update nested N0c pairs that may be affected */
   2172         fixN0c(bd, openIdx, pOpening.position, newProp);
   2173         if (stable) {
   2174             pLastIsoRun.limit = (short)openIdx; /* forget any brackets nested within this pair */
   2175             /* remove lower located synonyms if any */
   2176             while (pLastIsoRun.limit > pLastIsoRun.start &&
   2177                    bd.openings[pLastIsoRun.limit - 1].position == pOpening.position)
   2178                 pLastIsoRun.limit--;
   2179         } else {
   2180             int k;
   2181             pOpening.match = -position;
   2182             /* neutralize lower located synonyms if any */
   2183             k = openIdx - 1;
   2184             while (k >= pLastIsoRun.start &&
   2185                    bd.openings[k].position == pOpening.position)
   2186                 bd.openings[k--].match = 0;
   2187             /* neutralize any unmatched opening between the current pair;
   2188                this will also neutralize higher located synonyms if any */
   2189             for (k = openIdx + 1; k < pLastIsoRun.limit; k++) {
   2190                 qOpening =bd.openings[k];
   2191                 if (qOpening.position >= position)
   2192                     break;
   2193                 if (qOpening.match > 0)
   2194                     qOpening.match = 0;
   2195             }
   2196         }
   2197         return newProp;
   2198     }
   2199 
   2200     /* handle strong characters, digits and candidates for closing brackets */
   2201     private void bracketProcessChar(BracketData bd, int position) {
   2202         IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
   2203         byte dirProp, newProp;
   2204         byte level;
   2205         dirProp = dirProps[position];
   2206         if (dirProp == ON) {
   2207             char c, match;
   2208             int idx;
   2209             /* First see if it is a matching closing bracket. Hopefully, this is
   2210                more efficient than checking if it is a closing bracket at all */
   2211             c = text[position];
   2212             for (idx = pLastIsoRun.limit - 1; idx >= pLastIsoRun.start; idx--) {
   2213                 if (bd.openings[idx].match != c)
   2214                     continue;
   2215                 /* We have a match */
   2216                 newProp = bracketProcessClosing(bd, idx, position);
   2217                 if(newProp == ON) {         /* N0d */
   2218                     c = 0;          /* prevent handling as an opening */
   2219                     break;
   2220                 }
   2221                 pLastIsoRun.lastBase = ON;
   2222                 pLastIsoRun.contextDir = newProp;
   2223                 pLastIsoRun.contextPos = position;
   2224                 level = levels[position];
   2225                 if ((level & LEVEL_OVERRIDE) != 0) {    /* X4, X5 */
   2226                     short flag;
   2227                     int i;
   2228                     newProp = (byte)(level & 1);
   2229                     pLastIsoRun.lastStrong = newProp;
   2230                     flag = (short)DirPropFlag(newProp);
   2231                     for (i = pLastIsoRun.start; i < idx; i++)
   2232                         bd.openings[i].flags |= flag;
   2233                     /* matching brackets are not overridden by LRO/RLO */
   2234                     levels[position] &= ~LEVEL_OVERRIDE;
   2235                 }
   2236                 /* matching brackets are not overridden by LRO/RLO */
   2237                 levels[bd.openings[idx].position] &= ~LEVEL_OVERRIDE;
   2238                 return;
   2239             }
   2240             /* We get here only if the ON character is not a matching closing
   2241                bracket or it is a case of N0d */
   2242             /* Now see if it is an opening bracket */
   2243             if (c != 0)
   2244                 match = (char)UCharacter.getBidiPairedBracket(c); /* get the matching char */
   2245             else
   2246                 match = 0;
   2247             if (match != c &&               /* has a matching char */
   2248                 UCharacter.getIntPropertyValue(c, UProperty.BIDI_PAIRED_BRACKET_TYPE) ==
   2249                     /* opening bracket */         UCharacter.BidiPairedBracketType.OPEN) {
   2250                 /* special case: process synonyms
   2251                    create an opening entry for each synonym */
   2252                 if (match == 0x232A) {      /* RIGHT-POINTING ANGLE BRACKET */
   2253                     bracketAddOpening(bd, (char)0x3009, position);
   2254                 }
   2255                 else if (match == 0x3009) { /* RIGHT ANGLE BRACKET */
   2256                     bracketAddOpening(bd, (char)0x232A, position);
   2257                 }
   2258                 bracketAddOpening(bd, match, position);
   2259             }
   2260         }
   2261         level = levels[position];
   2262         if ((level & LEVEL_OVERRIDE) != 0) {    /* X4, X5 */
   2263             newProp = (byte)(level & 1);
   2264             if (dirProp != S && dirProp != WS && dirProp != ON)
   2265                 dirProps[position] = newProp;
   2266             pLastIsoRun.lastBase = newProp;
   2267             pLastIsoRun.lastStrong = newProp;
   2268             pLastIsoRun.contextDir = newProp;
   2269             pLastIsoRun.contextPos = position;
   2270         }
   2271         else if (dirProp <= R || dirProp == AL) {
   2272             newProp = DirFromStrong(dirProp);
   2273             pLastIsoRun.lastBase = dirProp;
   2274             pLastIsoRun.lastStrong = dirProp;
   2275             pLastIsoRun.contextDir = newProp;
   2276             pLastIsoRun.contextPos = position;
   2277         }
   2278         else if(dirProp == EN) {
   2279             pLastIsoRun.lastBase = EN;
   2280             if (pLastIsoRun.lastStrong == L) {
   2281                 newProp = L;                    /* W7 */
   2282                 if (!bd.isNumbersSpecial)
   2283                     dirProps[position] = ENL;
   2284                 pLastIsoRun.contextDir = L;
   2285                 pLastIsoRun.contextPos = position;
   2286             }
   2287             else {
   2288                 newProp = R;                    /* N0 */
   2289                 if (pLastIsoRun.lastStrong == AL)
   2290                     dirProps[position] = AN;    /* W2 */
   2291                 else
   2292                     dirProps[position] = ENR;
   2293                 pLastIsoRun.contextDir = R;
   2294                 pLastIsoRun.contextPos = position;
   2295             }
   2296         }
   2297         else if (dirProp == AN) {
   2298             newProp = R;                        /* N0 */
   2299             pLastIsoRun.lastBase = AN;
   2300             pLastIsoRun.contextDir = R;
   2301             pLastIsoRun.contextPos = position;
   2302         }
   2303         else if (dirProp == NSM) {
   2304             /* if the last real char was ON, change NSM to ON so that it
   2305                will stay ON even if the last real char is a bracket which
   2306                may be changed to L or R */
   2307             newProp = pLastIsoRun.lastBase;
   2308             if (newProp == ON)
   2309                 dirProps[position] = newProp;
   2310         }
   2311         else {
   2312             newProp = dirProp;
   2313             pLastIsoRun.lastBase = dirProp;
   2314         }
   2315         if (newProp <= R || newProp == AL) {
   2316             int i;
   2317             short flag = (short)DirPropFlag(DirFromStrong(newProp));
   2318             for (i = pLastIsoRun.start; i < pLastIsoRun.limit; i++)
   2319                 if (position > bd.openings[i].position)
   2320                     bd.openings[i].flags |= flag;
   2321         }
   2322     }
   2323 
   2324     /* perform (X1)..(X9) ------------------------------------------------------- */
   2325 
   2326     /* determine if the text is mixed-directional or single-directional */
   2327     private byte directionFromFlags() {
   2328         /* if the text contains AN and neutrals, then some neutrals may become RTL */
   2329         if (!((flags & MASK_RTL) != 0 ||
   2330               ((flags & DirPropFlag(AN)) != 0 &&
   2331                (flags & MASK_POSSIBLE_N) != 0))) {
   2332             return LTR;
   2333         } else if ((flags & MASK_LTR) == 0) {
   2334             return RTL;
   2335         } else {
   2336             return MIXED;
   2337         }
   2338     }
   2339 
   2340     /*
   2341  * Resolve the explicit levels as specified by explicit embedding codes.
   2342  * Recalculate the flags to have them reflect the real properties
   2343  * after taking the explicit embeddings into account.
   2344  *
   2345  * The BiDi algorithm is designed to result in the same behavior whether embedding
   2346  * levels are externally specified (from "styled text", supposedly the preferred
   2347  * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
   2348  * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
   2349  * However, in a real implementation, the removal of these codes and their index
   2350  * positions in the plain text is undesirable since it would result in
   2351  * reallocated, reindexed text.
   2352  * Instead, this implementation leaves the codes in there and just ignores them
   2353  * in the subsequent processing.
   2354  * In order to get the same reordering behavior, positions with a BN or a not-isolate
   2355  * explicit embedding code just get the same level assigned as the last "real"
   2356  * character.
   2357  *
   2358  * Some implementations, not this one, then overwrite some of these
   2359  * directionality properties at "real" same-level-run boundaries by
   2360  * L or R codes so that the resolution of weak types can be performed on the
   2361  * entire paragraph at once instead of having to parse it once more and
   2362  * perform that resolution on same-level-runs.
   2363  * This limits the scope of the implicit rules in effectively
   2364  * the same way as the run limits.
   2365  *
   2366  * Instead, this implementation does not modify these codes, except for
   2367  * paired brackets whose properties (ON) may be replaced by L or R.
   2368  * On one hand, the paragraph has to be scanned for same-level-runs, but
   2369  * on the other hand, this saves another loop to reset these codes,
   2370  * or saves making and modifying a copy of dirProps[].
   2371  *
   2372  *
   2373  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
   2374  *
   2375  *
   2376  * Handling the stack of explicit levels (Xn):
   2377  *
   2378  * With the BiDi stack of explicit levels, as pushed with each
   2379  * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
   2380  * the explicit level must never exceed MAX_EXPLICIT_LEVEL.
   2381  *
   2382  * In order to have a correct push-pop semantics even in the case of overflows,
   2383  * overflow counters and a valid isolate counter are used as described in UAX#9
   2384  * section 3.3.2 "Explicit Levels and Directions".
   2385  *
   2386  * This implementation assumes that MAX_EXPLICIT_LEVEL is odd.
   2387  *
   2388  * Returns the direction
   2389  *
   2390  */
   2391     private byte resolveExplicitLevels() {
   2392         int i = 0;
   2393         byte dirProp;
   2394         byte level = GetParaLevelAt(0);
   2395         byte dirct;
   2396         isolateCount = 0;
   2397 
   2398         /* determine if the text is mixed-directional or single-directional */
   2399         dirct = directionFromFlags();
   2400 
   2401         /* we may not need to resolve any explicit levels */
   2402         if (dirct != MIXED) {
   2403             /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
   2404             return dirct;
   2405         }
   2406         if (reorderingMode > REORDER_LAST_LOGICAL_TO_VISUAL) {
   2407             /* inverse BiDi: mixed, but all characters are at the same embedding level */
   2408             /* set all levels to the paragraph level */
   2409             int paraIndex, start, limit;
   2410             for (paraIndex = 0; paraIndex < paraCount; paraIndex++) {
   2411                 if (paraIndex == 0)
   2412                     start = 0;
   2413                 else
   2414                     start = paras_limit[paraIndex - 1];
   2415                 limit = paras_limit[paraIndex];
   2416                 level = paras_level[paraIndex];
   2417                 for (i = start; i < limit; i++)
   2418                     levels[i] =level;
   2419             }
   2420             return dirct;               /* no bracket matching for inverse BiDi */
   2421         }
   2422         if ((flags & (MASK_EXPLICIT | MASK_ISO)) == 0) {
   2423             /* no embeddings, set all levels to the paragraph level */
   2424             /* we still have to perform bracket matching */
   2425             int paraIndex, start, limit;
   2426             BracketData bracketData = new BracketData();
   2427             bracketInit(bracketData);
   2428             for (paraIndex = 0; paraIndex < paraCount; paraIndex++) {
   2429                 if (paraIndex == 0)
   2430                     start = 0;
   2431                 else
   2432                     start = paras_limit[paraIndex-1];
   2433                 limit = paras_limit[paraIndex];
   2434                 level = paras_level[paraIndex];
   2435                 for (i = start; i < limit; i++) {
   2436                     levels[i] = level;
   2437                     dirProp = dirProps[i];
   2438                     if (dirProp == BN)
   2439                         continue;
   2440                     if (dirProp == B) {
   2441                         if ((i + 1) < length) {
   2442                             if (text[i] == CR && text[i + 1] == LF)
   2443                                 continue;   /* skip CR when followed by LF */
   2444                             bracketProcessB(bracketData, level);
   2445                         }
   2446                         continue;
   2447                     }
   2448                     bracketProcessChar(bracketData, i);
   2449                 }
   2450             }
   2451             return dirct;
   2452         }
   2453         /* continue to perform (Xn) */
   2454 
   2455         /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
   2456         /* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */
   2457         byte embeddingLevel = level, newLevel;
   2458         byte previousLevel = level; /* previous level for regular (not CC) characters */
   2459         int lastCcPos = 0;          /* index of last effective LRx,RLx, PDx */
   2460 
   2461         /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
   2462            stackLast points to its current entry. */
   2463         short[] stack = new short[MAX_EXPLICIT_LEVEL + 2];  /* we never push anything >= MAX_EXPLICIT_LEVEL
   2464                                                                but we need one more entry as base */
   2465         int stackLast = 0;
   2466         int overflowIsolateCount = 0;
   2467         int overflowEmbeddingCount = 0;
   2468         int validIsolateCount = 0;
   2469         BracketData bracketData = new BracketData();
   2470         bracketInit(bracketData);
   2471         stack[0] = level;       /* initialize base entry to para level, no override, no isolate */
   2472 
   2473         /* recalculate the flags */
   2474         flags = 0;
   2475 
   2476         for (i = 0; i < length; i++) {
   2477             dirProp = dirProps[i];
   2478             switch (dirProp) {
   2479             case LRE:
   2480             case RLE:
   2481             case LRO:
   2482             case RLO:
   2483                 /* (X2, X3, X4, X5) */
   2484                 flags |= DirPropFlag(BN);
   2485                 levels[i] = previousLevel;
   2486                 if (dirProp == LRE || dirProp == LRO)
   2487                     /* least greater even level */
   2488                     newLevel = (byte)((embeddingLevel+2) & ~(LEVEL_OVERRIDE | 1));
   2489                 else
   2490                     /* least greater odd level */
   2491                     newLevel = (byte)((NoOverride(embeddingLevel) + 1) | 1);
   2492                 if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0 &&
   2493                                                       overflowEmbeddingCount == 0) {
   2494                     lastCcPos = i;
   2495                     embeddingLevel = newLevel;
   2496                     if (dirProp == LRO || dirProp == RLO)
   2497                         embeddingLevel |= LEVEL_OVERRIDE;
   2498                     stackLast++;
   2499                     stack[stackLast] = embeddingLevel;
   2500                     /* we don't need to set LEVEL_OVERRIDE off for LRE and RLE
   2501                        since this has already been done for newLevel which is
   2502                        the source for embeddingLevel.
   2503                      */
   2504                 } else {
   2505                     if (overflowIsolateCount == 0)
   2506                         overflowEmbeddingCount++;
   2507                 }
   2508                 break;
   2509             case PDF:
   2510                 /* (X7) */
   2511                 flags |= DirPropFlag(BN);
   2512                 levels[i] = previousLevel;
   2513                 /* handle all the overflow cases first */
   2514                 if (overflowIsolateCount > 0) {
   2515                     break;
   2516                 }
   2517                 if (overflowEmbeddingCount > 0) {
   2518                     overflowEmbeddingCount--;
   2519                     break;
   2520                 }
   2521                 if (stackLast > 0 && stack[stackLast] < ISOLATE) {   /* not an isolate entry */
   2522                     lastCcPos = i;
   2523                     stackLast--;
   2524                     embeddingLevel = (byte)stack[stackLast];
   2525                 }
   2526                 break;
   2527             case LRI:
   2528             case RLI:
   2529                 flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel);
   2530                 levels[i] = NoOverride(embeddingLevel);
   2531                 if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
   2532                     bracketProcessBoundary(bracketData, lastCcPos,
   2533                                            previousLevel, embeddingLevel);
   2534                     flags |= DirPropFlagMultiRuns;
   2535                 }
   2536                 previousLevel = embeddingLevel;
   2537                 /* (X5a, X5b) */
   2538                 if (dirProp == LRI)
   2539                     /* least greater even level */
   2540                     newLevel=(byte)((embeddingLevel+2)&~(LEVEL_OVERRIDE|1));
   2541                 else
   2542                     /* least greater odd level */
   2543                     newLevel=(byte)((NoOverride(embeddingLevel)+1)|1);
   2544                 if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0
   2545                                                    && overflowEmbeddingCount == 0) {
   2546                     flags |= DirPropFlag(dirProp);
   2547                     lastCcPos = i;
   2548                     validIsolateCount++;
   2549                     if (validIsolateCount > isolateCount)
   2550                         isolateCount = validIsolateCount;
   2551                     embeddingLevel = newLevel;
   2552                     /* we can increment stackLast without checking because newLevel
   2553                        will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
   2554                     stackLast++;
   2555                     stack[stackLast] = (short)(embeddingLevel + ISOLATE);
   2556                     bracketProcessLRI_RLI(bracketData, embeddingLevel);
   2557                 } else {
   2558                     /* make it WS so that it is handled by adjustWSLevels() */
   2559                     dirProps[i] = WS;
   2560                     overflowIsolateCount++;
   2561                 }
   2562                 break;
   2563             case PDI:
   2564                 if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
   2565                     bracketProcessBoundary(bracketData, lastCcPos,
   2566                                            previousLevel, embeddingLevel);
   2567                     flags |= DirPropFlagMultiRuns;
   2568                 }
   2569                 /* (X6a) */
   2570                 if (overflowIsolateCount > 0) {
   2571                     overflowIsolateCount--;
   2572                     /* make it WS so that it is handled by adjustWSLevels() */
   2573                     dirProps[i] = WS;
   2574                 }
   2575                 else if (validIsolateCount > 0) {
   2576                     flags |= DirPropFlag(PDI);
   2577                     lastCcPos = i;
   2578                     overflowEmbeddingCount = 0;
   2579                     while (stack[stackLast] < ISOLATE)  /* pop embedding entries */
   2580                         stackLast--;                    /* until the last isolate entry */
   2581                     stackLast--;                        /* pop also the last isolate entry */
   2582                     validIsolateCount--;
   2583                     bracketProcessPDI(bracketData);
   2584                 } else
   2585                     /* make it WS so that it is handled by adjustWSLevels() */
   2586                     dirProps[i] = WS;
   2587                 embeddingLevel = (byte)(stack[stackLast] & ~ISOLATE);
   2588                 flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel);
   2589                 previousLevel = embeddingLevel;
   2590                 levels[i] = NoOverride(embeddingLevel);
   2591                 break;
   2592             case B:
   2593                 flags |= DirPropFlag(B);
   2594                 levels[i] = GetParaLevelAt(i);
   2595                 if ((i + 1) < length) {
   2596                     if (text[i] == CR && text[i + 1] == LF)
   2597                         break;          /* skip CR when followed by LF */
   2598                     overflowEmbeddingCount = overflowIsolateCount = 0;
   2599                     validIsolateCount = 0;
   2600                     stackLast = 0;
   2601                     previousLevel = embeddingLevel = GetParaLevelAt(i + 1);
   2602                     stack[0] = embeddingLevel;   /* initialize base entry to para level, no override, no isolate */
   2603                     bracketProcessB(bracketData, embeddingLevel);
   2604                 }
   2605                 break;
   2606             case BN:
   2607                 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
   2608                 /* they will get their levels set correctly in adjustWSLevels() */
   2609                 levels[i] = previousLevel;
   2610                 flags |= DirPropFlag(BN);
   2611                 break;
   2612             default:
   2613                 /* all other types are normal characters and get the "real" level */
   2614                 if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
   2615                     bracketProcessBoundary(bracketData, lastCcPos,
   2616                                            previousLevel, embeddingLevel);
   2617                     flags |= DirPropFlagMultiRuns;
   2618                     if ((embeddingLevel & LEVEL_OVERRIDE) != 0)
   2619                         flags |= DirPropFlagO(embeddingLevel);
   2620                     else
   2621                         flags |= DirPropFlagE(embeddingLevel);
   2622                 }
   2623                 previousLevel = embeddingLevel;
   2624                 levels[i] = embeddingLevel;
   2625                 bracketProcessChar(bracketData, i);
   2626                 /* the dirProp may have been changed in bracketProcessChar() */
   2627                 flags |= DirPropFlag(dirProps[i]);
   2628                 break;
   2629             }
   2630         }
   2631         if ((flags & MASK_EMBEDDING) != 0) {
   2632             flags |= DirPropFlagLR(paraLevel);
   2633         }
   2634         if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
   2635             flags |= DirPropFlag(L);
   2636         }
   2637         /* again, determine if the text is mixed-directional or single-directional */
   2638         dirct = directionFromFlags();
   2639 
   2640         return dirct;
   2641     }
   2642 
   2643     /**
   2644      * Use a pre-specified embedding levels array:
   2645      *
   2646      * <p>Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
   2647      * ignore all explicit codes (X9),
   2648      * and check all the preset levels.
   2649      *
   2650      * <p>Recalculate the flags to have them reflect the real properties
   2651      * after taking the explicit embeddings into account.
   2652      */
   2653     private byte checkExplicitLevels() {
   2654         int isolateCount = 0;
   2655 
   2656         this.flags = 0;     /* collect all directionalities in the text */
   2657         this.isolateCount = 0;
   2658 
   2659         int currentParaIndex = 0;
   2660         int currentParaLimit = paras_limit[0];
   2661         byte currentParaLevel = paraLevel;
   2662 
   2663         for (int i = 0; i < length; ++i) {
   2664             byte level = levels[i];
   2665             byte dirProp = dirProps[i];
   2666             if (dirProp == LRI || dirProp == RLI) {
   2667                 isolateCount++;
   2668                 if (isolateCount > this.isolateCount)
   2669                     this.isolateCount = isolateCount;
   2670             }
   2671             else if (dirProp == PDI)
   2672                 isolateCount--;
   2673             else if (dirProp == B)
   2674                 isolateCount = 0;
   2675 
   2676             // optimized version of  byte currentParaLevel = GetParaLevelAt(i);
   2677             if (defaultParaLevel != 0 &&
   2678                     i == currentParaLimit && (currentParaIndex + 1) < paraCount) {
   2679                 currentParaLevel = paras_level[++currentParaIndex];
   2680                 currentParaLimit = paras_limit[currentParaIndex];
   2681             }
   2682 
   2683             int overrideFlag = level & LEVEL_OVERRIDE;
   2684             level &= ~LEVEL_OVERRIDE;
   2685             if (level < currentParaLevel || MAX_EXPLICIT_LEVEL < level) {
   2686                 if (level == 0) {
   2687                     if (dirProp == B) {
   2688                         // Paragraph separators are ok with explicit level 0.
   2689                         // Prevents reordering of paragraphs.
   2690                     } else {
   2691                         // Treat explicit level 0 as a wildcard for the paragraph level.
   2692                         // Avoid making the caller guess what the paragraph level would be.
   2693                         level = currentParaLevel;
   2694                         levels[i] = (byte)(level | overrideFlag);
   2695                     }
   2696                 } else {
   2697                     // 1 <= level < currentParaLevel or MAX_EXPLICIT_LEVEL < level
   2698                     throw new IllegalArgumentException("level " + level +
   2699                                                        " out of bounds at " + i);
   2700                 }
   2701             }
   2702             if (overrideFlag != 0) {
   2703                 /* keep the override flag in levels[i] but adjust the flags */
   2704                 flags |= DirPropFlagO(level);
   2705             } else {
   2706                 /* set the flags */
   2707                 flags |= DirPropFlagE(level) | DirPropFlag(dirProp);
   2708             }
   2709         }
   2710         if ((flags & MASK_EMBEDDING) != 0)
   2711             flags |= DirPropFlagLR(paraLevel);
   2712         /* determine if the text is mixed-directional or single-directional */
   2713         return directionFromFlags();
   2714     }
   2715 
   2716     /*********************************************************************/
   2717     /* The Properties state machine table                                */
   2718     /*********************************************************************/
   2719     /*                                                                   */
   2720     /* All table cells are 8 bits:                                       */
   2721     /*      bits 0..4:  next state                                       */
   2722     /*      bits 5..7:  action to perform (if > 0)                       */
   2723     /*                                                                   */
   2724     /* Cells may be of format "n" where n represents the next state      */
   2725     /* (except for the rightmost column).                                */
   2726     /* Cells may also be of format "_(x,y)" where x represents an action */
   2727     /* to perform and y represents the next state.                       */
   2728     /*                                                                   */
   2729     /*********************************************************************/
   2730     /* Definitions and type for properties state tables                  */
   2731     /*********************************************************************/
   2732     private static final int IMPTABPROPS_COLUMNS = 16;                      /* 15 input columns (L .. ENR) plus the trailing Res column of impTabProps */
   2733     private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;     /* index of the last column (Res) in an impTabProps row */
   2734     private static short GetStateProps(short cell) {
   2735         return (short)(cell & 0x1f);
   2736     }
   2737     private static short GetActionProps(short cell) {
   2738         return (short)(cell >> 5);
   2739     }
   2740 
   2741     private static final short groupProp[] =          /* dirProp regrouped */
            /* One entry per dirProp, in the order of the header row below. Each value
               matches a column position of impTabProps (L=0, R=1, EN=2, AN=3, ON=4,
               S=5, B=6, ES=7, ET=8, CS=9, BN=10, NSM=11, AL=12, ENL=13, ENR=14), so
               several dirProps share one column: WS, ON, FSI, LRI, RLI and PDI all
               map to 4, and LRE, LRO, RLE, RLO, PDF and BN all map to 10.
               NOTE(review): presumably used as the column index into impTabProps;
               confirm at the point of use. */
   2742     {
   2743         /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
   2744             0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
   2745     };
            /* Reduced dirProp values. They are the values stored in the Res column of
               impTabProps, and their numeric order (L, R, EN, AN, ON, S, B) is the
               column order of the levels state tables. */
   2746     private static final short _L  = 0;
   2747     private static final short _R  = 1;
   2748     private static final short _EN = 2;
   2749     private static final short _AN = 3;
   2750     private static final short _ON = 4;
   2751     private static final short _S  = 5;
   2752     private static final short _B  = 6; /* reduced dirProp */
   2753 
   2754     /*********************************************************************/
   2755     /*                                                                   */
   2756     /*      PROPERTIES  STATE  TABLE                                     */
   2757     /*                                                                   */
   2758     /* In table impTabProps,                                             */
   2759     /*      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI    */
   2760     /*      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF         */
   2761     /*      - the Res column is the reduced property assigned to a run   */
   2762     /*                                                                   */
   2763     /* Action 1: process current run1, init new run1                     */
   2764     /*        2: init new run2                                           */
   2765     /*        3: process run1, process run2, init new run1               */
   2766     /*        4: process run1, set run1=run2, init new run2              */
   2767     /*                                                                   */
   2768     /* Notes:                                                            */
   2769     /*  1) This table is used in resolveImplicitLevels().                */
   2770     /*  2) This table triggers actions when there is a change in the Bidi*/
   2771     /*     property of incoming characters (action 1).                   */
   2772     /*  3) Most such property sequences are processed immediately (in    */
   2773     /*     fact, passed to processPropertySeq()).                        */
   2774     /*  4) However, numbers are assembled as one sequence. This means    */
   2775     /*     that undefined situations (like CS following digits, until    */
   2776     /*     it is known if the next char will be a digit) are held until  */
   2777     /*     following chars define them.                                  */
   2778     /*     Example: digits followed by CS, then comes another CS or ON;  */
   2779     /*              the digits will be processed, then the CS assigned   */
   2780     /*              as the start of an ON sequence (action 3).           */
   2781     /*  5) There are cases where more than one sequence must be          */
   2782     /*     processed, for instance digits followed by CS followed by L:  */
   2783     /*     the digits must be processed as one sequence, and the CS      */
   2784     /*     must be processed as an ON sequence, all this before starting */
   2785     /*     assembling chars for the opening L sequence.                  */
   2786     /*                                                                   */
   2787     /*                                                                   */
   2788     private static final short impTabProps[][] =
            /* Rows are states, columns are input property classes, in the order of
               the header row. Cell encoding: action * 32 + next state, so "32+n",
               "64+n", "96+n" and "128+n" request action 1, 2, 3 or 4 together with
               a move to state n, while a bare "n" only changes state.
               GetStateProps() and GetActionProps() split a cell back into these
               two parts. The last entry of each row (Res) is not a transition: it
               is the reduced property (_L, _R, _EN, _AN, _ON, _S or _B) assigned
               to a run in that state. */
   2789     {
   2790 /*                        L,     R,    EN,    AN,    ON,     S,     B,    ES,    ET,    CS,    BN,   NSM,    AL,   ENL,   ENR,   Res */
   2791 /* 0 Init        */ {     1,     2,     4,     5,     7,    15,    17,     7,     9,     7,     0,     7,     3,    18,    21,   _ON },
   2792 /* 1 L           */ {     1,  32+2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     1,     1,  32+3, 32+18, 32+21,    _L },
   2793 /* 2 R           */ {  32+1,     2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     2,     2,  32+3, 32+18, 32+21,    _R },
   2794 /* 3 AL          */ {  32+1,  32+2,  32+6,  32+6,  32+8, 32+16, 32+17,  32+8,  32+8,  32+8,     3,     3,     3, 32+18, 32+21,    _R },
   2795 /* 4 EN          */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17, 64+10,    11, 64+10,     4,     4,  32+3,    18,    21,   _EN },
   2796 /* 5 AN          */ {  32+1,  32+2,  32+4,     5,  32+7, 32+15, 32+17,  32+7,  32+9, 64+12,     5,     5,  32+3, 32+18, 32+21,   _AN },
   2797 /* 6 AL:EN/AN    */ {  32+1,  32+2,     6,     6,  32+8, 32+16, 32+17,  32+8,  32+8, 64+13,     6,     6,  32+3,    18,    21,   _AN },
   2798 /* 7 ON          */ {  32+1,  32+2,  32+4,  32+5,     7, 32+15, 32+17,     7, 64+14,     7,     7,     7,  32+3, 32+18, 32+21,   _ON },
   2799 /* 8 AL:ON       */ {  32+1,  32+2,  32+6,  32+6,     8, 32+16, 32+17,     8,     8,     8,     8,     8,  32+3, 32+18, 32+21,   _ON },
   2800 /* 9 ET          */ {  32+1,  32+2,     4,  32+5,     7, 32+15, 32+17,     7,     9,     7,     9,     9,  32+3,    18,    21,   _ON },
   2801 /*10 EN+ES/CS    */ {  96+1,  96+2,     4,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    10, 128+7,  96+3,    18,    21,   _EN },
   2802 /*11 EN+ET       */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17,  32+7,    11,  32+7,    11,    11,  32+3,    18,    21,   _EN },
   2803 /*12 AN+CS       */ {  96+1,  96+2,  96+4,     5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    12, 128+7,  96+3, 96+18, 96+21,   _AN },
   2804 /*13 AL:EN/AN+CS */ {  96+1,  96+2,     6,     6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8,    13, 128+8,  96+3,    18,    21,   _AN },
   2805 /*14 ON+ET       */ {  32+1,  32+2, 128+4,  32+5,     7, 32+15, 32+17,     7,    14,     7,    14,    14,  32+3,128+18,128+21,   _ON },
   2806 /*15 S           */ {  32+1,  32+2,  32+4,  32+5,  32+7,    15, 32+17,  32+7,  32+9,  32+7,    15,  32+7,  32+3, 32+18, 32+21,    _S },
   2807 /*16 AL:S        */ {  32+1,  32+2,  32+6,  32+6,  32+8,    16, 32+17,  32+8,  32+8,  32+8,    16,  32+8,  32+3, 32+18, 32+21,    _S },
   2808 /*17 B           */ {  32+1,  32+2,  32+4,  32+5,  32+7, 32+15,    17,  32+7,  32+9,  32+7,    17,  32+7,  32+3, 32+18, 32+21,    _B },
   2809 /*18 ENL         */ {  32+1,  32+2,    18,  32+5,  32+7, 32+15, 32+17, 64+19,    20, 64+19,    18,    18,  32+3,    18,    21,    _L },
   2810 /*19 ENL+ES/CS   */ {  96+1,  96+2,    18,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    19, 128+7,  96+3,    18,    21,    _L },
   2811 /*20 ENL+ET      */ {  32+1,  32+2,    18,  32+5,  32+7, 32+15, 32+17,  32+7,    20,  32+7,    20,    20,  32+3,    18,    21,    _L },
   2812 /*21 ENR         */ {  32+1,  32+2,    21,  32+5,  32+7, 32+15, 32+17, 64+22,    23, 64+22,    21,    21,  32+3,    18,    21,   _AN },
   2813 /*22 ENR+ES/CS   */ {  96+1,  96+2,    21,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    22, 128+7,  96+3,    18,    21,   _AN },
   2814 /*23 ENR+ET      */ {  32+1,  32+2,    21,  32+5,  32+7, 32+15, 32+17,  32+7,    23,  32+7,    23,    23,  32+3,    18,    21,   _AN }
   2815     };
   2816 
   2817     /*********************************************************************/
   2818     /* The levels state machine tables                                   */
   2819     /*********************************************************************/
   2820     /*                                                                   */
   2821     /* All table cells are 8 bits:                                       */
   2822     /*      bits 0..3:  next state                                       */
   2823     /*      bits 4..7:  action to perform (if > 0)                       */
   2824     /*                                                                   */
   2825     /* Cells may be of format "n" where n represents the next state      */
   2826     /* (except for the rightmost column).                                */
   2827     /* Cells may also be of format "_(x,y)" where x represents an action */
   2828     /* to perform and y represents the next state.                       */
   2829     /*                                                                   */
   2830     /* This format limits each table to 16 states and to 15 actions.     */
   2831     /*                                                                   */
   2832     /*********************************************************************/
   2833     /* Definitions and type for levels state tables                      */
   2834     /*********************************************************************/
   2835     private static final int IMPTABLEVELS_COLUMNS = _B + 2;                   /* one column per reduced dirProp (_L .. _B) plus the Res column */
   2836     private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1;     /* index of the Res column, the last one in a levels table row */
   2837     private static short GetState(byte cell) { return (short)(cell & 0x0f); }
   2838     private static short GetAction(byte cell) { return (short)(cell >> 4); }
   2839 
   2840     private static class ImpTabPair {
   2841         byte[][][] imptab;
   2842         short[][] impact;
   2843 
   2844         ImpTabPair(byte[][] table1, byte[][] table2,
   2845                    short[] act1, short[] act2) {
   2846             imptab = new byte[][][] {table1, table2};
   2847             impact = new short[][] {act1, act2};
   2848         }
   2849     }
   2850 
   2851     /*********************************************************************/
   2852     /*                                                                   */
   2853     /*      LEVELS  STATE  TABLES                                        */
   2854     /*                                                                   */
   2855     /* In all levels state tables,                                       */
   2856     /*      - state 0 is the initial state                               */
   2857     /*      - the Res column is the increment to add to the text level   */
   2858     /*        for this property sequence.                                */
   2859     /*                                                                   */
   2860     /* The impact arrays for each table of a pair map the local action   */
   2861     /* numbers of the table to the total list of actions. For instance,  */
   2862     /* action 2 in a given table corresponds to the action number which  */
   2863     /* appears in entry [2] of the impact array for that table.          */
   2864     /* The first entry of all impact arrays must be 0.                   */
   2865     /*                                                                   */
   2866     /* Action 1: init conditional sequence                               */
   2867     /*        2: prepend conditional sequence to current sequence        */
   2868     /*        3: set ON sequence to new level - 1                        */
   2869     /*        4: init EN/AN/ON sequence                                  */
   2870     /*        5: fix EN/AN/ON sequence followed by R                     */
   2871     /*        6: set previous level sequence to level 2                  */
   2872     /*                                                                   */
   2873     /* Notes:                                                            */
   2874     /*  1) These tables are used in processPropertySeq(). The input      */
   2875     /*     is property sequences as determined by resolveImplicitLevels. */
   2876     /*  2) Most such property sequences are processed immediately        */
   2877     /*     (levels are assigned).                                        */
   2878     /*  3) However, some sequences cannot be assigned a final level till */
   2879     /*     one or more following sequences are received. For instance,   */
   2880     /*     ON following an R sequence within an even-level paragraph.    */
   2881     /*     If the following sequence is R, the ON sequence will be       */
   2882     /*     assigned basic run level+1, and so will the R sequence.       */
   2883     /*  4) S is generally handled like ON, since its level will be fixed */
   2884     /*     to paragraph level in adjustWSLevels().                       */
   2885     /*                                                                   */
   2886 
   2887     private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
   2888         /*  In this table, conditional sequences receive the lower possible level
   2889             until proven otherwise.
   2890         */
            /* Cell encoding: high nibble = local action number, low nibble = next
               state, as decoded by GetAction() and GetState(). For example 0x14 is
               action 1 with next state 4, and a plain 3 is state 3 with no action.
               The last entry of each row (Res) is the increment to add to the
               text level for the property sequence. */
   2891     {
   2892         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   2893         /* 0 : init       */ {     0,     1,     0,     2,     0,     0,     0,  0 },
   2894         /* 1 : R          */ {     0,     1,     3,     3,  0x14,  0x14,     0,  1 },
   2895         /* 2 : AN         */ {     0,     1,     0,     2,  0x15,  0x15,     0,  2 },
   2896         /* 3 : R+EN/AN    */ {     0,     1,     3,     3,  0x14,  0x14,     0,  2 },
   2897         /* 4 : R+ON       */ {     0,  0x21,  0x33,  0x33,     4,     4,     0,  0 },
   2898         /* 5 : AN+ON      */ {     0,  0x21,     0,  0x32,     5,     5,     0,  0 }
   2899     };
   2900
   2901     private static final byte impTabR_DEFAULT[][] = /* Odd  paragraph level */
   2902         /*  In this table, conditional sequences receive the lower possible level
   2903             until proven otherwise.
   2904         */
            /* Same cell encoding as impTabL_DEFAULT. */
   2905     {
   2906         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   2907         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
   2908         /* 1 : L          */ {     1,     0,     1,     3,  0x14,  0x14,     0,  1 },
   2909         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
   2910         /* 3 : L+AN       */ {     1,     0,     1,     3,     5,     5,     0,  1 },
   2911         /* 4 : L+ON       */ {  0x21,     0,  0x21,     3,     4,     4,     0,  0 },
   2912         /* 5 : L+AN+ON    */ {     1,     0,     1,     3,     5,     5,     0,  0 }
   2913     };
   2914
            /* Identity action map: local action n of a table is action n of the
               total action list, for n = 0..4. */
   2915     private static final short[] impAct0 = {0,1,2,3,4};
   2916
            /* Default pair: the even-level table first, the odd-level table second,
               both with the identity action map. */
   2917     private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
   2918             impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
   2919 
   2920     private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
   2921         /* In this table, conditional sequences receive the lower possible
   2922            level until proven otherwise.
   2923         */
            /* Cell encoding: high nibble = local action number, low nibble = next
               state (see GetAction() and GetState()); e.g. 0x42 is action 4 with
               next state 2. Res is the last entry of each row. */
   2924         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   2925         /* 0 : init       */ {     0,     2,  0x11,  0x11,     0,     0,     0,  0 },
   2926         /* 1 : L+EN/AN    */ {     0,  0x42,     1,     1,     0,     0,     0,  0 },
   2927         /* 2 : R          */ {     0,     2,     4,     4,  0x13,  0x13,     0,  1 },
   2928         /* 3 : R+ON       */ {     0,  0x22,  0x34,  0x34,     3,     3,     0,  0 },
   2929         /* 4 : R+EN/AN    */ {     0,     2,     4,     4,  0x13,  0x13,     0,  2 }
   2930     };
            /* There is no dedicated odd-level table for this mode: the pair reuses
               impTabR_DEFAULT as its second table. */
   2931     private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
   2932             impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
   2933 
   2934     private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = {
   2935         /* In this table, EN/AN+ON sequences receive levels as if associated with R
   2936            until proven that there is L or sor/eor on both sides. AN is handled like EN.
   2937         */
            /* First table of the pair built below (the even-level slot in the other
               pairs). Cell encoding: high nibble = local action number, low nibble =
               next state; e.g. 0x20 is action 2 with a return to state 0. */
   2938         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   2939         /* 0 init         */ {     0,     3,  0x11,  0x11,     0,     0,     0,  0 },
   2940         /* 1 EN/AN        */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  2 },
   2941         /* 2 EN/AN+ON     */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  1 },
   2942         /* 3 R            */ {     0,     3,     5,     5,  0x14,     0,     0,  1 },
   2943         /* 4 R+ON         */ {  0x20,     3,     5,     5,     4,  0x20,  0x20,  1 },
   2944         /* 5 R+EN/AN      */ {     0,     3,     5,     5,  0x14,     0,     0,  2 }
   2945     };
   2946     private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = {
   2947         /*  In this table, EN/AN+ON sequences receive levels as if associated with R
   2948             until proven that there is L on both sides. AN is handled like EN.
   2949         */
            /* Second table of the pair built below; same cell encoding as the
               table above. */
   2950         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   2951         /* 0 init         */ {     2,     0,     1,     1,     0,     0,     0,  0 },
   2952         /* 1 EN/AN        */ {     2,     0,     1,     1,     0,     0,     0,  1 },
   2953         /* 2 L            */ {     2,     0,  0x14,  0x14,  0x13,     0,     0,  1 },
   2954         /* 3 L+ON         */ {  0x22,     0,     4,     4,     3,     0,     0,  0 },
   2955         /* 4 L+EN/AN      */ {  0x22,     0,     4,     4,     3,     0,     0,  1 }
   2956     };
            /* Both tables use only local actions 1 and 2, mapped unchanged by the
               identity action map impAct0. */
   2957     private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
   2958             ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
   2959                        impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
   2960 
   2961     private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = {
   2962         /* This table is identical to the Default LTR table except that EN and AN
   2963            are handled like L.
   2964         */
            /* Cell encoding: high nibble = local action number, low nibble = next
               state. In every row the EN and AN columns hold the same value as the
               L column.
               NOTE(review): rows 4 and 5 also differ from impTabL_DEFAULT in the
               L, R, B and Res columns, so "identical except EN/AN" is not literally
               true; confirm this is intended.
               NOTE(review): starting from state 0, only states 0, 1 and 4 can be
               reached through the cells of this table; rows 2, 3 and 5 are never
               entered. Presumably they are kept so the row numbering parallels
               the default table; confirm. */
   2965         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   2966         /* 0 : init       */ {     0,     1,     0,     0,     0,     0,     0,  0 },
   2967         /* 1 : R          */ {     0,     1,     0,     0,  0x14,  0x14,     0,  1 },
   2968         /* 2 : AN         */ {     0,     1,     0,     0,  0x15,  0x15,     0,  2 },
   2969         /* 3 : R+EN/AN    */ {     0,     1,     0,     0,  0x14,  0x14,     0,  2 },
   2970         /* 4 : R+ON       */ {  0x20,     1,  0x20,  0x20,     4,     4,  0x20,  1 },
   2971         /* 5 : AN+ON      */ {  0x20,     1,  0x20,  0x20,     5,     5,  0x20,  1 }
   2972     };
   2973     private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = {
   2974         /* This table is identical to the Default RTL table except that EN and AN
   2975            are handled like L.
   2976         */
            /* Same cell encoding as above; the EN and AN columns repeat the L column
               in every row.
               NOTE(review): as in the table above, only states 0, 1 and 4 are
               reachable from state 0 through the cells of this table. */
   2977         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   2978         /* 0 : init       */ {     1,     0,     1,     1,     0,     0,     0,  0 },
   2979         /* 1 : L          */ {     1,     0,     1,     1,  0x14,  0x14,     0,  1 },
   2980         /* 2 : EN/AN      */ {     1,     0,     1,     1,     0,     0,     0,  1 },
   2981         /* 3 : L+AN       */ {     1,     0,     1,     1,     5,     5,     0,  1 },
   2982         /* 4 : L+ON       */ {  0x21,     0,  0x21,  0x21,     4,     4,     0,  0 },
   2983         /* 5 : L+AN+ON    */ {     1,     0,     1,     1,     5,     5,     0,  0 }
   2984     };
            /* Pair of the two tables above, both with the identity action map. */
   2985     private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
   2986             (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
   2987              impAct0, impAct0);
   2988 
   2989     private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = {  /* Odd  paragraph level */
   2990         /*  In this table, conditional sequences receive the lower possible level
   2991             until proven otherwise.
   2992         */
            /* Cell encoding: high nibble = local action number, low nibble = next
               state. The local actions used here are 1 (0x13), 2 (0x21) and
               3 (0x30); they are translated through impAct1 below. */
   2993         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   2994         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
   2995         /* 1 : L          */ {     1,     0,     1,     2,  0x13,  0x13,     0,  1 },
   2996         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
   2997         /* 3 : L+ON       */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  0 },
   2998         /* 4 : L+ON+AN    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  3 },
   2999         /* 5 : L+AN+ON    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  2 },
   3000         /* 6 : L+ON+EN    */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  1 }
   3001     };
            /* Action map for the table above: local actions 0 and 1 keep their
               number, local actions 2 and 3 become actions 13 and 14 of the total
               action list. */
   3002     private static final short[] impAct1 = {0,1,13,14};
            /* The even-level side reuses impTabL_DEFAULT with the identity map
               impAct0; only the odd-level side is specific to this mode. */
   3003     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
   3004             impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
   3005 
   3006     private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
   3007         /* The case handled in this table is (visually):  R EN L

                   Layout: one row per state, one column per property group (header row
                   below). The last column (Res) is the amount processPropertySeq() adds
                   to the run level. The other cells combine a next state with an index
                   into the action map impAct2; the bit layout is defined by
                   GetState()/GetAction(), which are not part of this excerpt.
   3008          */
   3009         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   3010         /* 0 : init       */ {     0,  0x63,     0,     1,     0,     0,     0,  0 },
   3011         /* 1 : L+AN       */ {     0,  0x63,     0,     1,  0x12,  0x30,     0,  4 },
   3012         /* 2 : L+AN+ON    */ {  0x20,  0x63,  0x20,     1,     2,  0x30,  0x20,  3 },
   3013         /* 3 : R          */ {     0,  0x63,  0x55,  0x56,  0x14,  0x30,     0,  3 },
   3014         /* 4 : R+ON       */ {  0x30,  0x43,  0x55,  0x56,     4,  0x30,  0x30,  3 },
   3015         /* 5 : R+EN       */ {  0x30,  0x43,     5,  0x56,  0x14,  0x30,  0x30,  4 },
   3016         /* 6 : R+AN       */ {  0x30,  0x43,  0x55,     6,  0x14,  0x30,  0x30,  4 }
   3017     };
   3018     private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
   3019         /* The cases handled in this table are (visually):  R EN L
   3020                                                             R L AN L

                   Same layout as the L table above; this table is combined with the
                   action map impAct3 in the pairs built from it.
   3021         */
   3022         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   3023         /* 0 : init       */ {  0x13,     0,     1,     1,     0,     0,     0,  0 },
   3024         /* 1 : R+EN/AN    */ {  0x23,     0,     1,     1,     2,  0x40,     0,  1 },
   3025         /* 2 : R+EN/AN+ON */ {  0x23,     0,     1,     1,     2,  0x40,     0,  0 },
   3026         /* 3 : L          */ {     3,     0,     3,  0x36,  0x14,  0x40,     0,  1 },
   3027         /* 4 : L+ON       */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  0 },
   3028         /* 5 : L+ON+EN    */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  1 },
   3029         /* 6 : L+AN       */ {  0x53,  0x40,     6,     6,     4,  0x40,  0x40,  3 }
   3030     };
            /* Action maps: entry i is the case label that processPropertySeq() executes
               when a cell selects action index i. impAct2 reaches cases 1, 2 and 5-8,
               impAct3 reaches cases 1 and 9-12; index 0 means "no action". */
   3031     private static final short[] impAct2 = {0,1,2,5,6,7,8};
   3032     private static final short[] impAct3 = {0,1,9,10,11,12};
   3033     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
   3034             new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
   3035                            impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
   3036 
   3037     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
   3038             impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
   3039 
   3040     private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
   3041         /*  The case handled in this table is (visually):  R EN L

                    Layout: one row per state, one column per property group (header row
                    below); the last column (Res) is the amount processPropertySeq() adds
                    to the run level. Unlike the LIKE_DIRECT_WITH_MARKS L table, EN and AN
                    share their states here (rows 1 and 4).
   3042         */
   3043         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
   3044         /* 0 : init       */ {     0,  0x62,     1,     1,     0,     0,     0,  0 },
   3045         /* 1 : L+EN/AN    */ {     0,  0x62,     1,     1,     0,  0x30,     0,  4 },
   3046         /* 2 : R          */ {     0,  0x62,  0x54,  0x54,  0x13,  0x30,     0,  3 },
   3047         /* 3 : R+ON       */ {  0x30,  0x42,  0x54,  0x54,     3,  0x30,  0x30,  3 },
   3048         /* 4 : R+EN/AN    */ {  0x30,  0x42,     4,     4,  0x13,  0x30,  0x30,  4 }
   3049     };
            /* Reuses the R table and both action maps (impAct2, impAct3) of
               impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; only the L table differs. */
   3050     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
   3051             ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
   3052                        impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
   3053 
   3054     private static class LevState {
   3055         byte[][] impTab;                /* level table pointer          */
   3056         short[] impAct;                 /* action map array             */
   3057         int startON;                    /* start of ON sequence         */
   3058         int startL2EN;                  /* start of level 2 sequence    */
   3059         int lastStrongRTL;              /* index of last found R or AL  */
   3060         int runStart;                   /* start position of the run    */
   3061         short state;                    /* current state                */
   3062         byte runLevel;                  /* run level before implicit solving */
   3063     }
   3064 
   3065     /*------------------------------------------------------------------------*/
   3066 
   3067     static final int FIRSTALLOC = 10;
   3068     /*
   3069      *  param pos:     position where to insert
   3070      *  param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
   3071      */
   3072     private void addPoint(int pos, int flag)
   3073     {
   3074         Point point = new Point();
   3075 
   3076         int len = insertPoints.points.length;
   3077         if (len == 0) {
   3078             insertPoints.points = new Point[FIRSTALLOC];
   3079             len = FIRSTALLOC;
   3080         }
   3081         if (insertPoints.size >= len) { /* no room for new point */
   3082             Point[] savePoints = insertPoints.points;
   3083             insertPoints.points = new Point[len * 2];
   3084             System.arraycopy(savePoints, 0, insertPoints.points, 0, len);
   3085         }
   3086         point.pos = pos;
   3087         point.flag = flag;
   3088         insertPoints.points[insertPoints.size] = point;
   3089         insertPoints.size++;
   3090     }
   3091 
   3092     private void setLevelsOutsideIsolates(int start, int limit, byte level)
   3093     {
   3094         byte dirProp;
   3095         int  isolateCount = 0, k;
   3096         for (k = start; k < limit; k++) {
   3097             dirProp = dirProps[k];
   3098             if (dirProp == PDI)
   3099                 isolateCount--;
   3100             if (isolateCount == 0)
   3101                 levels[k] = level;
   3102             if (dirProp == LRI || dirProp == RLI)
   3103                 isolateCount++;
   3104         }
   3105     }
   3106 
   3107     /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
   3108 
   3109     /*
   3110      * This implementation of the (Wn) rules applies all rules in one pass.
   3111      * In order to do so, it needs a look-ahead of typically 1 character
   3112      * (except for W5: sequences of ET) and keeps track of changes
   3113      * in a rule Wp that affect a later Wq (p<q).
   3114      *
   3115      * The (Nn) and (In) rules are also performed in that same single loop,
   3116      * but effectively one iteration behind for white space.
   3117      *
   3118      * Since all implicit rules are performed in one step, it is not necessary
   3119      * to actually store the intermediate directional properties in dirProps[].
   3120      */
   3121 
   3122     private void processPropertySeq(LevState levState, short _prop,
   3123             int start, int limit) {
                /*
                 * Feeds one property sequence [start, limit) with property _prop into
                 * the levels state machine held in levState:
                 *  - looks up impTab[current state][_prop],
                 *  - stores the new state in levState.state,
                 *  - executes the action selected through impAct (the switch below),
                 *  - finally writes runLevel + addLevel into levels[] for the sequence.
                 * resolveImplicitLevels() also calls this with start == limit (an empty
                 * range) to feed sor and eor through the state machine.
                 */
   3124         byte cell;
   3125         byte[][] impTab = levState.impTab;
   3126         short[] impAct = levState.impAct;
   3127         short oldStateSeq,actionSeq;
   3128         byte level, addLevel;
   3129         int start0, k;
   3130 
   3131         start0 = start;                 /* save original start position */
   3132         oldStateSeq = levState.state;
   3133         cell = impTab[oldStateSeq][_prop];
   3134         levState.state = GetState(cell);        /* isolate the new state */
   3135         actionSeq = impAct[GetAction(cell)];    /* isolate the action */
                /* level increment listed in the Res column for the NEW state */
   3136         addLevel = impTab[levState.state][IMPTABLEVELS_RES];
   3137 
   3138         if (actionSeq != 0) {
   3139             switch (actionSeq) {
   3140             case 1:                     /* init ON seq */
   3141                 levState.startON = start0;
   3142                 break;
   3143 
   3144             case 2:                     /* prepend ON seq to current seq */
   3145                 start = levState.startON;
   3146                 break;
   3147 
   3148             case 3:                     /* EN/AN after R+ON */
   3149                 level = (byte)(levState.runLevel + 1);
   3150                 setLevelsOutsideIsolates(levState.startON, start0, level);
   3151                 break;
   3152 
   3153             case 4:                     /* EN/AN before R for NUMBERS_SPECIAL */
   3154                 level = (byte)(levState.runLevel + 2);
   3155                 setLevelsOutsideIsolates(levState.startON, start0, level);
   3156                 break;
   3157 
   3158             case 5:                     /* L or S after possible relevant EN/AN */
   3159                 /* check if we had EN after R/AL */
   3160                 if (levState.startL2EN >= 0) {
   3161                     addPoint(levState.startL2EN, LRM_BEFORE);
   3162                 }
   3163                 levState.startL2EN = -1;  /* not within previous if since could also be -2 */
   3164                 /* check if we had any relevant EN/AN after R/AL */
   3165                 if ((insertPoints.points.length == 0) ||
   3166                         (insertPoints.size <= insertPoints.confirmed)) {
   3167                     /* nothing, just clean up */
   3168                     levState.lastStrongRTL = -1;
   3169                     /* check if we have a pending conditional segment */
   3170                     level = impTab[oldStateSeq][IMPTABLEVELS_RES];
   3171                     if ((level & 1) != 0 && levState.startON > 0) { /* after ON */
   3172                         start = levState.startON;   /* reset to basic run level */
   3173                     }
   3174                     if (_prop == _S) {              /* add LRM before S */
   3175                         addPoint(start0, LRM_BEFORE);
   3176                         insertPoints.confirmed = insertPoints.size;
   3177                     }
   3178                     break;
   3179                 }
   3180                 /* reset previous RTL cont to level for LTR text */
   3181                 for (k = levState.lastStrongRTL + 1; k < start0; k++) {
   3182                     /* reset odd level, leave runLevel+2 as is */
   3183                     levels[k] = (byte)((levels[k] - 2) & ~1);
   3184                 }
   3185                 /* mark insert points as confirmed */
   3186                 insertPoints.confirmed = insertPoints.size;
   3187                 levState.lastStrongRTL = -1;
   3188                 if (_prop == _S) {           /* add LRM before S */
   3189                     addPoint(start0, LRM_BEFORE);
   3190                     insertPoints.confirmed = insertPoints.size;
   3191                 }
   3192                 break;
   3193 
   3194             case 6:                     /* R/AL after possible relevant EN/AN */
   3195                 /* just clean up */
   3196                 if (insertPoints.points.length > 0)
   3197                     /* remove all non confirmed insert points */
   3198                     insertPoints.size = insertPoints.confirmed;
   3199                 levState.startON = -1;
   3200                 levState.startL2EN = -1;
   3201                 levState.lastStrongRTL = limit - 1;
   3202                 break;
   3203 
   3204             case 7:                     /* EN/AN after R/AL + possible cont */
   3205                 /* check for real AN */
   3206                 if ((_prop == _AN) && (dirProps[start0] == AN) &&
   3207                 (reorderingMode != REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
   3208                 {
   3209                     /* real AN */
   3210                     if (levState.startL2EN == -1) { /* if no relevant EN already found */
   3211                         /* just note the rightmost digit as a strong RTL */
   3212                         levState.lastStrongRTL = limit - 1;
   3213                         break;
   3214                     }
   3215                     if (levState.startL2EN >= 0)  { /* after EN, no AN */
   3216                         addPoint(levState.startL2EN, LRM_BEFORE);
                                /* -2: the LRM for this EN has been added (case 5 tests >= 0) */
   3217                         levState.startL2EN = -2;
   3218                     }
   3219                     /* note AN */
   3220                     addPoint(start0, LRM_BEFORE);
   3221                     break;
   3222                 }
   3223                 /* if first EN/AN after R/AL */
   3224                 if (levState.startL2EN == -1) {
   3225                     levState.startL2EN = start0;
   3226                 }
   3227                 break;
   3228 
   3229             case 8:                     /* note location of latest R/AL */
   3230                 levState.lastStrongRTL = limit - 1;
   3231                 levState.startON = -1;
   3232                 break;
   3233 
   3234             case 9:                     /* L after R+ON/EN/AN */
   3235                 /* include possible adjacent number on the left */
                        /* walk left past even-level positions; k stops on the nearest odd level */
   3236                 for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) {
   3237                 }
   3238                 if (k >= 0) {
   3239                     addPoint(k, RLM_BEFORE);    /* add RLM before */
   3240                     insertPoints.confirmed = insertPoints.size; /* confirm it */
   3241                 }
   3242                 levState.startON = start0;
   3243                 break;
   3244 
   3245             case 10:                    /* AN after L */
   3246                 /* AN numbers between L text on both sides may be trouble. */
   3247                 /* tentatively bracket with LRMs; will be confirmed if followed by L */
   3248                 addPoint(start0, LRM_BEFORE);   /* add LRM before */
   3249                 addPoint(start0, LRM_AFTER);    /* add LRM after  */
   3250                 break;
   3251 
   3252             case 11:                    /* R after L+ON/EN/AN */
   3253                 /* false alert, infirm LRMs around previous AN */
   3254                 insertPoints.size=insertPoints.confirmed;
   3255                 if (_prop == _S) {          /* add RLM before S */
   3256                     addPoint(start0, RLM_BEFORE);
   3257                     insertPoints.confirmed = insertPoints.size;
   3258                 }
   3259                 break;
   3260 
   3261             case 12:                    /* L after L+ON/AN */
   3262                 level = (byte)(levState.runLevel + addLevel);
                        /* raise, never lower, the levels of the pending sequence */
   3263                 for (k=levState.startON; k < start0; k++) {
   3264                     if (levels[k] < level) {
   3265                         levels[k] = level;
   3266                     }
   3267                 }
   3268                 insertPoints.confirmed = insertPoints.size;   /* confirm inserts */
   3269                 levState.startON = start0;
   3270                 break;
   3271 
   3272             case 13:                    /* L after L+ON+EN/AN/ON */
   3273                 level = levState.runLevel;
                        /* NOTE(review): the two inner while loops decrement k without
                           re-checking k >= levState.startON (or k >= 0) - confirm that the
                           state tables guarantee a terminating level before that bound. */
   3274                 for (k = start0-1; k >= levState.startON; k--) {
   3275                     if (levels[k] == level+3) {
   3276                         while (levels[k] == level+3) {
   3277                             levels[k--] -= 2;
   3278                         }
   3279                         while (levels[k] == level) {
   3280                             k--;
   3281                         }
   3282                     }
   3283                     if (levels[k] == level+2) {
   3284                         levels[k] = level;
   3285                         continue;
   3286                     }
   3287                     levels[k] = (byte)(level+1);
   3288                 }
   3289                 break;
   3290 
   3291             case 14:                    /* R after L+ON+EN/AN/ON */
   3292                 level = (byte)(levState.runLevel+1);
   3293                 for (k = start0-1; k >= levState.startON; k--) {
   3294                     if (levels[k] > level) {
   3295                         levels[k] -= 2;
   3296                     }
   3297                 }
   3298                 break;
   3299 
   3300             default:                        /* we should never get here */
   3301                 throw new IllegalStateException("Internal ICU error in processPropertySeq");
   3302             }
   3303         }
                /* Assign levels when the new state raises the level, or when an action
                   moved start back before start0 (cases 2 and 5 reset it to startON).
                   A start before runStart goes through setLevelsOutsideIsolates(), which
                   leaves text nested inside isolates untouched. */
   3304         if ((addLevel) != 0 || (start < start0)) {
   3305             level = (byte)(levState.runLevel + addLevel);
   3306             if (start >= levState.runStart) {
   3307                 for (k = start; k < limit; k++) {
   3308                     levels[k] = level;
   3309                 }
   3310             } else {
   3311                 setLevelsOutsideIsolates(start, limit, level);
   3312             }
   3313         }
   3314     }
   3315 
   3316     /**
   3317      * Returns the directionality of the last strong character at the end of the prologue, if any.
   3318      * Requires prologue!=null.
   3319      */
   3320     private byte lastL_R_AL() {
   3321         for (int i = prologue.length(); i > 0; ) {
   3322             int uchar = prologue.codePointBefore(i);
   3323             i -= Character.charCount(uchar);
   3324             byte dirProp = (byte)getCustomizedClass(uchar);
   3325             if (dirProp == L) {
   3326                 return _L;
   3327             }
   3328             if (dirProp == R || dirProp == AL) {
   3329                 return _R;
   3330             }
   3331             if(dirProp == B) {
   3332                 return _ON;
   3333             }
   3334         }
   3335         return _ON;
   3336     }
   3337 
   3338     /**
   3339      * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
   3340      * Requires epilogue!=null.
   3341      */
   3342     private byte firstL_R_AL_EN_AN() {
   3343         for (int i = 0; i < epilogue.length(); ) {
   3344             int uchar = epilogue.codePointAt(i);
   3345             i += Character.charCount(uchar);
   3346             byte dirProp = (byte)getCustomizedClass(uchar);
   3347             if (dirProp == L) {
   3348                 return _L;
   3349             }
   3350             if (dirProp == R || dirProp == AL) {
   3351                 return _R;
   3352             }
   3353             if (dirProp == EN) {
   3354                 return _EN;
   3355             }
   3356             if (dirProp == AN) {
   3357                 return _AN;
   3358             }
   3359         }
   3360         return _ON;
   3361     }
   3362 
   3363     private void resolveImplicitLevels(int start, int limit, short sor, short eor)
   3364     {
                /*
                 * Resolves the levels of the run [start, limit) in a single pass.
                 * Each dirProps[] value is mapped through groupProp[] and fed to the
                 * properties state table impTabProps; when a cell carries an action, the
                 * completed sequence(s) are handed to processPropertySeq(), which drives
                 * the levels state table and writes levels[].
                 * sor and eor are the property values fed in before and after the run;
                 * a prologue (when start == 0) or an epilogue (when limit == length) may
                 * replace them, see below.
                 */
   3365         byte dirProp;
   3366         LevState levState = new LevState();
   3367         int i, start1, start2;
   3368         short oldStateImp, stateImp, actionImp;
   3369         short gprop, resProp, cell;
   3370         boolean inverseRTL;
   3371         short nextStrongProp = R;
   3372         int nextStrongPos = -1;
   3373 
   3374         /* check for RTL inverse Bidi mode */
   3375         /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to
   3376          * loop on the text characters from end to start.
   3377          * This would need a different properties state table (at least different
   3378          * actions) and different levels state tables (maybe very similar to the
   3379          * LTR corresponding ones).
   3380          */
   3381         inverseRTL=((start<lastArabicPos) && ((GetParaLevelAt(start) & 1)>0) &&
   3382                     (reorderingMode == REORDER_INVERSE_LIKE_DIRECT  ||
   3383                      reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
   3384         /* initialize for property and levels state table */
   3385         levState.startL2EN = -1;        /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
   3386         levState.lastStrongRTL = -1;    /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
   3387         levState.runStart = start;
   3388         levState.runLevel = levels[start];
                /* pick the table and action map by the parity of the run level */
   3389         levState.impTab = impTabPair.imptab[levState.runLevel & 1];
   3390         levState.impAct = impTabPair.impact[levState.runLevel & 1];
                /* a strong character found in the prologue replaces sor for the first run */
   3391         if (start == 0 && prologue != null) {
   3392             byte lastStrong = lastL_R_AL();
   3393             if (lastStrong != _ON) {
   3394                 sor = lastStrong;
   3395             }
   3396         }
   3397         /* The isolates[] entries contain enough information to
   3398            resume the bidi algorithm in the same state as it was
   3399            when it was interrupted by an isolate sequence. */
   3400         if (dirProps[start] == PDI) {
   3401             levState.startON = isolates[isolateCount].startON;
   3402             start1 = isolates[isolateCount].start1;
   3403             stateImp = isolates[isolateCount].stateImp;
   3404             levState.state = isolates[isolateCount].state;
   3405             isolateCount--;
   3406         } else {
   3407             levState.startON = -1;
   3408             start1 = start;
                    /* NOTE(review): 1 + sor presumably selects the impTabProps state that
                       corresponds to sor, so a leading NSM continues it - confirm against
                       impTabProps, which is outside this excerpt. */
   3409             if (dirProps[start] == NSM)
   3410                 stateImp = (short)(1 + sor);
   3411             else
   3412                 stateImp = 0;
   3413             levState.state = 0;
                    /* empty range: only feeds sor into the levels state machine */
   3414             processPropertySeq(levState, sor, start, start);
   3415         }
   3416         start2 = start;                 /* to make the Java compiler happy */
   3417 
                /* i == limit is one extra iteration that feeds eor instead of a character */
   3418         for (i = start; i <= limit; i++) {
   3419             if (i >= limit) {
   3420                 int k;
                        /* find the last char of the run that is not BN or an explicit code */
   3421                 for (k = limit - 1;
   3422                      k > start &&
   3423                          (DirPropFlag(dirProps[k]) & MASK_BN_EXPLICIT) != 0;
   3424                      k--);
   3425                 dirProp = dirProps[k];
   3426                 if (dirProp == LRI || dirProp == RLI)
   3427                     break;  /* no forced closing for sequence ending with LRI/RLI */
   3428                 gprop = eor;
   3429             } else {
   3430                 byte prop, prop1;
   3431                 prop = dirProps[i];
   3432                 if (prop == B)
   3433                     isolateCount = -1;  /* current isolates stack entry == none */
   3434                 if (inverseRTL) {
   3435                     if (prop == AL) {
   3436                         /* AL before EN does not make it AN */
   3437                         prop = R;
   3438                     } else if (prop == EN) {
                                /* rescan only once i has passed the strong char found last time */
   3439                         if (nextStrongPos <= i) {
   3440                             /* look for next strong char (L/R/AL) */
   3441                             int j;
   3442                             nextStrongProp = R;     /* set default */
   3443                             nextStrongPos = limit;
   3444                             for (j = i+1; j < limit; j++) {
   3445                                 prop1 = dirProps[j];
   3446                                 if (prop1 == L || prop1 == R || prop1 == AL) {
   3447                                     nextStrongProp = prop1;
   3448                                     nextStrongPos = j;
   3449                                     break;
   3450                                 }
   3451                             }
   3452                         }
                                /* EN followed by AL is treated as AN */
   3453                         if (nextStrongProp == AL) {
   3454                             prop = AN;
   3455                         }
   3456                     }
   3457                 }
   3458                 gprop = groupProp[prop];
   3459             }
   3460             oldStateImp = stateImp;
   3461             cell = impTabProps[oldStateImp][gprop];
   3462             stateImp = GetStateProps(cell);     /* isolate the new state */
   3463             actionImp = GetActionProps(cell);   /* isolate the action */
   3464             if ((i == limit) && (actionImp == 0)) {
   3465                 /* there is an unprocessed sequence if its property == eor   */
   3466                 actionImp = 1;                  /* process the last sequence */
   3467             }
   3468             if (actionImp != 0) {
                        /* resolved property of the sequence that ends here (state left behind) */
   3469                 resProp = impTabProps[oldStateImp][IMPTABPROPS_RES];
   3470                 switch (actionImp) {
   3471                 case 1:             /* process current seq1, init new seq1 */
   3472                     processPropertySeq(levState, resProp, start1, i);
   3473                     start1 = i;
   3474                     break;
   3475                 case 2:             /* init new seq2 */
   3476                     start2 = i;
   3477                     break;
   3478                 case 3:             /* process seq1, process seq2, init new seq1 */
   3479                     processPropertySeq(levState, resProp, start1, start2);
   3480                     processPropertySeq(levState, _ON, start2, i);
   3481                     start1 = i;
   3482                     break;
   3483                 case 4:             /* process seq1, set seq1=seq2, init new seq2 */
   3484                     processPropertySeq(levState, resProp, start1, start2);
   3485                     start1 = start2;
   3486                     start2 = i;
   3487                     break;
   3488                 default:            /* we should never get here */
   3489                     throw new IllegalStateException("Internal ICU error in resolveImplicitLevels");
   3490                 }
   3491             }
   3492         }
   3493 
   3494         /* flush possible pending sequence, e.g. ON */
                /* A strong character or digit found in the epilogue replaces eor here; the
                   loop above has already consumed the original eor, so the replacement only
                   affects the final processPropertySeq() call below. */
   3495         if (limit == length && epilogue != null) {
   3496             byte firstStrong = firstL_R_AL_EN_AN();
   3497             if (firstStrong != _ON) {
   3498                 eor = firstStrong;
   3499             }
   3500         }
   3501 
   3502         /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
   3503         for (i = limit - 1;
   3504              i > start &&
   3505                  (DirPropFlag(dirProps[i]) & MASK_BN_EXPLICIT) != 0;
   3506              i--);
   3507         dirProp = dirProps[i];
                /* Run ends with LRI/RLI before the end of the text: push the current state
                   onto isolates[] so that a later run starting with PDI can resume from it
                   (see the PDI branch above) instead of closing the sequence with eor. */
   3508         if ((dirProp == LRI || dirProp == RLI) && limit < length) {
   3509             isolateCount++;
   3510             if (isolates[isolateCount] == null)
   3511                 isolates[isolateCount] = new Isolate();
   3512             isolates[isolateCount].stateImp = stateImp;
   3513             isolates[isolateCount].state = levState.state;
   3514             isolates[isolateCount].start1 = start1;
   3515             isolates[isolateCount].startON = levState.startON;
   3516         }
   3517         else
   3518             processPropertySeq(levState, eor, limit, limit);
   3519     }
   3520 
   3521     /* perform (L1) and (X9) ---------------------------------------------------- */
   3522 
   3523     /*
   3524      * Reset the embedding levels for some non-graphic characters (L1).
   3525      * This method also sets appropriate levels for BN, and
   3526      * explicit embedding types that are supposed to have been removed
   3527      * from the paragraph in (X9).
   3528      */
   3529     private void adjustWSLevels() {
   3530         int i;
   3531 
   3532         if ((flags & MASK_WS) != 0) {
   3533             int flag;
   3534             i = trailingWSStart;
   3535             while (i > 0) {
   3536                 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
   3537                 while (i > 0 && ((flag = DirPropFlag(dirProps[--i])) & MASK_WS) != 0) {
   3538                     if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
   3539                         levels[i] = 0;
   3540                     } else {
   3541                         levels[i] = GetParaLevelAt(i);
   3542                     }
   3543                 }
   3544 
   3545                 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
   3546                 /* here, i+1 is guaranteed to be <length */
   3547                 while (i > 0) {
   3548                     flag = DirPropFlag(dirProps[--i]);
   3549                     if ((flag & MASK_BN_EXPLICIT) != 0) {
   3550                         levels[i] = levels[i + 1];
   3551                     } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
   3552                         levels[i] = 0;
   3553                         break;
   3554                     } else if ((flag & MASK_B_S) != 0){
   3555                         levels[i] = GetParaLevelAt(i);
   3556                         break;
   3557                     }
   3558                 }
   3559             }
   3560         }
   3561     }
   3562 
   3563     /**
   3564      * Set the context before a call to setPara().<p>
   3565      *
   3566      * setPara() computes the left-right directionality for a given piece
   3567      * of text which is supplied as one of its arguments. Sometimes this piece
   3568      * of text (the "main text") should be considered in context, because text
   3569      * appearing before ("prologue") and/or after ("epilogue") the main text
   3570      * may affect the result of this computation.<p>
   3571      *
   3572      * This function specifies the prologue and/or the epilogue for the next
   3573      * call to setPara(). If successive calls to setPara()
   3574      * all need specification of a context, setContext() must be called
   3575      * before each call to setPara(). In other words, a context is not
   3576      * "remembered" after the following successful call to setPara().<p>
   3577      *
   3578      * If a call to setPara() specifies DEFAULT_LTR or
   3579      * DEFAULT_RTL as paraLevel and is preceded by a call to
   3580      * setContext() which specifies a prologue, the paragraph level will
   3581      * be computed taking in consideration the text in the prologue.<p>
   3582      *
   3583      * When setPara() is called without a previous call to
   3584      * setContext, the main text is handled as if preceded and followed
   3585      * by strong directional characters at the current paragraph level.
   3586      * Calling setContext() with specification of a prologue will change
   3587      * this behavior by handling the main text as if preceded by the last
   3588      * strong character appearing in the prologue, if any.
   3589      * Calling setContext() with specification of an epilogue will change
   3590      * the behavior of setPara() by handling the main text as if followed
   3591      * by the first strong character or digit appearing in the epilogue, if any.<p>
   3592      *
   3593      * Note 1: if <code>setContext</code> is called repeatedly without
   3594      *         calling <code>setPara</code>, the earlier calls have no effect,
   3595      *         only the last call will be remembered for the next call to
   3596      *         <code>setPara</code>.<p>
   3597      *
   3598      * Note 2: calling <code>setContext(null, null)</code>
   3599      *         cancels any previous setting of non-empty prologue or epilogue.
   3600      *         The next call to <code>setPara()</code> will process no
   3601      *         prologue or epilogue.<p>
   3602      *
   3603      * Note 3: users must be aware that even after setting the context
   3604      *         before a call to setPara() to perform e.g. a logical to visual
   3605      *         transformation, the resulting string may not be identical to what it
   3606      *         would have been if all the text, including prologue and epilogue, had
   3607      *         been processed together.<br>
   3608      * Example (upper case letters represent RTL characters):<br>
   3609      * &nbsp;&nbsp;prologue = "<code>abc DE</code>"<br>
   3610      * &nbsp;&nbsp;epilogue = none<br>
   3611      * &nbsp;&nbsp;main text = "<code>FGH xyz</code>"<br>
   3612      * &nbsp;&nbsp;paraLevel = LTR<br>
   3613      * &nbsp;&nbsp;display without prologue = "<code>HGF xyz</code>"
   3614      *             ("HGF" is adjacent to "xyz")<br>
   3615      * &nbsp;&nbsp;display with prologue = "<code>abc HGFED xyz</code>"
   3616      *             ("HGF" is not adjacent to "xyz")<br>
   3617      *
   3618      * @param prologue is the text which precedes the text that
   3619      *        will be specified in a coming call to setPara().
   3620      *        If there is no prologue to consider,
   3621      *        this parameter can be <code>null</code>.
   3622      *
   3623      * @param epilogue is the text which follows the text that
   3624      *        will be specified in a coming call to setPara().
   3625      *        If there is no epilogue to consider,
   3626      *        this parameter can be <code>null</code>.
   3627      *
   3628      * @see #setPara
   3629      */
   3630     public void setContext(String prologue, String epilogue) {
   3631         this.prologue = prologue != null && prologue.length() > 0 ? prologue : null;
   3632         this.epilogue = epilogue != null && epilogue.length() > 0 ? epilogue : null;
   3633     }
   3634 
   3635     private void setParaSuccess() {
   3636         prologue = null;                /* forget the last context */
   3637         epilogue = null;
   3638         paraBidi = this;                /* mark successful setPara */
   3639     }
   3640 
   3641     int Bidi_Min(int x, int y) {
   3642         return x < y ? x : y;
   3643     }
   3644 
   3645     int Bidi_Abs(int x) {
   3646         return x >= 0 ? x : -x;
   3647     }
   3648 
   3649     void setParaRunsOnly(char[] parmText, byte parmParaLevel) {
   3650         int[] visualMap;
   3651         String visualText;
   3652         int saveLength, saveTrailingWSStart;
   3653         byte[] saveLevels;
   3654         byte saveDirection;
   3655         int i, j, visualStart, logicalStart,
   3656             oldRunCount, runLength, addedRuns, insertRemove,
   3657             start, limit, step, indexOddBit, logicalPos,
   3658             index, index1;
   3659         int saveOptions;
   3660 
   3661         reorderingMode = REORDER_DEFAULT;
   3662         int parmLength = parmText.length;
   3663         if (parmLength == 0) {
   3664             setPara(parmText, parmParaLevel, null);
   3665             reorderingMode = REORDER_RUNS_ONLY;
   3666             return;
   3667         }
   3668         /* obtain memory for mapping table and visual text */
   3669         saveOptions = reorderingOptions;
   3670         if ((saveOptions & OPTION_INSERT_MARKS) > 0) {
   3671             reorderingOptions &= ~OPTION_INSERT_MARKS;
   3672             reorderingOptions |= OPTION_REMOVE_CONTROLS;
   3673         }
   3674         parmParaLevel &= 1;             /* accept only 0 or 1 */
   3675         setPara(parmText, parmParaLevel, null);
   3676         /* we cannot access directly levels since it is not yet set if
   3677          * direction is not MIXED
   3678          */
   3679         saveLevels = new byte[this.length];
   3680         System.arraycopy(getLevels(), 0, saveLevels, 0, this.length);
   3681         saveTrailingWSStart = trailingWSStart;
   3682 
   3683         /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
   3684          * the visual map and the dirProps array to drive the second call
   3685          * to setPara (but must make provision for possible removal of
   3686          * Bidi controls.  Alternatively, only use the dirProps array via
   3687          * customized classifier callback.
   3688          */
   3689         visualText = writeReordered(DO_MIRRORING);
   3690         visualMap = getVisualMap();
   3691         this.reorderingOptions = saveOptions;
   3692         saveLength = this.length;
   3693         saveDirection=this.direction;
   3694 
   3695         this.reorderingMode = REORDER_INVERSE_LIKE_DIRECT;
   3696         parmParaLevel ^= 1;
   3697         setPara(visualText, parmParaLevel, null);
   3698         BidiLine.getRuns(this);
   3699         /* check if some runs must be split, count how many splits */
   3700         addedRuns = 0;
   3701         oldRunCount = this.runCount;
   3702         visualStart = 0;
   3703         for (i = 0; i < oldRunCount; i++, visualStart += runLength) {
   3704             runLength = runs[i].limit - visualStart;
   3705             if (runLength < 2) {
   3706                 continue;
   3707             }
   3708             logicalStart = runs[i].start;
   3709             for (j = logicalStart+1; j < logicalStart+runLength; j++) {
   3710                 index = visualMap[j];
   3711                 index1 = visualMap[j-1];
   3712                 if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
   3713                     addedRuns++;
   3714                 }
   3715             }
   3716         }
   3717         if (addedRuns > 0) {
   3718             getRunsMemory(oldRunCount + addedRuns);
   3719             if (runCount == 1) {
   3720                 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
   3721                 runsMemory[0] = runs[0];
   3722             } else {
   3723                 System.arraycopy(runs, 0, runsMemory, 0, runCount);
   3724             }
   3725             runs = runsMemory;
   3726             runCount += addedRuns;
   3727             for (i = oldRunCount; i < runCount; i++) {
   3728                 if (runs[i] == null) {
   3729                     runs[i] = new BidiRun(0, 0, (byte)0);
   3730                 }
   3731             }
   3732         }
   3733         /* split runs which are not consecutive in source text */
   3734         int newI;
   3735         for (i = oldRunCount-1; i >= 0; i--) {
   3736             newI = i + addedRuns;
   3737             runLength = i==0 ? runs[0].limit :
   3738                                runs[i].limit - runs[i-1].limit;
   3739             logicalStart = runs[i].start;
   3740             indexOddBit = runs[i].level & 1;
   3741             if (runLength < 2) {
   3742                 if (addedRuns > 0) {
   3743                     runs[newI].copyFrom(runs[i]);
   3744                 }
   3745                 logicalPos = visualMap[logicalStart];
   3746                 runs[newI].start = logicalPos;
   3747                 runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
   3748                 continue;
   3749             }
   3750             if (indexOddBit > 0) {
   3751                 start = logicalStart;
   3752                 limit = logicalStart + runLength - 1;
   3753                 step = 1;
   3754             } else {
   3755                 start = logicalStart + runLength - 1;
   3756                 limit = logicalStart;
   3757                 step = -1;
   3758             }
   3759             for (j = start; j != limit; j += step) {
   3760                 index = visualMap[j];
   3761                 index1 = visualMap[j+step];
   3762                 if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
   3763                     logicalPos = Bidi_Min(visualMap[start], index);
   3764                     runs[newI].start = logicalPos;
   3765                     runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
   3766                     runs[newI].limit = runs[i].limit;
   3767                     runs[i].limit -= Bidi_Abs(j - start) + 1;
   3768                     insertRemove = runs[i].insertRemove & (LRM_AFTER|RLM_AFTER);
   3769                     runs[newI].insertRemove = insertRemove;
   3770                     runs[i].insertRemove &= ~insertRemove;
   3771                     start = j + step;
   3772                     addedRuns--;
   3773                     newI--;
   3774                 }
   3775             }
   3776             if (addedRuns > 0) {
   3777                 runs[newI].copyFrom(runs[i]);
   3778             }
   3779             logicalPos = Bidi_Min(visualMap[start], visualMap[limit]);
   3780             runs[newI].start = logicalPos;
   3781             runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
   3782         }
   3783 
   3784 //    cleanup1:
   3785         /* restore initial paraLevel */
   3786         this.paraLevel ^= 1;
   3787 //    cleanup2:
   3788         /* restore real text */
   3789         this.text = parmText;
   3790         this.length = saveLength;
   3791         this.originalLength = parmLength;
   3792         this.direction=saveDirection;
   3793         this.levels = saveLevels;
   3794         this.trailingWSStart = saveTrailingWSStart;
   3795         if (runCount > 1) {
   3796             this.direction = MIXED;
   3797         }
   3798 //    cleanup3:
   3799         this.reorderingMode = REORDER_RUNS_ONLY;
   3800     }
   3801 
   3802     /**
   3803      * Perform the Unicode Bidi algorithm. It is defined in the
   3804      * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>.
   3805      *
   3806      * <p>This method takes a piece of plain text containing one or more paragraphs,
   3807      * with or without externally specified embedding levels from <i>styled</i>
   3808      * text and computes the left-right-directionality of each character.</p>
   3809      *
   3810      * <p>If the entire text is all of the same directionality, then
   3811      * the method may not perform all the steps described by the algorithm,
   3812      * i.e., some levels may not be the same as if all steps were performed.
   3813      * This is not relevant for unidirectional text.<br>
   3814      * For example, in pure LTR text with numbers the numbers would get
   3815      * a resolved level of 2 higher than the surrounding text according to
   3816      * the algorithm. This implementation may set all resolved levels to
   3817      * the same value in such a case.</p>
   3818      *
   3819      * <p>The text can be composed of multiple paragraphs. Occurrence of a block
   3820      * separator in the text terminates a paragraph, and whatever comes next starts
   3821      * a new paragraph. The exception to this rule is when a Carriage Return (CR)
   3822      * is followed by a Line Feed (LF). Both CR and LF are block separators, but
   3823      * in that case, the pair of characters is considered as terminating the
   3824      * preceding paragraph, and a new paragraph will be started by a character
   3825      * coming after the LF.
   3826      *
   3827      * <p>Although the text is passed here as a <code>String</code>, it is
   3828      * stored internally as an array of characters. Therefore the
   3829      * documentation will refer to indexes of the characters in the text.
   3830      *
   3831      * @param text contains the text that the Bidi algorithm will be performed
   3832      *        on. This text can be retrieved with <code>getText()</code> or
   3833      *        <code>getTextAsString</code>.<br>
   3834      *
   3835      * @param paraLevel specifies the default level for the text;
   3836      *        it is typically 0 (LTR) or 1 (RTL).
   3837      *        If the method shall determine the paragraph level from the text,
   3838      *        then <code>paraLevel</code> can be set to
   3839      *        either <code>LEVEL_DEFAULT_LTR</code>
   3840      *        or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
   3841      *        paragraphs, the paragraph level shall be determined separately for
   3842      *        each paragraph; if a paragraph does not include any strongly typed
   3843      *        character, then the desired default is used (0 for LTR or 1 for RTL).
   3844      *        Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
   3845      *        is also valid, with odd levels indicating RTL.
   3846      *
   3847      * @param embeddingLevels (in) may be used to preset the embedding and override levels,
   3848      *        ignoring characters like LRE and PDF in the text.
   3849      *        A level overrides the directional property of its corresponding
   3850      *        (same index) character if the level has the
   3851      *        <code>LEVEL_OVERRIDE</code> bit set.<br><br>
   3852      *        Aside from that bit, it must be
   3853      *        <code>paraLevel&lt;=embeddingLevels[]&lt;=MAX_EXPLICIT_LEVEL</code>,
   3854      *        except that level 0 is always allowed.
   3855      *        Level 0 for a paragraph separator prevents reordering of paragraphs;
   3856      *        this only works reliably if <code>LEVEL_OVERRIDE</code>
   3857      *        is also set for paragraph separators.
   3858      *        Level 0 for other characters is treated as a wildcard
   3859      *        and is lifted up to the resolved level of the surrounding paragraph.<br><br>
   3860      *        <strong>Caution: </strong>A reference to this array, not a copy
   3861      *        of the levels, will be stored in the <code>Bidi</code> object;
   3862      *        the <code>embeddingLevels</code>
   3863      *        should not be modified to avoid unexpected results on subsequent
   3864      *        Bidi operations. However, the <code>setPara()</code> and
   3865      *        <code>setLine()</code> methods may modify some or all of the
   3866      *        levels.<br><br>
   3867      *        <strong>Note:</strong> the <code>embeddingLevels</code> array must
   3868      *        have one entry for each character in <code>text</code>.
   3869      *
   3870      * @throws IllegalArgumentException if the values in embeddingLevels are
   3871      *         not within the allowed range
   3872      *
   3873      * @see #LEVEL_DEFAULT_LTR
   3874      * @see #LEVEL_DEFAULT_RTL
   3875      * @see #LEVEL_OVERRIDE
   3876      * @see #MAX_EXPLICIT_LEVEL
   3877      */
   3878     public void setPara(String text, byte paraLevel, byte[] embeddingLevels)
   3879     {
   3880         if (text == null) {
   3881             setPara(new char[0], paraLevel, embeddingLevels);
   3882         } else {
   3883             setPara(text.toCharArray(), paraLevel, embeddingLevels);
   3884         }
   3885     }
   3886 
   3887     /**
   3888      * Perform the Unicode Bidi algorithm. It is defined in the
   3889      * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>.
   3890      *
   3891      * <p>This method takes a piece of plain text containing one or more paragraphs,
   3892      * with or without externally specified embedding levels from <i>styled</i>
   3893      * text and computes the left-right-directionality of each character.</p>
   3894      *
   3895      * <p>If the entire text is all of the same directionality, then
   3896      * the method may not perform all the steps described by the algorithm,
   3897      * i.e., some levels may not be the same as if all steps were performed.
   3898      * This is not relevant for unidirectional text.<br>
   3899      * For example, in pure LTR text with numbers the numbers would get
   3900      * a resolved level of 2 higher than the surrounding text according to
   3901      * the algorithm. This implementation may set all resolved levels to
   3902      * the same value in such a case.</p>
   3903      *
   3904      * <p>The text can be composed of multiple paragraphs. Occurrence of a block
   3905      * separator in the text terminates a paragraph, and whatever comes next starts
   3906      * a new paragraph. The exception to this rule is when a Carriage Return (CR)
   3907      * is followed by a Line Feed (LF). Both CR and LF are block separators, but
   3908      * in that case, the pair of characters is considered as terminating the
   3909      * preceding paragraph, and a new paragraph will be started by a character
   3910      * coming after the LF.
   3911      *
   3912      * <p>The text is stored internally as an array of characters. Therefore the
   3913      * documentation will refer to indexes of the characters in the text.
   3914      *
   3915      * @param chars contains the text that the Bidi algorithm will be performed
   3916      *        on. This text can be retrieved with <code>getText()</code> or
   3917      *        <code>getTextAsString</code>.<br>
   3918      *
   3919      * @param paraLevel specifies the default level for the text;
   3920      *        it is typically 0 (LTR) or 1 (RTL).
   3921      *        If the method shall determine the paragraph level from the text,
   3922      *        then <code>paraLevel</code> can be set to
   3923      *        either <code>LEVEL_DEFAULT_LTR</code>
   3924      *        or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
   3925      *        paragraphs, the paragraph level shall be determined separately for
   3926      *        each paragraph; if a paragraph does not include any strongly typed
   3927      *        character, then the desired default is used (0 for LTR or 1 for RTL).
   3928      *        Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
   3929      *        is also valid, with odd levels indicating RTL.
   3930      *
   3931      * @param embeddingLevels (in) may be used to preset the embedding and
   3932      *        override levels, ignoring characters like LRE and PDF in the text.
   3933      *        A level overrides the directional property of its corresponding
   3934      *        (same index) character if the level has the
   3935      *        <code>LEVEL_OVERRIDE</code> bit set.<br><br>
   3936      *        Aside from that bit, it must be
   3937      *        <code>paraLevel&lt;=embeddingLevels[]&lt;=MAX_EXPLICIT_LEVEL</code>,
   3938      *        except that level 0 is always allowed.
   3939      *        Level 0 for a paragraph separator prevents reordering of paragraphs;
   3940      *        this only works reliably if <code>LEVEL_OVERRIDE</code>
   3941      *        is also set for paragraph separators.
   3942      *        Level 0 for other characters is treated as a wildcard
   3943      *        and is lifted up to the resolved level of the surrounding paragraph.<br><br>
   3944      *        <strong>Caution: </strong>A reference to this array, not a copy
   3945      *        of the levels, will be stored in the <code>Bidi</code> object;
   3946      *        the <code>embeddingLevels</code>
   3947      *        should not be modified to avoid unexpected results on subsequent
   3948      *        Bidi operations. However, the <code>setPara()</code> and
   3949      *        <code>setLine()</code> methods may modify some or all of the
   3950      *        levels.<br><br>
   3951      *        <strong>Note:</strong> the <code>embeddingLevels</code> array must
   3952      *        have one entry for each character in <code>text</code>.
   3953      *
   3954      * @throws IllegalArgumentException if the values in embeddingLevels are
   3955      *         not within the allowed range
   3956      *
   3957      * @see #LEVEL_DEFAULT_LTR
   3958      * @see #LEVEL_DEFAULT_RTL
   3959      * @see #LEVEL_OVERRIDE
   3960      * @see #MAX_EXPLICIT_LEVEL
   3961      */
   3962     public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
   3963     {
   3964         /* check the argument values */
   3965         if (paraLevel < LEVEL_DEFAULT_LTR) {
   3966             verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);
   3967         }
   3968         if (chars == null) {
   3969             chars = new char[0];
   3970         }
   3971 
   3972         /* special treatment for RUNS_ONLY mode */
   3973         if (reorderingMode == REORDER_RUNS_ONLY) {
   3974             setParaRunsOnly(chars, paraLevel);
   3975             return;
   3976         }
   3977 
   3978         /* initialize the Bidi object */
   3979         this.paraBidi = null;          /* mark unfinished setPara */
   3980         this.text = chars;
   3981         this.length = this.originalLength = this.resultLength = text.length;
   3982         this.paraLevel = paraLevel;
   3983         this.direction = (byte)(paraLevel & 1);
   3984         this.paraCount = 1;
   3985 
   3986         /* Allocate zero-length arrays instead of setting to null here; then
   3987          * checks for null in various places can be eliminated.
   3988          */
   3989         dirProps = new byte[0];
   3990         levels = new byte[0];
   3991         runs = new BidiRun[0];
   3992         isGoodLogicalToVisualRunsMap = false;
   3993         insertPoints.size = 0;          /* clean up from last call */
   3994         insertPoints.confirmed = 0;     /* clean up from last call */
   3995 
   3996         /*
   3997          * Save the original paraLevel if contextual; otherwise, set to 0.
   3998          */
   3999         defaultParaLevel = IsDefaultLevel(paraLevel) ? paraLevel : 0;
   4000 
   4001         if (length == 0) {
   4002             /*
   4003              * For an empty paragraph, create a Bidi object with the paraLevel and
   4004              * the flags and the direction set but without allocating zero-length arrays.
   4005              * There is nothing more to do.
   4006              */
   4007             if (IsDefaultLevel(paraLevel)) {
   4008                 this.paraLevel &= 1;
   4009                 defaultParaLevel = 0;
   4010             }
   4011             flags = DirPropFlagLR(paraLevel);
   4012             runCount = 0;
   4013             paraCount = 0;
   4014             setParaSuccess();
   4015             return;
   4016         }
   4017 
   4018         runCount = -1;
   4019 
   4020         /*
   4021          * Get the directional properties,
   4022          * the flags bit-set, and
   4023          * determine the paragraph level if necessary.
   4024          */
   4025         getDirPropsMemory(length);
   4026         dirProps = dirPropsMemory;
   4027         getDirProps();
   4028         /* the processed length may have changed if OPTION_STREAMING is set */
   4029         trailingWSStart = length;  /* the levels[] will reflect the WS run */
   4030 
   4031         /* are explicit levels specified? */
   4032         if (embeddingLevels == null) {
   4033             /* no: determine explicit levels according to the (Xn) rules */
   4034             getLevelsMemory(length);
   4035             levels = levelsMemory;
   4036             direction = resolveExplicitLevels();
   4037         } else {
   4038             /* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */
   4039             levels = embeddingLevels;
   4040             direction = checkExplicitLevels();
   4041         }
   4042 
   4043         /* allocate isolate memory */
   4044         if (isolateCount > 0) {
   4045             if (isolates == null || isolates.length < isolateCount)
   4046                 isolates = new Isolate[isolateCount + 3];   /* keep some reserve */
   4047         }
   4048         isolateCount = -1;              /* current isolates stack entry == none */
   4049 
   4050         /*
   4051          * The steps after (X9) in the Bidi algorithm are performed only if
   4052          * the paragraph text has mixed directionality!
   4053          */
   4054         switch (direction) {
   4055         case LTR:
   4056             /* all levels are implicitly at paraLevel (important for getLevels()) */
   4057             trailingWSStart = 0;
   4058             break;
   4059         case RTL:
   4060             /* all levels are implicitly at paraLevel (important for getLevels()) */
   4061             trailingWSStart = 0;
   4062             break;
   4063         default:
   4064             /*
   4065              *  Choose the right implicit state table
   4066              */
   4067             switch(reorderingMode) {
   4068             case REORDER_DEFAULT:
   4069                 this.impTabPair = impTab_DEFAULT;
   4070                 break;
   4071             case REORDER_NUMBERS_SPECIAL:
   4072                 this.impTabPair = impTab_NUMBERS_SPECIAL;
   4073                 break;
   4074             case REORDER_GROUP_NUMBERS_WITH_R:
   4075                 this.impTabPair = impTab_GROUP_NUMBERS_WITH_R;
   4076                 break;
   4077             case REORDER_RUNS_ONLY:
   4078                 /* we should never get here */
   4079                 throw new InternalError("Internal ICU error in setPara");
   4080                 /* break; */
   4081             case REORDER_INVERSE_NUMBERS_AS_L:
   4082                 this.impTabPair = impTab_INVERSE_NUMBERS_AS_L;
   4083                 break;
   4084             case REORDER_INVERSE_LIKE_DIRECT:
   4085                 if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) {
   4086                     this.impTabPair = impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
   4087                 } else {
   4088                     this.impTabPair = impTab_INVERSE_LIKE_DIRECT;
   4089                 }
   4090                 break;
   4091             case REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
   4092                 if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) {
   4093                     this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
   4094                 } else {
   4095                     this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL;
   4096                 }
   4097                 break;
   4098             }
   4099             /*
   4100              * If there are no external levels specified and there
   4101              * are no significant explicit level codes in the text,
   4102              * then we can treat the entire paragraph as one run.
   4103              * Otherwise, we need to perform the following rules on runs of
   4104              * the text with the same embedding levels. (X10)
   4105              * "Significant" explicit level codes are ones that actually
   4106              * affect non-BN characters.
   4107              * Examples for "insignificant" ones are empty embeddings
   4108              * LRE-PDF, LRE-RLE-PDF-PDF, etc.
   4109              */
   4110             if (embeddingLevels == null && paraCount <= 1 &&
   4111                 (flags & DirPropFlagMultiRuns) == 0) {
   4112                 resolveImplicitLevels(0, length,
   4113                         GetLRFromLevel(GetParaLevelAt(0)),
   4114                         GetLRFromLevel(GetParaLevelAt(length - 1)));
   4115             } else {
   4116                 /* sor, eor: start and end types of same-level-run */
   4117                 int start, limit = 0;
   4118                 byte level, nextLevel;
   4119                 short sor, eor;
   4120 
   4121                 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
   4122                 level = GetParaLevelAt(0);
   4123                 nextLevel = levels[0];
   4124                 if (level < nextLevel) {
   4125                     eor = GetLRFromLevel(nextLevel);
   4126                 } else {
   4127                     eor = GetLRFromLevel(level);
   4128                 }
   4129 
   4130                 do {
   4131                     /* determine start and limit of the run (end points just behind the run) */
   4132 
   4133                     /* the values for this run's start are the same as for the previous run's end */
   4134                     start = limit;
   4135                     level = nextLevel;
   4136                     if ((start > 0) && (dirProps[start - 1] == B)) {
   4137                         /* except if this is a new paragraph, then set sor = para level */
   4138                         sor = GetLRFromLevel(GetParaLevelAt(start));
   4139                     } else {
   4140                         sor = eor;
   4141                     }
   4142 
   4143                     /* search for the limit of this run */
   4144                     while ((++limit < length) &&
   4145                            ((levels[limit] == level) ||
   4146                             ((DirPropFlag(dirProps[limit]) & MASK_BN_EXPLICIT) != 0))) {}
   4147 
   4148                     /* get the correct level of the next run */
   4149                     if (limit < length) {
   4150                         nextLevel = levels[limit];
   4151                     } else {
   4152                         nextLevel = GetParaLevelAt(length - 1);
   4153                     }
   4154 
   4155                     /* determine eor from max(level, nextLevel); sor is last run's eor */
   4156                     if (NoOverride(level) < NoOverride(nextLevel)) {
   4157                         eor = GetLRFromLevel(nextLevel);
   4158                     } else {
   4159                         eor = GetLRFromLevel(level);
   4160                     }
   4161 
   4162                     /* if the run consists of overridden directional types, then there
   4163                        are no implicit types to be resolved */
   4164                     if ((level & LEVEL_OVERRIDE) == 0) {
   4165                         resolveImplicitLevels(start, limit, sor, eor);
   4166                     } else {
   4167                         /* remove the LEVEL_OVERRIDE flags */
   4168                         do {
   4169                             levels[start++] &= ~LEVEL_OVERRIDE;
   4170                         } while (start < limit);
   4171                     }
   4172                 } while (limit  < length);
   4173             }
   4174 
   4175             /* reset the embedding levels for some non-graphic characters (L1), (X9) */
   4176             adjustWSLevels();
   4177 
   4178             break;
   4179         }
   4180         /* add RLM for inverse Bidi with contextual orientation resolving
   4181          * to RTL which would not round-trip otherwise
   4182          */
   4183         if ((defaultParaLevel > 0) &&
   4184             ((reorderingOptions & OPTION_INSERT_MARKS) != 0) &&
   4185             ((reorderingMode == REORDER_INVERSE_LIKE_DIRECT) ||
   4186              (reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
   4187             int start, last;
   4188             byte level;
   4189             byte dirProp;
   4190             for (int i = 0; i < paraCount; i++) {
   4191                 last = paras_limit[i] - 1;
   4192                 level = paras_level[i];
   4193                 if (level == 0)
   4194                     continue;           /* LTR paragraph */
   4195                 start = i == 0 ? 0 : paras_limit[i - 1];
   4196                 for (int j = last; j >= start; j--) {
   4197                     dirProp = dirProps[j];
   4198                     if (dirProp == L) {
   4199                         if (j < last) {
   4200                             while (dirProps[last] == B) {
   4201                                 last--;
   4202                             }
   4203                         }
   4204                         addPoint(last, RLM_BEFORE);
   4205                         break;
   4206                     }
   4207                     if ((DirPropFlag(dirProp) & MASK_R_AL) != 0) {
   4208                         break;
   4209                     }
   4210                 }
   4211             }
   4212         }
   4213 
   4214         if ((reorderingOptions & OPTION_REMOVE_CONTROLS) != 0) {
   4215             resultLength -= controlCount;
   4216         } else {
   4217             resultLength += insertPoints.size;
   4218         }
   4219         setParaSuccess();
   4220     }
   4221 
   4222     /**
   4223      * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
   4224      * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
   4225      * version 13,
   4226      * also described in The Unicode Standard, Version 4.0 .<p>
   4227      *
   4228      * This method takes a paragraph of text and computes the
   4229      * left-right-directionality of each character. The text should not
   4230      * contain any Unicode block separators.<p>
   4231      *
   4232      * The RUN_DIRECTION attribute in the text, if present, determines the base
   4233      * direction (left-to-right or right-to-left). If not present, the base
   4234      * direction is computed using the Unicode Bidirectional Algorithm,
   4235      * defaulting to left-to-right if there are no strong directional characters
   4236      * in the text. This attribute, if present, must be applied to all the text
   4237      * in the paragraph.<p>
   4238      *
   4239      * The BIDI_EMBEDDING attribute in the text, if present, represents
   4240      * embedding level information. Negative values from -1 to -62 indicate
   4241      * overrides at the absolute value of the level. Positive values from 1 to
   4242      * 62 indicate embeddings. Where values are zero or not defined, the base
   4243      * embedding level as determined by the base direction is assumed.<p>
   4244      *
   4245      * The NUMERIC_SHAPING attribute in the text, if present, converts European
   4246      * digits to other decimal digits before running the bidi algorithm. This
   4247      * attribute, if present, must be applied to all the text in the paragraph.
   4248      *
   4249      * If the entire text is all of the same directionality, then
   4250      * the method may not perform all the steps described by the algorithm,
   4251      * i.e., some levels may not be the same as if all steps were performed.
   4252      * This is not relevant for unidirectional text.<br>
   4253      * For example, in pure LTR text with numbers the numbers would get
   4254      * a resolved level of 2 higher than the surrounding text according to
   4255      * the algorithm. This implementation may set all resolved levels to
   4256      * the same value in such a case.<p>
   4257      *
   4258      * @param paragraph a paragraph of text with optional character and
   4259      *        paragraph attribute information
   4260      */
   4261     public void setPara(AttributedCharacterIterator paragraph)
   4262     {
   4263         byte paraLvl;
   4264         Boolean runDirection = (Boolean) paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
   4265         if (runDirection == null) {
   4266             paraLvl = LEVEL_DEFAULT_LTR;
   4267         } else {
   4268             paraLvl = (runDirection.equals(TextAttribute.RUN_DIRECTION_LTR)) ?
   4269                         LTR : RTL;
   4270         }
   4271 
   4272         byte[] lvls = null;
   4273         int len = paragraph.getEndIndex() - paragraph.getBeginIndex();
   4274         byte[] embeddingLevels = new byte[len];
   4275         char[] txt = new char[len];
   4276         int i = 0;
   4277         char ch = paragraph.first();
   4278         while (ch != AttributedCharacterIterator.DONE) {
   4279             txt[i] = ch;
   4280             Integer embedding = (Integer) paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
   4281             if (embedding != null) {
   4282                 byte level = embedding.byteValue();
   4283                 if (level == 0) {
   4284                     /* no-op */
   4285                 } else if (level < 0) {
   4286                     lvls = embeddingLevels;
   4287                     embeddingLevels[i] = (byte)((0 - level) | LEVEL_OVERRIDE);
   4288                 } else {
   4289                     lvls = embeddingLevels;
   4290                     embeddingLevels[i] = level;
   4291                 }
   4292             }
   4293             ch = paragraph.next();
   4294             ++i;
   4295         }
   4296 
   4297         NumericShaper shaper = (NumericShaper) paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);
   4298         if (shaper != null) {
   4299             shaper.shape(txt, 0, len);
   4300         }
   4301         setPara(txt, paraLvl, lvls);
   4302     }
   4303 
   4304     /**
   4305      * Specify whether block separators must be allocated level zero,
   4306      * so that successive paragraphs will progress from left to right.
   4307      * This method must be called before <code>setPara()</code>.
   4308      * Paragraph separators (B) may appear in the text.  Setting them to level zero
   4309      * means that all paragraph separators (including one possibly appearing
   4310      * in the last text position) are kept in the reordered text after the text
   4311      * that they follow in the source text.
   4312      * When this feature is not enabled, a paragraph separator at the last
   4313      * position of the text before reordering will go to the first position
   4314      * of the reordered text when the paragraph level is odd.
   4315      *
   4316      * @param ordarParaLTR specifies whether paragraph separators (B) must
   4317      * receive level 0, so that successive paragraphs progress from left to right.
   4318      *
   4319      * @see #setPara
   4320      */
   4321     public void orderParagraphsLTR(boolean ordarParaLTR) {
   4322         orderParagraphsLTR = ordarParaLTR;
   4323     }
   4324 
   4325     /**
   4326      * Is this <code>Bidi</code> object set to allocate level 0 to block
   4327      * separators so that successive paragraphs progress from left to right?
   4328      *
   4329      * @return <code>true</code> if the <code>Bidi</code> object is set to
   4330      *         allocate level 0 to block separators.
   4331      */
   4332     public boolean isOrderParagraphsLTR() {
   4333         return orderParagraphsLTR;
   4334     }
   4335 
   4336     /**
   4337      * Get the directionality of the text.
   4338      *
   4339      * @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code>
   4340      *         that indicates if the entire text
   4341      *         represented by this object is unidirectional,
   4342      *         and which direction, or if it is mixed-directional.
   4343      *
   4344      * @throws IllegalStateException if this call is not preceded by a successful
   4345      *         call to <code>setPara</code> or <code>setLine</code>
   4346      *
   4347      * @see #LTR
   4348      * @see #RTL
   4349      * @see #MIXED
   4350      */
   4351     public byte getDirection()
   4352     {
   4353         verifyValidParaOrLine();
   4354         return direction;
   4355     }
   4356 
   4357     /**
   4358      * Get the text.
   4359      *
   4360      * @return A <code>String</code> containing the text that the
   4361      *         <code>Bidi</code> object was created for.
   4362      *
   4363      * @throws IllegalStateException if this call is not preceded by a successful
   4364      *         call to <code>setPara</code> or <code>setLine</code>
   4365      *
   4366      * @see #setPara
   4367      * @see #setLine
   4368      */
   4369     public String getTextAsString()
   4370     {
   4371         verifyValidParaOrLine();
   4372         return new String(text);
   4373     }
   4374 
   4375     /**
   4376      * Get the text.
   4377      *
   4378      * @return A <code>char</code> array containing the text that the
   4379      *         <code>Bidi</code> object was created for.
   4380      *
   4381      * @throws IllegalStateException if this call is not preceded by a successful
   4382      *         call to <code>setPara</code> or <code>setLine</code>
   4383      *
   4384      * @see #setPara
   4385      * @see #setLine
   4386      */
   4387     public char[] getText()
   4388     {
   4389         verifyValidParaOrLine();
   4390         return text;
   4391     }
   4392 
   4393     /**
   4394      * Get the length of the text.
   4395      *
   4396      * @return The length of the text that the <code>Bidi</code> object was
   4397      *         created for.
   4398      *
   4399      * @throws IllegalStateException if this call is not preceded by a successful
   4400      *         call to <code>setPara</code> or <code>setLine</code>
   4401      */
   4402     public int getLength()
   4403     {
   4404         verifyValidParaOrLine();
   4405         return originalLength;
   4406     }
   4407 
   4408     /**
   4409      * Get the length of the source text processed by the last call to
   4410      * <code>setPara()</code>. This length may be different from the length of
   4411      * the source text if option <code>OPTION_STREAMING</code> has been
   4412      * set.
   4413      * <br>
   4414      * Note that whenever the length of the text affects the execution or the
   4415      * result of a method, it is the processed length which must be considered,
   4416      * except for <code>setPara</code> (which receives unprocessed source text)
   4417      * and <code>getLength</code> (which returns the original length of the
   4418      * source text).<br>
   4419      * In particular, the processed length is the one to consider in the
   4420      * following cases:
   4421      * <ul>
   4422      * <li>maximum value of the <code>limit</code> argument of
   4423      * <code>setLine</code></li>
   4424      * <li>maximum value of the <code>charIndex</code> argument of
   4425      * <code>getParagraph</code></li>
   4426      * <li>maximum value of the <code>charIndex</code> argument of
   4427      * <code>getLevelAt</code></li>
   4428      * <li>number of elements in the array returned by <code>getLevels</code>
   4429      * </li>
   4430      * <li>maximum value of the <code>logicalStart</code> argument of
   4431      * <code>getLogicalRun</code></li>
   4432      * <li>maximum value of the <code>logicalIndex</code> argument of
   4433      * <code>getVisualIndex</code></li>
   4434      * <li>number of elements returned by <code>getLogicalMap</code></li>
   4435      * <li>length of text processed by <code>writeReordered</code></li>
   4436      * </ul>
   4437      *
   4438      * @return The length of the part of the source text processed by
   4439      *         the last call to <code>setPara</code>.
   4440      *
   4441      * @throws IllegalStateException if this call is not preceded by a successful
   4442      *         call to <code>setPara</code> or <code>setLine</code>
   4443      *
   4444      * @see #setPara
   4445      * @see #OPTION_STREAMING
   4446      */
   4447     public int getProcessedLength() {
   4448         verifyValidParaOrLine();
   4449         return length;
   4450     }
   4451 
   4452     /**
   4453      * Get the length of the reordered text resulting from the last call to
   4454      * <code>setPara()</code>. This length may be different from the length
   4455      * of the source text if option <code>OPTION_INSERT_MARKS</code>
   4456      * or option <code>OPTION_REMOVE_CONTROLS</code> has been set.
   4457      * <br>
   4458      * This resulting length is the one to consider in the following cases:
   4459      * <ul>
   4460      * <li>maximum value of the <code>visualIndex</code> argument of
   4461      * <code>getLogicalIndex</code></li>
   4462      * <li>number of elements returned by <code>getVisualMap</code></li>
   4463      * </ul>
   4464      * Note that this length stays identical to the source text length if
   4465      * Bidi marks are inserted or removed using option bits of
   4466      * <code>writeReordered</code>, or if option
   4467      * <code>REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
   4468      *
   4469      * @return The length of the reordered text resulting from
   4470      *         the last call to <code>setPara</code>.
   4471      *
   4472      * @throws IllegalStateException if this call is not preceded by a successful
   4473      *         call to <code>setPara</code> or <code>setLine</code>
   4474      *
   4475      * @see #setPara
   4476      * @see #OPTION_INSERT_MARKS
   4477      * @see #OPTION_REMOVE_CONTROLS
   4478      * @see #REORDER_INVERSE_NUMBERS_AS_L
   4479      */
   4480     public int getResultLength() {
   4481         verifyValidParaOrLine();
   4482         return resultLength;
   4483     }
   4484 
   4485     /* paragraphs API methods ------------------------------------------------- */
   4486 
   4487     /**
   4488      * Get the paragraph level of the text.
   4489      *
   4490      * @return The paragraph level. If there are multiple paragraphs, their
   4491      *         level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or
   4492      *         LEVEL_DEFAULT_RTL.  In that case, the level of the first paragraph
   4493      *         is returned.
   4494      *
   4495      * @throws IllegalStateException if this call is not preceded by a successful
   4496      *         call to <code>setPara</code> or <code>setLine</code>
   4497      *
   4498      * @see #LEVEL_DEFAULT_LTR
   4499      * @see #LEVEL_DEFAULT_RTL
   4500      * @see #getParagraph
   4501      * @see #getParagraphByIndex
   4502      */
   4503     public byte getParaLevel()
   4504     {
   4505         verifyValidParaOrLine();
   4506         return paraLevel;
   4507     }
   4508 
   4509     /**
   4510      * Get the number of paragraphs.
   4511      *
   4512      * @return The number of paragraphs.
   4513      *
   4514      * @throws IllegalStateException if this call is not preceded by a successful
   4515      *         call to <code>setPara</code> or <code>setLine</code>
   4516      */
   4517     public int countParagraphs()
   4518     {
   4519         verifyValidParaOrLine();
   4520         return paraCount;
   4521     }
   4522 
   4523     /**
   4524      * Get a paragraph, given the index of this paragraph.
   4525      *
   4526      * This method returns information about a paragraph.<p>
   4527      *
   4528      * @param paraIndex is the number of the paragraph, in the
   4529      *        range <code>[0..countParagraphs()-1]</code>.
   4530      *
   4531      * @return a BidiRun object with the details of the paragraph:<br>
   4532      *        <code>start</code> will receive the index of the first character
   4533      *        of the paragraph in the text.<br>
   4534      *        <code>limit</code> will receive the limit of the paragraph.<br>
   4535      *        <code>embeddingLevel</code> will receive the level of the paragraph.
   4536      *
   4537      * @throws IllegalStateException if this call is not preceded by a successful
   4538      *         call to <code>setPara</code> or <code>setLine</code>
   4539      * @throws IllegalArgumentException if paraIndex is not in the range
   4540      *        <code>[0..countParagraphs()-1]</code>
   4541      *
   4542      * @see android.icu.text.BidiRun
   4543      */
   4544     public BidiRun getParagraphByIndex(int paraIndex)
   4545     {
   4546         verifyValidParaOrLine();
   4547         verifyRange(paraIndex, 0, paraCount);
   4548 
   4549         Bidi bidi = paraBidi;             /* get Para object if Line object */
   4550         int paraStart;
   4551         if (paraIndex == 0) {
   4552             paraStart = 0;
   4553         } else {
   4554             paraStart = bidi.paras_limit[paraIndex - 1];
   4555         }
   4556         BidiRun bidiRun = new BidiRun();
   4557         bidiRun.start = paraStart;
   4558         bidiRun.limit = bidi.paras_limit[paraIndex];
   4559         bidiRun.level = GetParaLevelAt(paraStart);
   4560         return bidiRun;
   4561     }
   4562 
   4563     /**
   4564      * Get a paragraph, given a position within the text.
   4565      * This method returns information about a paragraph.<br>
   4566      * Note: if the paragraph index is known, it is more efficient to
   4567      * retrieve the paragraph information using getParagraphByIndex().<p>
   4568      *
   4569      * @param charIndex is the index of a character within the text, in the
   4570      *        range <code>[0..getProcessedLength()-1]</code>.
   4571      *
   4572      * @return a BidiRun object with the details of the paragraph:<br>
   4573      *        <code>start</code> will receive the index of the first character
   4574      *        of the paragraph in the text.<br>
   4575      *        <code>limit</code> will receive the limit of the paragraph.<br>
   4576      *        <code>embeddingLevel</code> will receive the level of the paragraph.
   4577      *
   4578      * @throws IllegalStateException if this call is not preceded by a successful
   4579      *         call to <code>setPara</code> or <code>setLine</code>
   4580      * @throws IllegalArgumentException if charIndex is not within the legal range
   4581      *
   4582      * @see android.icu.text.BidiRun
   4583      * @see #getParagraphByIndex
   4584      * @see #getProcessedLength
   4585      */
   4586     public BidiRun getParagraph(int charIndex)
   4587     {
   4588         verifyValidParaOrLine();
   4589         Bidi bidi = paraBidi;             /* get Para object if Line object */
   4590         verifyRange(charIndex, 0, bidi.length);
   4591         int paraIndex;
   4592         for (paraIndex = 0; charIndex >= bidi.paras_limit[paraIndex]; paraIndex++) {
   4593         }
   4594         return getParagraphByIndex(paraIndex);
   4595     }
   4596 
   4597     /**
   4598      * Get the index of a paragraph, given a position within the text.<p>
   4599      *
   4600      * @param charIndex is the index of a character within the text, in the
   4601      *        range <code>[0..getProcessedLength()-1]</code>.
   4602      *
   4603      * @return The index of the paragraph containing the specified position,
   4604      *         starting from 0.
   4605      *
   4606      * @throws IllegalStateException if this call is not preceded by a successful
   4607      *         call to <code>setPara</code> or <code>setLine</code>
   4608      * @throws IllegalArgumentException if charIndex is not within the legal range
   4609      *
   4610      * @see android.icu.text.BidiRun
   4611      * @see #getProcessedLength
   4612      */
   4613     public int getParagraphIndex(int charIndex)
   4614     {
   4615         verifyValidParaOrLine();
   4616         Bidi bidi = paraBidi;             /* get Para object if Line object */
   4617         verifyRange(charIndex, 0, bidi.length);
   4618         int paraIndex;
   4619         for (paraIndex = 0; charIndex >= bidi.paras_limit[paraIndex]; paraIndex++) {
   4620         }
   4621         return paraIndex;
   4622     }
   4623 
   4624     /**
   4625      * Set a custom Bidi classifier used by the UBA implementation for Bidi
   4626      * class determination.
   4627      *
   4628      * @param classifier A new custom classifier. This can be null.
   4629      *
   4630      * @see #getCustomClassifier
   4631      */
   4632     public void setCustomClassifier(BidiClassifier classifier) {
   4633         this.customClassifier = classifier;
   4634     }
   4635 
   4636     /**
   4637      * Gets the current custom class classifier used for Bidi class
   4638      * determination.
   4639      *
   4640      * @return An instance of class <code>BidiClassifier</code>
   4641      *
   4642      * @see #setCustomClassifier
   4643      */
   4644     public BidiClassifier getCustomClassifier() {
   4645         return this.customClassifier;
   4646     }
   4647 
   4648     /**
   4649      * Retrieves the Bidi class for a given code point.
   4650      * <p>If a <code>BidiClassifier</code> is defined and returns a value
   4651      * other than <code>CLASS_DEFAULT=UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)+1</code>,
   4652      * that value is used; otherwise the default class determination mechanism is invoked.
   4653      *
   4654      * @param c The code point to get a Bidi class for.
   4655      *
   4656      * @return The Bidi class for the character <code>c</code> that is in effect
   4657      *         for this <code>Bidi</code> instance.
   4658      *
   4659      * @see BidiClassifier
   4660      */
   4661     public int getCustomizedClass(int c) {
   4662         int dir;
   4663 
   4664         if (customClassifier == null ||
   4665                 (dir = customClassifier.classify(c)) == Bidi.CLASS_DEFAULT) {
   4666             dir = bdp.getClass(c);
   4667         }
   4668         if (dir >= UCharacterDirection.CHAR_DIRECTION_COUNT)
   4669             dir = ON;
   4670         return dir;
   4671     }
   4672 
   4673     /**
   4674      * <code>setLine()</code> returns a <code>Bidi</code> object to
   4675      * contain the reordering information, especially the resolved levels,
   4676      * for all the characters in a line of text. This line of text is
   4677      * specified by referring to a <code>Bidi</code> object representing
   4678      * this information for a piece of text containing one or more paragraphs,
   4679      * and by specifying a range of indexes in this text.<p>
   4680      * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
   4681      *
   4682      * This is used after calling <code>setPara()</code>
   4683      * for a piece of text, and after line-breaking on that text.
   4684      * It is not necessary if each paragraph is treated as a single line.<p>
   4685      *
   4686      * After line-breaking, rules (L1) and (L2) for the treatment of
   4687      * trailing WS and for reordering are performed on
   4688      * a <code>Bidi</code> object that represents a line.<p>
   4689      *
   4690      * <strong>Important: </strong>the line <code>Bidi</code> object may
   4691      * reference data within the global text <code>Bidi</code> object.
   4692      * You should not alter the content of the global text object until
   4693      * you are finished using the line object.
   4694      *
   4695      * @param start is the line's first index into the text.
   4696      *
   4697      * @param limit is just behind the line's last index into the text
   4698      *        (its last index +1).
   4699      *
   4700      * @return a <code>Bidi</code> object that will now represent a line of the text.
   4701      *
   4702      * @throws IllegalStateException if this call is not preceded by a successful
   4703      *         call to <code>setPara</code>
   4704      * @throws IllegalArgumentException if start and limit are not in the range
   4705      *         <code>0&lt;=start&lt;limit&lt;=getProcessedLength()</code>,
   4706      *         or if the specified line crosses a paragraph boundary
   4707      *
   4708      * @see #setPara
   4709      * @see #getProcessedLength
   4710      */
   4711     public Bidi setLine(int start, int limit)
   4712     {
   4713         verifyValidPara();
   4714         verifyRange(start, 0, limit);
   4715         verifyRange(limit, 0, length+1);
   4716         if (getParagraphIndex(start) != getParagraphIndex(limit - 1)) {
   4717             /* the line crosses a paragraph boundary */
   4718             throw new IllegalArgumentException();
   4719         }
   4720         return BidiLine.setLine(this, start, limit);
   4721     }
   4722 
   4723     /**
   4724      * Get the level for one character.
   4725      *
   4726      * @param charIndex the index of a character.
   4727      *
   4728      * @return The level for the character at <code>charIndex</code>.
   4729      *
   4730      * @throws IllegalStateException if this call is not preceded by a successful
   4731      *         call to <code>setPara</code> or <code>setLine</code>
   4732      * @throws IllegalArgumentException if charIndex is not in the range
   4733      *         <code>0&lt;=charIndex&lt;getProcessedLength()</code>
   4734      *
   4735      * @see #getProcessedLength
   4736      */
   4737     public byte getLevelAt(int charIndex)
   4738     {
   4739         verifyValidParaOrLine();
   4740         verifyRange(charIndex, 0, length);
   4741         return BidiLine.getLevelAt(this, charIndex);
   4742     }
   4743 
   4744     /**
   4745      * Get an array of levels for each character.<p>
   4746      *
   4747      * Note that this method may allocate memory under some
   4748      * circumstances, unlike <code>getLevelAt()</code>.
   4749      *
   4750      * @return The levels array for the text,
   4751      *         or <code>null</code> if an error occurs.
   4752      *
   4753      * @throws IllegalStateException if this call is not preceded by a successful
   4754      *         call to <code>setPara</code> or <code>setLine</code>
   4755      */
   4756     public byte[] getLevels()
   4757     {
   4758         verifyValidParaOrLine();
   4759         if (length <= 0) {
   4760             return new byte[0];
   4761         }
   4762         return BidiLine.getLevels(this);
   4763     }
   4764 
   4765     /**
   4766      * Get a logical run.
   4767      * This method returns information about a run and is used
   4768      * to retrieve runs in logical order.<p>
   4769      * This is especially useful for line-breaking on a paragraph.
   4770      *
   4771      * @param logicalPosition is a logical position within the source text.
   4772      *
   4773      * @return a BidiRun object filled with <code>start</code> containing
   4774      *        the first character of the run, <code>limit</code> containing
   4775      *        the limit of the run, and <code>embeddingLevel</code> containing
   4776      *        the level of the run.
   4777      *
   4778      * @throws IllegalStateException if this call is not preceded by a successful
   4779      *         call to <code>setPara</code> or <code>setLine</code>
   4780      * @throws IllegalArgumentException if logicalPosition is not in the range
   4781      *         <code>0&lt;=logicalPosition&lt;getProcessedLength()</code>
   4782      *
   4783      * @see android.icu.text.BidiRun
   4784      * @see android.icu.text.BidiRun#getStart()
   4785      * @see android.icu.text.BidiRun#getLimit()
   4786      * @see android.icu.text.BidiRun#getEmbeddingLevel()
   4787      */
   4788     public BidiRun getLogicalRun(int logicalPosition)
   4789     {
   4790         verifyValidParaOrLine();
   4791         verifyRange(logicalPosition, 0, length);
   4792         return BidiLine.getLogicalRun(this, logicalPosition);
   4793     }
   4794 
   4795     /**
   4796      * Get the number of runs.
   4797      * This method may invoke the actual reordering on the
   4798      * <code>Bidi</code> object, after <code>setPara()</code>
   4799      * may have resolved only the levels of the text. Therefore,
   4800      * <code>countRuns()</code> may have to allocate memory,
   4801      * and may throw an exception if it fails to do so.
   4802      *
   4803      * @return The number of runs.
   4804      *
   4805      * @throws IllegalStateException if this call is not preceded by a successful
   4806      *         call to <code>setPara</code> or <code>setLine</code>
   4807      */
   4808     public int countRuns()
   4809     {
   4810         verifyValidParaOrLine();
   4811         BidiLine.getRuns(this);
   4812         return runCount;
   4813     }
   4814 
   4815     /**
   4816      *
   4817      * Get a <code>BidiRun</code> object according to its index. BidiRun methods
   4818      * may be used to retrieve the run's logical start, length and level,
   4819      * which can be even for an LTR run or odd for an RTL run.
   4820      * In an RTL run, the character at the logical start is
   4821      * visually on the right of the displayed run.
   4822      * The length is the number of characters in the run.<p>
   4823      * <code>countRuns()</code> is normally called
   4824      * before the runs are retrieved.
   4825      *
   4826      * <p>
   4827      *  Example:
   4828      * <pre>
   4829      *  Bidi bidi = new Bidi();
   4830      *  String text = "abc 123 DEFG xyz";
   4831      *  bidi.setPara(text, Bidi.RTL, null);
   4832      *  int i, count=bidi.countRuns(), logicalStart, visualIndex=0, length;
   4833      *  BidiRun run;
   4834      *  for (i = 0; i &lt; count; ++i) {
   4835      *      run = bidi.getVisualRun(i);
   4836      *      logicalStart = run.getStart();
   4837      *      length = run.getLength();
   4838      *      if (Bidi.LTR == run.getEmbeddingLevel()) {
   4839      *          do { // LTR
   4840      *              show_char(text.charAt(logicalStart++), visualIndex++);
   4841      *          } while (--length &gt; 0);
   4842      *      } else {
   4843      *          logicalStart += length;  // logicalLimit
   4844      *          do { // RTL
   4845      *              show_char(text.charAt(--logicalStart), visualIndex++);
   4846      *          } while (--length &gt; 0);
   4847      *      }
   4848      *  }
   4849      * </pre>
   4850      * <p>
   4851      * Note that in right-to-left runs, code like this places
   4852      * second surrogates before first ones (which is generally a bad idea)
   4853      * and combining characters before base characters.
   4854      * <p>
   4855      * Use of <code>{@link #writeReordered}</code>, optionally with the
   4856      * <code>{@link #KEEP_BASE_COMBINING}</code> option, can be considered in
   4857      * order to avoid these issues.
   4858      *
   4859      * @param runIndex is the number of the run in visual order, in the
   4860      *        range <code>[0..countRuns()-1]</code>.
   4861      *
   4862      * @return a BidiRun object containing the details of the run. The
   4863      *         directionality of the run is
   4864      *         <code>LTR==0</code> or <code>RTL==1</code>,
   4865      *         never <code>MIXED</code>.
   4866      *
   4867      * @throws IllegalStateException if this call is not preceded by a successful
   4868      *         call to <code>setPara</code> or <code>setLine</code>
   4869      * @throws IllegalArgumentException if <code>runIndex</code> is not in
   4870      *         the range <code>0&lt;=runIndex&lt;countRuns()</code>
   4871      *
   4872      * @see #countRuns()
   4873      * @see android.icu.text.BidiRun
   4874      * @see android.icu.text.BidiRun#getStart()
   4875      * @see android.icu.text.BidiRun#getLength()
   4876      * @see android.icu.text.BidiRun#getEmbeddingLevel()
   4877      */
   4878     public BidiRun getVisualRun(int runIndex)
   4879     {
   4880         verifyValidParaOrLine();
   4881         BidiLine.getRuns(this);
   4882         verifyRange(runIndex, 0, runCount);
   4883         return BidiLine.getVisualRun(this, runIndex);
   4884     }
   4885 
   4886     /**
   4887      * Get the visual position from a logical text position.
   4888      * If such a mapping is used many times on the same
   4889      * <code>Bidi</code> object, then calling
   4890      * <code>getLogicalMap()</code> is more efficient.
   4891      * <p>
   4892      * The value returned may be <code>MAP_NOWHERE</code> if there is no
   4893      * visual position because the corresponding text character is a Bidi
   4894      * control removed from output by the option
   4895      * <code>OPTION_REMOVE_CONTROLS</code>.
   4896      * <p>
   4897      * When the visual output is altered by using options of
   4898      * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
   4899      * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
   4900      * <code>REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
   4901      * be correct. It is advised to use, when possible, reordering options
   4902      * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
   4903      * <p>
   4904      * Note that in right-to-left runs, this mapping places
   4905      * second surrogates before first ones (which is generally a bad idea)
   4906      * and combining characters before base characters.
   4907      * Use of <code>{@link #writeReordered}</code>, optionally with the
   4908      * <code>{@link #KEEP_BASE_COMBINING}</code> option can be considered instead
   4909      * of using the mapping, in order to avoid these issues.
   4910      *
   4911      * @param logicalIndex is the index of a character in the text.
   4912      *
   4913      * @return The visual position of this character.
   4914      *
   4915      * @throws IllegalStateException if this call is not preceded by a successful
   4916      *         call to <code>setPara</code> or <code>setLine</code>
   4917      * @throws IllegalArgumentException if <code>logicalIndex</code> is not in
   4918      *         the range <code>0&lt;=logicalIndex&lt;getProcessedLength()</code>
   4919      *
   4920      * @see #getLogicalMap
   4921      * @see #getLogicalIndex
   4922      * @see #getProcessedLength
   4923      * @see #MAP_NOWHERE
   4924      * @see #OPTION_REMOVE_CONTROLS
   4925      * @see #writeReordered
   4926      */
   4927     public int getVisualIndex(int logicalIndex)
   4928     {
   4929         verifyValidParaOrLine();
   4930         verifyRange(logicalIndex, 0, length);
   4931         return BidiLine.getVisualIndex(this, logicalIndex);
   4932     }
   4933 
   4934 
   4935     /**
   4936      * Get the logical text position from a visual position.
   4937      * If such a mapping is used many times on the same
   4938      * <code>Bidi</code> object, then calling
   4939      * <code>getVisualMap()</code> is more efficient.
   4940      * <p>
   4941      * The value returned may be <code>MAP_NOWHERE</code> if there is no
   4942      * logical position because the corresponding text character is a Bidi
   4943      * mark inserted in the output by option
   4944      * <code>OPTION_INSERT_MARKS</code>.
   4945      * <p>
   4946      * This is the inverse method to <code>getVisualIndex()</code>.
   4947      * <p>
   4948      * When the visual output is altered by using options of
   4949      * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
   4950      * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
   4951      * <code>REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
   4952      * be correct. It is advised to use, when possible, reordering options
   4953      * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
   4954      *
   4955      * @param visualIndex is the visual position of a character.
   4956      *
   4957      * @return The index of this character in the text.
   4958      *
   4959      * @throws IllegalStateException if this call is not preceded by a successful
   4960      *         call to <code>setPara</code> or <code>setLine</code>
   4961      * @throws IllegalArgumentException if <code>visualIndex</code> is not in
   4962      *         the range <code>0&lt;=visualIndex&lt;getResultLength()</code>
   4963      *
   4964      * @see #getVisualMap
   4965      * @see #getVisualIndex
   4966      * @see #getResultLength
   4967      * @see #MAP_NOWHERE
   4968      * @see #OPTION_INSERT_MARKS
   4969      * @see #writeReordered
   4970      */
   4971     public int getLogicalIndex(int visualIndex)
   4972     {
   4973         verifyValidParaOrLine();
   4974         verifyRange(visualIndex, 0, resultLength);
   4975         /* we can do the trivial cases without the runs array */
   4976         if (insertPoints.size == 0 && controlCount == 0) {
   4977             if (direction == LTR) {
   4978                 return visualIndex;
   4979             }
   4980             else if (direction == RTL) {
   4981                 return length - visualIndex - 1;
   4982             }
   4983         }
   4984         BidiLine.getRuns(this);
   4985         return BidiLine.getLogicalIndex(this, visualIndex);
   4986     }
   4987 
   4988     /**
   4989      * Get a logical-to-visual index map (array) for the characters in the
   4990      * <code>Bidi</code> (paragraph or line) object.
   4991      * <p>
   4992      * Some values in the map may be <code>MAP_NOWHERE</code> if the
   4993      * corresponding text characters are Bidi controls removed from the visual
   4994      * output by the option <code>OPTION_REMOVE_CONTROLS</code>.
   4995      * <p>
   4996      * When the visual output is altered by using options of
   4997      * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
   4998      * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
   4999      * <code>REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
   5000      * be correct. It is advised to use, when possible, reordering options
   5001      * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
   5002      * <p>
   5003      * Note that in right-to-left runs, this mapping places
   5004      * second surrogates before first ones (which is generally a bad idea)
   5005      * and combining characters before base characters.
   5006      * Use of <code>{@link #writeReordered}</code>, optionally with the
   5007      * <code>{@link #KEEP_BASE_COMBINING}</code> option can be considered instead
   5008      * of using the mapping, in order to avoid these issues.
   5009      *
   5010      * @return an array of <code>getProcessedLength()</code>
   5011      *        indexes which will reflect the reordering of the characters.<br><br>
   5012      *        The index map will result in
   5013      *        <code>indexMap[logicalIndex]==visualIndex</code>, where
   5014      *        <code>indexMap</code> represents the returned array.
   5015      *
   5016      * @throws IllegalStateException if this call is not preceded by a successful
   5017      *         call to <code>setPara</code> or <code>setLine</code>
   5018      *
   5019      * @see #getVisualMap
   5020      * @see #getVisualIndex
   5021      * @see #getProcessedLength
   5022      * @see #MAP_NOWHERE
   5023      * @see #OPTION_REMOVE_CONTROLS
   5024      * @see #writeReordered
   5025      */
   5026     public int[] getLogicalMap()
   5027     {
   5028         /* countRuns() checks successful call to setPara/setLine */
   5029         countRuns();
   5030         if (length <= 0) {
   5031             return new int[0];
   5032         }
   5033         return BidiLine.getLogicalMap(this);
   5034     }
   5035 
   5036     /**
   5037      * Get a visual-to-logical index map (array) for the characters in the
   5038      * <code>Bidi</code> (paragraph or line) object.
   5039      * <p>
   5040      * Some values in the map may be <code>MAP_NOWHERE</code> if the
   5041      * corresponding text characters are Bidi marks inserted in the visual
   5042      * output by the option <code>OPTION_INSERT_MARKS</code>.
   5043      * <p>
   5044      * When the visual output is altered by using options of
   5045      * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
   5046      * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
   5047      * <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
   5048      * be correct. It is advised to use, when possible, reordering options
   5049      * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
   5050      *
   5051      * @return an array of <code>getResultLength()</code>
   5052      *        indexes which will reflect the reordering of the characters.<br><br>
   5053      *        The index map will result in
   5054      *        <code>indexMap[visualIndex]==logicalIndex</code>, where
   5055      *        <code>indexMap</code> represents the returned array.
   5056      *
   5057      * @throws IllegalStateException if this call is not preceded by a successful
   5058      *         call to <code>setPara</code> or <code>setLine</code>
   5059      *
   5060      * @see #getLogicalMap
   5061      * @see #getLogicalIndex
   5062      * @see #getResultLength
   5063      * @see #MAP_NOWHERE
   5064      * @see #OPTION_INSERT_MARKS
   5065      * @see #writeReordered
   5066      */
   5067     public int[] getVisualMap()
   5068     {
   5069         /* countRuns() checks successful call to setPara/setLine */
   5070         countRuns();
   5071         if (resultLength <= 0) {
   5072             return new int[0];
   5073         }
   5074         return BidiLine.getVisualMap(this);
   5075     }
   5076 
   5077     /**
   5078      * This is a convenience method that does not use a <code>Bidi</code> object.
   5079      * It is intended to be used for when an application has determined the levels
   5080      * of objects (character sequences) and just needs to have them reordered (L2).
   5081      * This is equivalent to using <code>getLogicalMap()</code> on a
   5082      * <code>Bidi</code> object.
   5083      *
   5084      * @param levels is an array of levels that have been determined by
   5085      *        the application.
   5086      *
   5087      * @return an array of <code>levels.length</code>
   5088      *        indexes which will reflect the reordering of the characters.<p>
   5089      *        The index map will result in
   5090      *        <code>indexMap[logicalIndex]==visualIndex</code>, where
   5091      *        <code>indexMap</code> represents the returned array.
   5092      */
   5093     public static int[] reorderLogical(byte[] levels)
   5094     {
   5095         return BidiLine.reorderLogical(levels);
   5096     }
   5097 
   5098     /**
   5099      * This is a convenience method that does not use a <code>Bidi</code> object.
   5100      * It is intended to be used for when an application has determined the levels
   5101      * of objects (character sequences) and just needs to have them reordered (L2).
   5102      * This is equivalent to using <code>getVisualMap()</code> on a
   5103      * <code>Bidi</code> object.
   5104      *
   5105      * @param levels is an array of levels that have been determined by
   5106      *        the application.
   5107      *
   5108      * @return an array of <code>levels.length</code>
   5109      *        indexes which will reflect the reordering of the characters.<p>
   5110      *        The index map will result in
   5111      *        <code>indexMap[visualIndex]==logicalIndex</code>, where
   5112      *        <code>indexMap</code> represents the returned array.
   5113      */
   5114     public static int[] reorderVisual(byte[] levels)
   5115     {
   5116         return BidiLine.reorderVisual(levels);
   5117     }
   5118 
   5119     /**
   5120      * Invert an index map.
   5121      * The index mapping of the argument map is inverted and returned as
   5122      * an array of indexes that we will call the inverse map.
   5123      *
   5124      * @param srcMap is an array whose elements define the original mapping
   5125      * from a source array to a destination array.
   5126      * Some elements of the source array may have no mapping in the
   5127      * destination array. In that case, their value will be
   5128      * the special value <code>MAP_NOWHERE</code>.
   5129      * All elements must be &gt;=0 or equal to <code>MAP_NOWHERE</code>.
   5130      * Some elements in the source map may have a value greater than the
   5131      * srcMap.length if the destination array has more elements than the
   5132      * source array.
   5133      * There must be no duplicate indexes (two or more elements with the
   5134      * same value except <code>MAP_NOWHERE</code>).
   5135      *
   5136      * @return an array representing the inverse map.
   5137      *         This array has a number of elements equal to 1 + the highest
   5138      *         value in <code>srcMap</code>.
   5139      *         For elements of the result array which have no matching elements
   5140      *         in the source array, the corresponding elements in the inverse
   5141      *         map will receive a value equal to <code>MAP_NOWHERE</code>.
   5142      *         If element with index i in <code>srcMap</code> has a value k different
   5143      *         from <code>MAP_NOWHERE</code>, this means that element i of
   5144      *         the source array maps to element k in the destination array.
   5145      *         The inverse map will have value i in its k-th element.
   5146      *         For all elements of the destination array which do not map to
   5147      *         an element in the source array, the corresponding element in the
   5148      *         inverse map will have a value equal to <code>MAP_NOWHERE</code>.
   5149      *
   5150      * @see #MAP_NOWHERE
   5151      */
   5152     public static int[] invertMap(int[] srcMap)
   5153     {
   5154         if (srcMap == null) {
   5155             return null;
   5156         } else {
   5157             return BidiLine.invertMap(srcMap);
   5158         }
   5159     }
   5160 
    /*
     * Fields and methods for compatibility with java.text.Bidi (Sun implementation)
     */
   5164 
    /**
     * Constant indicating base direction is left-to-right.
     * Defined with the same value as <code>LTR</code>.
     */
    public static final int DIRECTION_LEFT_TO_RIGHT = LTR;

    /**
     * Constant indicating base direction is right-to-left.
     * Defined with the same value as <code>RTL</code>.
     */
    public static final int DIRECTION_RIGHT_TO_LEFT = RTL;

    /**
     * Constant indicating that the base direction depends on the first strong
     * directional character in the text according to the Unicode Bidirectional
     * Algorithm. If no strong directional character is present, the base
     * direction is left-to-right.
     * Defined with the same value as <code>LEVEL_DEFAULT_LTR</code>.
     */
    public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = LEVEL_DEFAULT_LTR;

    /**
     * Constant indicating that the base direction depends on the first strong
     * directional character in the text according to the Unicode Bidirectional
     * Algorithm. If no strong directional character is present, the base
     * direction is right-to-left.
     * Defined with the same value as <code>LEVEL_DEFAULT_RTL</code>.
     */
    public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = LEVEL_DEFAULT_RTL;
   5190 
   5191     /**
   5192      * Create Bidi from the given paragraph of text and base direction.
   5193      *
   5194      * @param paragraph a paragraph of text
   5195      * @param flags a collection of flags that control the algorithm. The
   5196      *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
   5197      *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
   5198      *        DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
   5199      * @see #DIRECTION_LEFT_TO_RIGHT
   5200      * @see #DIRECTION_RIGHT_TO_LEFT
   5201      * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
   5202      * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
   5203      */
   5204     public Bidi(String paragraph, int flags)
   5205     {
   5206         this(paragraph.toCharArray(), 0, null, 0, paragraph.length(), flags);
   5207     }
   5208 
   5209     /**
   5210      * Create Bidi from the given paragraph of text.<p>
   5211      *
   5212      * The RUN_DIRECTION attribute in the text, if present, determines the base
   5213      * direction (left-to-right or right-to-left). If not present, the base
   5214      * direction is computed using the Unicode Bidirectional Algorithm,
   5215      * defaulting to left-to-right if there are no strong directional characters
   5216      * in the text. This attribute, if present, must be applied to all the text
   5217      * in the paragraph.<p>
   5218      *
   5219      * The BIDI_EMBEDDING attribute in the text, if present, represents
   5220      * embedding level information. Negative values from -1 to -62 indicate
   5221      * overrides at the absolute value of the level. Positive values from 1 to
   5222      * 62 indicate embeddings. Where values are zero or not defined, the base
   5223      * embedding level as determined by the base direction is assumed.<p>
   5224      *
   5225      * The NUMERIC_SHAPING attribute in the text, if present, converts European
   5226      * digits to other decimal digits before running the bidi algorithm. This
   5227      * attribute, if present, must be applied to all the text in the paragraph.<p>
   5228      *
   5229      * Note: this constructor calls setPara() internally.
   5230      *
   5231      * @param paragraph a paragraph of text with optional character and
   5232      *        paragraph attribute information
   5233      */
   5234     public Bidi(AttributedCharacterIterator paragraph)
   5235     {
   5236         this();
   5237         setPara(paragraph);
   5238     }
   5239 
   5240     /**
   5241      * Create Bidi from the given text, embedding, and direction information.
   5242      *
   5243      * <p>The embeddings array may be null. If present, the values represent
   5244      * embedding level information.
   5245      * Negative values from -1 to -{@link #MAX_EXPLICIT_LEVEL}
   5246      * indicate overrides at the absolute value of the level.
   5247      * Positive values from 1 to {@link #MAX_EXPLICIT_LEVEL} indicate embeddings.
   5248      * Where values are zero, the base embedding level
   5249      * as determined by the base direction is assumed,
   5250      * except for paragraph separators which remain at 0 to prevent reordering of paragraphs.</p>
   5251      *
   5252      * <p>Note: This constructor calls setPara() internally,
   5253      * after converting the java.text.Bidi-style embeddings with negative overrides
   5254      * into ICU-style embeddings with bit fields for {@link #LEVEL_OVERRIDE} and the level.
   5255      *
   5256      * @param text an array containing the paragraph of text to process.
   5257      * @param textStart the index into the text array of the start of the
   5258      *        paragraph.
   5259      * @param embeddings an array containing embedding values for each character
   5260      *        in the paragraph. This can be null, in which case it is assumed
   5261      *        that there is no external embedding information.
   5262      * @param embStart the index into the embedding array of the start of the
   5263      *        paragraph.
   5264      * @param paragraphLength the length of the paragraph in the text and
   5265      *        embeddings arrays.
   5266      * @param flags a collection of flags that control the algorithm. The
   5267      *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
   5268      *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
   5269      *        DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
   5270      *
   5271      * @throws IllegalArgumentException if the values in embeddings are
   5272      *         not within the allowed range
   5273      *
   5274      * @see #DIRECTION_LEFT_TO_RIGHT
   5275      * @see #DIRECTION_RIGHT_TO_LEFT
   5276      * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
   5277      * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
   5278      */
   5279     public Bidi(char[] text,
   5280             int textStart,
   5281             byte[] embeddings,
   5282             int embStart,
   5283             int paragraphLength,
   5284             int flags)
   5285     {
   5286         this();
   5287         byte paraLvl;
   5288         switch (flags) {
   5289         case DIRECTION_LEFT_TO_RIGHT:
   5290         default:
   5291             paraLvl = LTR;
   5292             break;
   5293         case DIRECTION_RIGHT_TO_LEFT:
   5294             paraLvl = RTL;
   5295             break;
   5296         case DIRECTION_DEFAULT_LEFT_TO_RIGHT:
   5297             paraLvl = LEVEL_DEFAULT_LTR;
   5298             break;
   5299         case DIRECTION_DEFAULT_RIGHT_TO_LEFT:
   5300             paraLvl = LEVEL_DEFAULT_RTL;
   5301             break;
   5302         }
   5303         byte[] paraEmbeddings;
   5304         if (embeddings == null) {
   5305             paraEmbeddings = null;
   5306         } else {
   5307             // Convert from java.text.Bidi embeddings to ICU setPara() levels:
   5308             // Copy to the start of a new array and convert java.text negative overrides
   5309             // to ICU bit-field-and-mask overrides.
   5310             // A copy of the embeddings is always required because
   5311             // setPara() may modify its embeddings.
   5312             paraEmbeddings = new byte[paragraphLength];
   5313             byte lev;
   5314             for (int i = 0; i < paragraphLength; i++) {
   5315                 lev = embeddings[i + embStart];
   5316                 if (lev < 0) {
   5317                     lev = (byte)((- lev) | LEVEL_OVERRIDE);
   5318                 }
   5319                 // setPara() lifts level 0 up to the resolved paragraph level.
   5320                 paraEmbeddings[i] = lev;
   5321             }
   5322         }
   5323         if (textStart == 0 && paragraphLength == text.length) {
   5324             setPara(text, paraLvl, paraEmbeddings);
   5325         } else {
   5326             char[] paraText = new char[paragraphLength];
   5327             System.arraycopy(text, textStart, paraText, 0, paragraphLength);
   5328             setPara(paraText, paraLvl, paraEmbeddings);
   5329         }
   5330     }
   5331 
   5332     /**
   5333      * Create a Bidi object representing the bidi information on a line of text
   5334      * within the paragraph represented by the current Bidi. This call is not
   5335      * required if the entire paragraph fits on one line.
   5336      *
   5337      * @param lineStart the offset from the start of the paragraph to the start
   5338      *        of the line.
   5339      * @param lineLimit the offset from the start of the paragraph to the limit
   5340      *        of the line.
   5341      *
   5342      * @throws IllegalStateException if this call is not preceded by a successful
   5343      *         call to <code>setPara</code>
   5344      * @throws IllegalArgumentException if lineStart and lineLimit are not in the range
   5345      *         <code>0&lt;=lineStart&lt;lineLimit&lt;=getProcessedLength()</code>,
   5346      *         or if the specified line crosses a paragraph boundary
   5347      */
   5348     public Bidi createLineBidi(int lineStart, int lineLimit)
   5349     {
   5350         return setLine(lineStart, lineLimit);
   5351     }
   5352 
   5353     /**
   5354      * Return true if the line is not left-to-right or right-to-left. This means
   5355      * it either has mixed runs of left-to-right and right-to-left text, or the
   5356      * base direction differs from the direction of the only run of text.
   5357      *
   5358      * @return true if the line is not left-to-right or right-to-left.
   5359      *
   5360      * @throws IllegalStateException if this call is not preceded by a successful
   5361      *         call to <code>setPara</code>
   5362      */
   5363     public boolean isMixed()
   5364     {
   5365         return (!isLeftToRight() && !isRightToLeft());
   5366     }
   5367 
   5368     /**
   5369      * Return true if the line is all left-to-right text and the base direction
   5370      * is left-to-right.
   5371      *
   5372      * @return true if the line is all left-to-right text and the base direction
   5373      *         is left-to-right.
   5374      *
   5375      * @throws IllegalStateException if this call is not preceded by a successful
   5376      *         call to <code>setPara</code>
   5377      */
   5378     public boolean isLeftToRight()
   5379     {
   5380         return (getDirection() == LTR && (paraLevel & 1) == 0);
   5381     }
   5382 
   5383     /**
   5384      * Return true if the line is all right-to-left text, and the base direction
   5385      * is right-to-left
   5386      *
   5387      * @return true if the line is all right-to-left text, and the base
   5388      *         direction is right-to-left
   5389      *
   5390      * @throws IllegalStateException if this call is not preceded by a successful
   5391      *         call to <code>setPara</code>
   5392      */
   5393     public boolean isRightToLeft()
   5394     {
   5395         return (getDirection() == RTL && (paraLevel & 1) == 1);
   5396     }
   5397 
   5398     /**
   5399      * Return true if the base direction is left-to-right
   5400      *
   5401      * @return true if the base direction is left-to-right
   5402      *
   5403      * @throws IllegalStateException if this call is not preceded by a successful
   5404      *         call to <code>setPara</code> or <code>setLine</code>
   5405      */
   5406     public boolean baseIsLeftToRight()
   5407     {
   5408         return (getParaLevel() == LTR);
   5409     }
   5410 
   5411     /**
   5412      * Return the base level (0 if left-to-right, 1 if right-to-left).
   5413      *
   5414      * @return the base level
   5415      *
   5416      * @throws IllegalStateException if this call is not preceded by a successful
   5417      *         call to <code>setPara</code> or <code>setLine</code>
   5418      */
   5419     public int getBaseLevel()
   5420     {
   5421         return getParaLevel();
   5422     }
   5423 
   5424     /**
   5425      * Return the number of level runs.
   5426      *
   5427      * @return the number of level runs
   5428      *
   5429      * @throws IllegalStateException if this call is not preceded by a successful
   5430      *         call to <code>setPara</code> or <code>setLine</code>
   5431      */
   5432     public int getRunCount()
   5433     {
   5434         return countRuns();
   5435     }
   5436 
   5437     /**
   5438      * Compute the logical to visual run mapping
   5439      */
   5440      void getLogicalToVisualRunsMap()
   5441      {
   5442         if (isGoodLogicalToVisualRunsMap) {
   5443             return;
   5444         }
   5445         int count = countRuns();
   5446         if ((logicalToVisualRunsMap == null) ||
   5447             (logicalToVisualRunsMap.length < count)) {
   5448             logicalToVisualRunsMap = new int[count];
   5449         }
   5450         int i;
   5451         long[] keys = new long[count];
   5452         for (i = 0; i < count; i++) {
   5453             keys[i] = ((long)(runs[i].start)<<32) + i;
   5454         }
   5455         Arrays.sort(keys);
   5456         for (i = 0; i < count; i++) {
   5457             logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF);
   5458         }
   5459         isGoodLogicalToVisualRunsMap = true;
   5460      }
   5461 
   5462     /**
   5463      * Return the level of the nth logical run in this line.
   5464      *
   5465      * @param run the index of the run, between 0 and <code>countRuns()-1</code>
   5466      *
   5467      * @return the level of the run
   5468      *
   5469      * @throws IllegalStateException if this call is not preceded by a successful
   5470      *         call to <code>setPara</code> or <code>setLine</code>
   5471      * @throws IllegalArgumentException if <code>run</code> is not in
   5472      *         the range <code>0&lt;=run&lt;countRuns()</code>
   5473      */
   5474     public int getRunLevel(int run)
   5475     {
   5476         verifyValidParaOrLine();
   5477         BidiLine.getRuns(this);
   5478         verifyRange(run, 0, runCount);
   5479         getLogicalToVisualRunsMap();
   5480         return runs[logicalToVisualRunsMap[run]].level;
   5481     }
   5482 
   5483     /**
   5484      * Return the index of the character at the start of the nth logical run in
   5485      * this line, as an offset from the start of the line.
   5486      *
   5487      * @param run the index of the run, between 0 and <code>countRuns()</code>
   5488      *
   5489      * @return the start of the run
   5490      *
   5491      * @throws IllegalStateException if this call is not preceded by a successful
   5492      *         call to <code>setPara</code> or <code>setLine</code>
   5493      * @throws IllegalArgumentException if <code>run</code> is not in
   5494      *         the range <code>0&lt;=run&lt;countRuns()</code>
   5495      */
   5496     public int getRunStart(int run)
   5497     {
   5498         verifyValidParaOrLine();
   5499         BidiLine.getRuns(this);
   5500         verifyRange(run, 0, runCount);
   5501         getLogicalToVisualRunsMap();
   5502         return runs[logicalToVisualRunsMap[run]].start;
   5503     }
   5504 
   5505     /**
   5506      * Return the index of the character past the end of the nth logical run in
   5507      * this line, as an offset from the start of the line. For example, this
   5508      * will return the length of the line for the last run on the line.
   5509      *
   5510      * @param run the index of the run, between 0 and <code>countRuns()</code>
   5511      *
   5512      * @return the limit of the run
   5513      *
   5514      * @throws IllegalStateException if this call is not preceded by a successful
   5515      *         call to <code>setPara</code> or <code>setLine</code>
   5516      * @throws IllegalArgumentException if <code>run</code> is not in
   5517      *         the range <code>0&lt;=run&lt;countRuns()</code>
   5518      */
   5519     public int getRunLimit(int run)
   5520     {
   5521         verifyValidParaOrLine();
   5522         BidiLine.getRuns(this);
   5523         verifyRange(run, 0, runCount);
   5524         getLogicalToVisualRunsMap();
   5525         int idx = logicalToVisualRunsMap[run];
   5526         int len = idx == 0 ? runs[idx].limit :
   5527                                 runs[idx].limit - runs[idx-1].limit;
   5528         return runs[idx].start + len;
   5529     }
   5530 
   5531     /**
   5532      * Return true if the specified text requires bidi analysis. If this returns
   5533      * false, the text will display left-to-right. Clients can then avoid
   5534      * constructing a Bidi object. Text in the Arabic Presentation Forms area of
   5535      * Unicode is presumed to already be shaped and ordered for display, and so
   5536      * will not cause this method to return true.
   5537      *
   5538      * @param text the text containing the characters to test
   5539      * @param start the start of the range of characters to test
   5540      * @param limit the limit of the range of characters to test
   5541      *
   5542      * @return true if the range of characters requires bidi analysis
   5543      */
   5544     public static boolean requiresBidi(char[] text,
   5545             int start,
   5546             int limit)
   5547     {
   5548         final int RTLMask = (1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT |
   5549                 1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC |
   5550                 1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING |
   5551                 1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE |
   5552                 1 << UCharacter.DIRECTIONALITY_ARABIC_NUMBER);
   5553 
   5554         for (int i = start; i < limit; ++i) {
   5555             if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) {
   5556                 return true;
   5557             }
   5558         }
   5559         return false;
   5560     }
   5561 
   5562     /**
   5563      * Reorder the objects in the array into visual order based on their levels.
   5564      * This is a utility method to use when you have a collection of objects
   5565      * representing runs of text in logical order, each run containing text at a
   5566      * single level. The elements at <code>index</code> from
   5567      * <code>objectStart</code> up to <code>objectStart + count</code> in the
   5568      * objects array will be reordered into visual order assuming
   5569      * each run of text has the level indicated by the corresponding element in
   5570      * the levels array (at <code>index - objectStart + levelStart</code>).
   5571      *
   5572      * @param levels an array representing the bidi level of each object
   5573      * @param levelStart the start position in the levels array
   5574      * @param objects the array of objects to be reordered into visual order
   5575      * @param objectStart the start position in the objects array
   5576      * @param count the number of objects to reorder
   5577      */
   5578     public static void reorderVisually(byte[] levels,
   5579             int levelStart,
   5580             Object[] objects,
   5581             int objectStart,
   5582             int count)
   5583     {
   5584         byte[] reorderLevels = new byte[count];
   5585         System.arraycopy(levels, levelStart, reorderLevels, 0, count);
   5586         int[] indexMap = reorderVisual(reorderLevels);
   5587         Object[] temp = new Object[count];
   5588         System.arraycopy(objects, objectStart, temp, 0, count);
   5589         for (int i = 0; i < count; ++i) {
   5590             objects[objectStart + i] = temp[indexMap[i]];
   5591         }
   5592     }
   5593 
   5594     /**
   5595      * Take a <code>Bidi</code> object containing the reordering
   5596      * information for a piece of text (one or more paragraphs) set by
   5597      * <code>setPara()</code> or for a line of text set by <code>setLine()</code>
   5598      * and return a string containing the reordered text.
   5599      *
   5600      * <p>The text may have been aliased (only a reference was stored
   5601      * without copying the contents), thus it must not have been modified
   5602      * since the <code>setPara()</code> call.
   5603      *
   5604      * This method preserves the integrity of characters with multiple
   5605      * code units and (optionally) combining characters.
   5606      * Characters in RTL runs can be replaced by mirror-image characters
   5607      * in the returned string. Note that "real" mirroring has to be done in a
   5608      * rendering engine by glyph selection and that for many "mirrored"
   5609      * characters there are no Unicode characters as mirror-image equivalents.
   5610      * There are also options to insert or remove Bidi control
   5611      * characters; see the descriptions of the return value and the
   5612      * <code>options</code> parameter, and of the option bit flags.
   5613      *
   5614      * @param options A bit set of options for the reordering that control
   5615      *                how the reordered text is written.
   5616      *                The options include mirroring the characters on a code
   5617      *                point basis and inserting LRM characters, which is used
   5618      *                especially for transforming visually stored text
   5619      *                to logically stored text (although this is still an
   5620      *                imperfect implementation of an "inverse Bidi" algorithm
   5621      *                because it uses the "forward Bidi" algorithm at its core).
   5622      *                The available options are:
   5623      *                <code>DO_MIRRORING</code>,
   5624      *                <code>INSERT_LRM_FOR_NUMERIC</code>,
   5625      *                <code>KEEP_BASE_COMBINING</code>,
   5626      *                <code>OUTPUT_REVERSE</code>,
   5627      *                <code>REMOVE_BIDI_CONTROLS</code>,
   5628      *                <code>OPTION_STREAMING</code>
   5629      *
   5630      * @return The reordered text.
   5631      *         If the <code>INSERT_LRM_FOR_NUMERIC</code> option is set, then
   5632      *         the length of the returned string could be as large as
   5633      *         <code>getLength()+2*countRuns()</code>.<br>
   5634      *         If the <code>REMOVE_BIDI_CONTROLS</code> option is set, then the
   5635      *         length of the returned string may be less than
   5636      *         <code>getLength()</code>.<br>
   5637      *         If none of these options is set, then the length of the returned
   5638      *         string will be exactly <code>getProcessedLength()</code>.
   5639      *
   5640      * @throws IllegalStateException if this call is not preceded by a successful
   5641      *         call to <code>setPara</code> or <code>setLine</code>
   5642      *
   5643      * @see #DO_MIRRORING
   5644      * @see #INSERT_LRM_FOR_NUMERIC
   5645      * @see #KEEP_BASE_COMBINING
   5646      * @see #OUTPUT_REVERSE
   5647      * @see #REMOVE_BIDI_CONTROLS
   5648      * @see #OPTION_STREAMING
   5649      * @see #getProcessedLength
   5650      */
   5651     public String writeReordered(int options)
   5652     {
   5653         verifyValidParaOrLine();
   5654         if (length == 0) {
   5655             /* nothing to do */
   5656             return "";
   5657         }
   5658         return BidiWriter.writeReordered(this, options);
   5659     }
   5660 
   5661     /**
   5662      * Reverse a Right-To-Left run of Unicode text.
   5663      *
   5664      * This method preserves the integrity of characters with multiple
   5665      * code units and (optionally) combining characters.
   5666      * Characters can be replaced by mirror-image characters
   5667      * in the destination buffer. Note that "real" mirroring has
   5668      * to be done in a rendering engine by glyph selection
   5669      * and that for many "mirrored" characters there are no
   5670      * Unicode characters as mirror-image equivalents.
   5671      * There are also options to insert or remove Bidi control
   5672      * characters.
   5673      *
   5674      * This method is the implementation for reversing RTL runs as part
   5675      * of <code>writeReordered()</code>. For detailed descriptions
   5676      * of the parameters, see there.
   5677      * Since no Bidi controls are inserted here, the output string length
   5678      * will never exceed <code>src.length()</code>.
   5679      *
   5680      * @see #writeReordered
   5681      *
   5682      * @param src The RTL run text.
   5683      *
   5684      * @param options A bit set of options for the reordering that control
   5685      *                how the reordered text is written.
   5686      *                See the <code>options</code> parameter in <code>writeReordered()</code>.
   5687      *
   5688      * @return The reordered text.
   5689      *         If the <code>REMOVE_BIDI_CONTROLS</code> option
   5690      *         is set, then the length of the returned string may be less than
   5691      *         <code>src.length()</code>. If this option is not set,
   5692      *         then the length of the returned string will be exactly
   5693      *         <code>src.length()</code>.
   5694      *
   5695      * @throws IllegalArgumentException if <code>src</code> is null.
   5696      */
   5697     public static String writeReverse(String src, int options)
   5698     {
   5699         /* error checking */
   5700         if (src == null) {
   5701             throw new IllegalArgumentException();
   5702         }
   5703 
   5704         if (src.length() > 0) {
   5705             return BidiWriter.writeReverse(src, options);
   5706         } else {
   5707             /* nothing to do */
   5708             return "";
   5709         }
   5710     }
   5711 
   5712 }
   5713