Home | History | Annotate | Download | only in text
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2001-2010, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 */
      9 /* Written by Simon Montagu, Matitiahu Allouche
     10  * (ported from C code written by Markus W. Scherer)
     11  */
     12 
     13 package com.ibm.icu.text;
     14 
     15 import com.ibm.icu.lang.UCharacter;
     16 
     17 final class BidiWriter {
     18 
     19     /** Bidi control code points */
     20     static final char LRM_CHAR = 0x200e;
     21     static final char RLM_CHAR = 0x200f;
     22     static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
     23                                   1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
     24 
     25     private static boolean IsCombining(int type)
     26     {
     27         return ((1<<type &
     28                 (1<<UCharacter.NON_SPACING_MARK |
     29                  1<<UCharacter.COMBINING_SPACING_MARK |
     30                  1<<UCharacter.ENCLOSING_MARK)) != 0);
     31     }
     32 
     33     /*
     34      * When we have OUTPUT_REVERSE set on writeReordered(), then we
     35      * semantically write RTL runs in reverse and later reverse them again.
     36      * Instead, we actually write them in forward order to begin with.
     37      * However, if the RTL run was to be mirrored, we need to mirror here now
     38      * since the implicit second reversal must not do it.
     39      * It looks strange to do mirroring in LTR output, but it is only because
     40      * we are writing RTL output in reverse.
     41      */
     42     private static String doWriteForward(String src, int options) {
     43         /* optimize for several combinations of options */
     44         switch(options&(Bidi.REMOVE_BIDI_CONTROLS|Bidi.DO_MIRRORING)) {
     45         case 0: {
     46             /* simply return the LTR run */
     47             return src;
     48         }
     49         case Bidi.DO_MIRRORING: {
     50             StringBuffer dest = new StringBuffer(src.length());
     51 
     52             /* do mirroring */
     53             int i=0;
     54             int c;
     55 
     56             do {
     57                 c = UTF16.charAt(src, i);
     58                 i += UTF16.getCharCount(c);
     59                 UTF16.append(dest, UCharacter.getMirror(c));
     60             } while(i < src.length());
     61             return dest.toString();
     62         }
     63         case Bidi.REMOVE_BIDI_CONTROLS: {
     64             StringBuilder dest = new StringBuilder(src.length());
     65 
     66             /* copy the LTR run and remove any Bidi control characters */
     67             int i = 0;
     68             char c;
     69             do {
     70                 c = src.charAt(i++);
     71                 if(!Bidi.IsBidiControlChar(c)) {
     72                     dest.append(c);
     73                 }
     74             } while(i < src.length());
     75             return dest.toString();
     76         }
     77         default: {
     78             StringBuffer dest = new StringBuffer(src.length());
     79 
     80             /* remove Bidi control characters and do mirroring */
     81             int i = 0;
     82             int c;
     83             do {
     84                 c = UTF16.charAt(src, i);
     85                 i += UTF16.getCharCount(c);
     86                 if(!Bidi.IsBidiControlChar(c)) {
     87                     UTF16.append(dest, UCharacter.getMirror(c));
     88                 }
     89             } while(i < src.length());
     90             return dest.toString();
     91         }
     92         } /* end of switch */
     93     }
     94 
     95     private static String doWriteForward(char[] text, int start, int limit,
     96                                          int options)
     97     {
     98         return doWriteForward(new String(text, start, limit - start), options);
     99     }
    100 
    101     static String writeReverse(String src, int options) {
    102         /*
    103          * RTL run -
    104          *
    105          * RTL runs need to be copied to the destination in reverse order
    106          * of code points, not code units, to keep Unicode characters intact.
    107          *
    108          * The general strategy for this is to read the source text
    109          * in backward order, collect all code units for a code point
    110          * (and optionally following combining characters, see below),
    111          * and copy all these code units in ascending order
    112          * to the destination for this run.
    113          *
    114          * Several options request whether combining characters
    115          * should be kept after their base characters,
    116          * whether Bidi control characters should be removed, and
    117          * whether characters should be replaced by their mirror-image
    118          * equivalent Unicode characters.
    119          */
    120         StringBuffer dest = new StringBuffer(src.length());
    121 
    122         /* optimize for several combinations of options */
    123         switch (options &
    124                 (Bidi.REMOVE_BIDI_CONTROLS |
    125                  Bidi.DO_MIRRORING |
    126                  Bidi.KEEP_BASE_COMBINING)) {
    127 
    128         case 0:
    129             /*
    130              * With none of the "complicated" options set, the destination
    131              * run will have the same length as the source run,
    132              * and there is no mirroring and no keeping combining characters
    133              * with their base characters.
    134              *
    135              * XXX: or dest = UTF16.reverse(new StringBuffer(src));
    136              */
    137 
    138             int srcLength = src.length();
    139 
    140             /* preserve character integrity */
    141             do {
    142                 /* i is always after the last code unit known to need to be kept
    143                  *  in this segment */
    144                 int i = srcLength;
    145 
    146                 /* collect code units for one base character */
    147                 srcLength -= UTF16.getCharCount(UTF16.charAt(src,
    148                                                              srcLength - 1));
    149 
    150                 /* copy this base character */
    151                 dest.append(src.substring(srcLength, i));
    152             } while(srcLength > 0);
    153             break;
    154 
    155         case Bidi.KEEP_BASE_COMBINING:
    156             /*
    157              * Here, too, the destination
    158              * run will have the same length as the source run,
    159              * and there is no mirroring.
    160              * We do need to keep combining characters with their base
    161              * characters.
    162              */
    163             srcLength = src.length();
    164 
    165             /* preserve character integrity */
    166             do {
    167                 /* i is always after the last code unit known to need to be kept
    168                  *  in this segment */
    169                 int c;
    170                 int i = srcLength;
    171 
    172                 /* collect code units and modifier letters for one base
    173                  * character */
    174                 do {
    175                     c = UTF16.charAt(src, srcLength - 1);
    176                     srcLength -= UTF16.getCharCount(c);
    177                 } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
    178 
    179                 /* copy this "user character" */
    180                 dest.append(src.substring(srcLength, i));
    181             } while(srcLength > 0);
    182             break;
    183 
    184         default:
    185             /*
    186              * With several "complicated" options set, this is the most
    187              * general and the slowest copying of an RTL run.
    188              * We will do mirroring, remove Bidi controls, and
    189              * keep combining characters with their base characters
    190              * as requested.
    191              */
    192             srcLength = src.length();
    193 
    194             /* preserve character integrity */
    195             do {
    196                 /* i is always after the last code unit known to need to be kept
    197                  *  in this segment */
    198                 int i = srcLength;
    199 
    200                 /* collect code units for one base character */
    201                 int c = UTF16.charAt(src, srcLength - 1);
    202                 srcLength -= UTF16.getCharCount(c);
    203                 if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {
    204                     /* collect modifier letters for this base character */
    205                     while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
    206                         c = UTF16.charAt(src, srcLength - 1);
    207                         srcLength -= UTF16.getCharCount(c);
    208                     }
    209                 }
    210 
    211                 if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&
    212                     Bidi.IsBidiControlChar(c)) {
    213                     /* do not copy this Bidi control character */
    214                     continue;
    215                 }
    216 
    217                 /* copy this "user character" */
    218                 int j = srcLength;
    219                 if((options & Bidi.DO_MIRRORING) != 0) {
    220                     /* mirror only the base character */
    221                     c = UCharacter.getMirror(c);
    222                     UTF16.append(dest, c);
    223                     j += UTF16.getCharCount(c);
    224                 }
    225                 dest.append(src.substring(j, i));
    226             } while(srcLength > 0);
    227             break;
    228         } /* end of switch */
    229 
    230         return dest.toString();
    231     }
    232 
    233     static String doWriteReverse(char[] text, int start, int limit, int options)
    234     {
    235         return writeReverse(new String(text, start, limit - start), options);
    236     }
    237 
    238     static String writeReordered(Bidi bidi, int options)
    239     {
    240         int run, runCount;
    241         StringBuilder dest;
    242         char[] text = bidi.text;
    243         runCount = bidi.countRuns();
    244 
    245         /*
    246          * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the
    247          * reordering mode (checked below) is appropriate.
    248          */
    249         if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {
    250             options |= Bidi.INSERT_LRM_FOR_NUMERIC;
    251             options &= ~Bidi.REMOVE_BIDI_CONTROLS;
    252         }
    253         /*
    254          * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS
    255          * and cancels Bidi.INSERT_LRM_FOR_NUMERIC.
    256          */
    257         if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {
    258             options |= Bidi.REMOVE_BIDI_CONTROLS;
    259             options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
    260         }
    261         /*
    262          * If we do not perform the "inverse Bidi" algorithm, then we
    263          * don't need to insert any LRMs, and don't need to test for it.
    264          */
    265         if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) &&
    266             (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT)  &&
    267             (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
    268             (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) {
    269             options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
    270         }
    271         dest = new StringBuilder((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ?
    272                                  bidi.length * 2 : bidi.length);
    273         /*
    274          * Iterate through all visual runs and copy the run text segments to
    275          * the destination, according to the options.
    276          *
    277          * The tests for where to insert LRMs ignore the fact that there may be
    278          * BN codes or non-BMP code points at the beginning and end of a run;
    279          * they may insert LRMs unnecessarily but the tests are faster this way
    280          * (this would have to be improved for UTF-8).
    281          */
    282         if ((options & Bidi.OUTPUT_REVERSE) == 0) {
    283             /* forward output */
    284             if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
    285                 /* do not insert Bidi controls */
    286                 for (run = 0; run < runCount; ++run) {
    287                     BidiRun bidiRun = bidi.getVisualRun(run);
    288                     if (bidiRun.isEvenRun()) {
    289                         dest.append(doWriteForward(text, bidiRun.start,
    290                                                    bidiRun.limit,
    291                                                    options & ~Bidi.DO_MIRRORING));
    292                      } else {
    293                         dest.append(doWriteReverse(text, bidiRun.start,
    294                                                    bidiRun.limit, options));
    295                      }
    296                 }
    297             } else {
    298                 /* insert Bidi controls for "inverse Bidi" */
    299                 byte[] dirProps = bidi.dirProps;
    300                 char uc;
    301                 int markFlag;
    302 
    303                 for (run = 0; run < runCount; ++run) {
    304                     BidiRun bidiRun = bidi.getVisualRun(run);
    305                     markFlag=0;
    306                     /* check if something relevant in insertPoints */
    307                     markFlag = bidi.runs[run].insertRemove;
    308                     if (markFlag < 0) { /* bidi controls count */
    309                         markFlag = 0;
    310                     }
    311                     if (bidiRun.isEvenRun()) {
    312                         if (bidi.isInverse() &&
    313                                 dirProps[bidiRun.start] != Bidi.L) {
    314                             markFlag |= Bidi.LRM_BEFORE;
    315                         }
    316                         if ((markFlag & Bidi.LRM_BEFORE) != 0) {
    317                             uc = LRM_CHAR;
    318                         } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
    319                             uc = RLM_CHAR;
    320                         } else {
    321                             uc = 0;
    322                         }
    323                         if (uc != 0) {
    324                             dest.append(uc);
    325                         }
    326                         dest.append(doWriteForward(text,
    327                                                    bidiRun.start, bidiRun.limit,
    328                                                    options & ~Bidi.DO_MIRRORING));
    329 
    330                         if (bidi.isInverse() &&
    331                              dirProps[bidiRun.limit - 1] != Bidi.L) {
    332                             markFlag |= Bidi.LRM_AFTER;
    333                         }
    334                         if ((markFlag & Bidi.LRM_AFTER) != 0) {
    335                             uc = LRM_CHAR;
    336                         } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
    337                             uc = RLM_CHAR;
    338                         } else {
    339                             uc = 0;
    340                         }
    341                         if (uc != 0) {
    342                             dest.append(uc);
    343                         }
    344                     } else { /* RTL run */
    345                         if (bidi.isInverse() &&
    346                             !bidi.testDirPropFlagAt(MASK_R_AL,
    347                                                     bidiRun.limit - 1)) {
    348                             markFlag |= Bidi.RLM_BEFORE;
    349                         }
    350                         if ((markFlag & Bidi.LRM_BEFORE) != 0) {
    351                             uc = LRM_CHAR;
    352                         } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
    353                             uc = RLM_CHAR;
    354                         } else {
    355                             uc = 0;
    356                         }
    357                         if (uc != 0) {
    358                             dest.append(uc);
    359                         }
    360                         dest.append(doWriteReverse(text, bidiRun.start,
    361                                                    bidiRun.limit, options));
    362 
    363                         if(bidi.isInverse() &&
    364                                 (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
    365                             markFlag |= Bidi.RLM_AFTER;
    366                         }
    367                         if ((markFlag & Bidi.LRM_AFTER) != 0) {
    368                             uc = LRM_CHAR;
    369                         } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
    370                             uc = RLM_CHAR;
    371                         } else {
    372                             uc = 0;
    373                         }
    374                         if (uc != 0) {
    375                             dest.append(uc);
    376                         }
    377                     }
    378                 }
    379             }
    380         } else {
    381             /* reverse output */
    382             if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
    383                 /* do not insert Bidi controls */
    384                 for(run = runCount; --run >= 0; ) {
    385                     BidiRun bidiRun = bidi.getVisualRun(run);
    386                     if (bidiRun.isEvenRun()) {
    387                         dest.append(doWriteReverse(text,
    388                                                    bidiRun.start, bidiRun.limit,
    389                                                    options & ~Bidi.DO_MIRRORING));
    390                     } else {
    391                         dest.append(doWriteForward(text, bidiRun.start,
    392                                                    bidiRun.limit, options));
    393                     }
    394                 }
    395             } else {
    396                 /* insert Bidi controls for "inverse Bidi" */
    397 
    398                 byte[] dirProps = bidi.dirProps;
    399 
    400                 for (run = runCount; --run >= 0; ) {
    401                     /* reverse output */
    402                     BidiRun bidiRun = bidi.getVisualRun(run);
    403                     if (bidiRun.isEvenRun()) {
    404                         if (dirProps[bidiRun.limit - 1] != Bidi.L) {
    405                             dest.append(LRM_CHAR);
    406                         }
    407 
    408                         dest.append(doWriteReverse(text, bidiRun.start,
    409                                 bidiRun.limit, options & ~Bidi.DO_MIRRORING));
    410 
    411                         if (dirProps[bidiRun.start] != Bidi.L) {
    412                             dest.append(LRM_CHAR);
    413                         }
    414                     } else {
    415                         if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
    416                             dest.append(RLM_CHAR);
    417                         }
    418 
    419                         dest.append(doWriteForward(text, bidiRun.start,
    420                                                    bidiRun.limit, options));
    421 
    422                         if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
    423                             dest.append(RLM_CHAR);
    424                         }
    425                     }
    426                 }
    427             }
    428         }
    429 
    430         return dest.toString();
    431     }
    432 }
    433