Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 2000-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  ubidiwrt.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 1999aug06
     14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
     15 *
     16 * This file contains implementations for BiDi functions that use
     17 * the core algorithm and core API to write reordered text.
     18 */
     19 
     20 /* set import/export definitions */
     21 #ifndef U_COMMON_IMPLEMENTATION
     22 #   define U_COMMON_IMPLEMENTATION
     23 #endif
     24 
     25 #include "unicode/utypes.h"
     26 #include "unicode/ustring.h"
     27 #include "unicode/uchar.h"
     28 #include "unicode/ubidi.h"
     29 #include "unicode/utf16.h"
     30 #include "cmemory.h"
     31 #include "ustr_imp.h"
     32 #include "ubidiimp.h"
     33 
     34 /*
     35  * The function implementations in this file are designed
     36  * for UTF-16 and UTF-32, not for UTF-8.
     37  *
     38  * Assumptions that are not true for UTF-8:
     39  * - Any code point always needs the same number of code units
     40  *   ("minimum-length-problem" of UTF-8)
     41  * - The BiDi control characters need only one code unit each
     42  *
     43  * Further assumptions for all UTFs:
     44  * - u_charMirror(c) needs the same number of code units as c
     45  */
     46 #if UTF_SIZE==8
     47 # error reimplement ubidi_writeReordered() for UTF-8, see comment above
     48 #endif
     49 
     50 #define IS_COMBINING(type) ((1UL<<(type))&(1UL<<U_NON_SPACING_MARK|1UL<<U_COMBINING_SPACING_MARK|1UL<<U_ENCLOSING_MARK))
     51 
     52 /*
     53  * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we
     54  * semantically write RTL runs in reverse and later reverse them again.
     55  * Instead, we actually write them in forward order to begin with.
     56  * However, if the RTL run was to be mirrored, we need to mirror here now
     57  * since the implicit second reversal must not do it.
     58  * It looks strange to do mirroring in LTR output, but it is only because
     59  * we are writing RTL output in reverse.
     60  */
     61 static int32_t
     62 doWriteForward(const UChar *src, int32_t srcLength,
     63                UChar *dest, int32_t destSize,
     64                uint16_t options,
     65                UErrorCode *pErrorCode) {
     66     /* optimize for several combinations of options */
     67     switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING)) {
     68     case 0: {
     69         /* simply copy the LTR run to the destination */
     70         int32_t length=srcLength;
     71         if(destSize<length) {
     72             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     73             return srcLength;
     74         }
     75         do {
     76             *dest++=*src++;
     77         } while(--length>0);
     78         return srcLength;
     79     }
     80     case UBIDI_DO_MIRRORING: {
     81         /* do mirroring */
     82         int32_t i=0, j=0;
     83         UChar32 c;
     84 
     85         if(destSize<srcLength) {
     86             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     87             return srcLength;
     88         }
     89         do {
     90             U16_NEXT(src, i, srcLength, c);
     91             c=u_charMirror(c);
     92             U16_APPEND_UNSAFE(dest, j, c);
     93         } while(i<srcLength);
     94         return srcLength;
     95     }
     96     case UBIDI_REMOVE_BIDI_CONTROLS: {
     97         /* copy the LTR run and remove any BiDi control characters */
     98         int32_t remaining=destSize;
     99         UChar c;
    100         do {
    101             c=*src++;
    102             if(!IS_BIDI_CONTROL_CHAR(c)) {
    103                 if(--remaining<0) {
    104                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    105 
    106                     /* preflight the length */
    107                     while(--srcLength>0) {
    108                         c=*src++;
    109                         if(!IS_BIDI_CONTROL_CHAR(c)) {
    110                             --remaining;
    111                         }
    112                     }
    113                     return destSize-remaining;
    114                 }
    115                 *dest++=c;
    116             }
    117         } while(--srcLength>0);
    118         return destSize-remaining;
    119     }
    120     default: {
    121         /* remove BiDi control characters and do mirroring */
    122         int32_t remaining=destSize;
    123         int32_t i, j=0;
    124         UChar32 c;
    125         do {
    126             i=0;
    127             U16_NEXT(src, i, srcLength, c);
    128             src+=i;
    129             srcLength-=i;
    130             if(!IS_BIDI_CONTROL_CHAR(c)) {
    131                 remaining-=i;
    132                 if(remaining<0) {
    133                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    134 
    135                     /* preflight the length */
    136                     while(srcLength>0) {
    137                         c=*src++;
    138                         if(!IS_BIDI_CONTROL_CHAR(c)) {
    139                             --remaining;
    140                         }
    141                         --srcLength;
    142                     }
    143                     return destSize-remaining;
    144                 }
    145                 c=u_charMirror(c);
    146                 U16_APPEND_UNSAFE(dest, j, c);
    147             }
    148         } while(srcLength>0);
    149         return j;
    150     }
    151     } /* end of switch */
    152 }
    153 
    154 static int32_t
    155 doWriteReverse(const UChar *src, int32_t srcLength,
    156                UChar *dest, int32_t destSize,
    157                uint16_t options,
    158                UErrorCode *pErrorCode) {
    159     /*
    160      * RTL run -
    161      *
    162      * RTL runs need to be copied to the destination in reverse order
    163      * of code points, not code units, to keep Unicode characters intact.
    164      *
    165      * The general strategy for this is to read the source text
    166      * in backward order, collect all code units for a code point
    167      * (and optionally following combining characters, see below),
    168      * and copy all these code units in ascending order
    169      * to the destination for this run.
    170      *
    171      * Several options request whether combining characters
    172      * should be kept after their base characters,
    173      * whether BiDi control characters should be removed, and
    174      * whether characters should be replaced by their mirror-image
    175      * equivalent Unicode characters.
    176      */
    177     int32_t i, j;
    178     UChar32 c;
    179 
    180     /* optimize for several combinations of options */
    181     switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) {
    182     case 0:
    183         /*
    184          * With none of the "complicated" options set, the destination
    185          * run will have the same length as the source run,
    186          * and there is no mirroring and no keeping combining characters
    187          * with their base characters.
    188          */
    189         if(destSize<srcLength) {
    190             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    191             return srcLength;
    192         }
    193         destSize=srcLength;
    194 
    195         /* preserve character integrity */
    196         do {
    197             /* i is always after the last code unit known to need to be kept in this segment */
    198             i=srcLength;
    199 
    200             /* collect code units for one base character */
    201             U16_BACK_1(src, 0, srcLength);
    202 
    203             /* copy this base character */
    204             j=srcLength;
    205             do {
    206                 *dest++=src[j++];
    207             } while(j<i);
    208         } while(srcLength>0);
    209         break;
    210     case UBIDI_KEEP_BASE_COMBINING:
    211         /*
    212          * Here, too, the destination
    213          * run will have the same length as the source run,
    214          * and there is no mirroring.
    215          * We do need to keep combining characters with their base characters.
    216          */
    217         if(destSize<srcLength) {
    218             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    219             return srcLength;
    220         }
    221         destSize=srcLength;
    222 
    223         /* preserve character integrity */
    224         do {
    225             /* i is always after the last code unit known to need to be kept in this segment */
    226             i=srcLength;
    227 
    228             /* collect code units and modifier letters for one base character */
    229             do {
    230                 U16_PREV(src, 0, srcLength, c);
    231             } while(srcLength>0 && IS_COMBINING(u_charType(c)));
    232 
    233             /* copy this "user character" */
    234             j=srcLength;
    235             do {
    236                 *dest++=src[j++];
    237             } while(j<i);
    238         } while(srcLength>0);
    239         break;
    240     default:
    241         /*
    242          * With several "complicated" options set, this is the most
    243          * general and the slowest copying of an RTL run.
    244          * We will do mirroring, remove BiDi controls, and
    245          * keep combining characters with their base characters
    246          * as requested.
    247          */
    248         if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) {
    249             i=srcLength;
    250         } else {
    251             /* we need to find out the destination length of the run,
    252                which will not include the BiDi control characters */
    253             int32_t length=srcLength;
    254             UChar ch;
    255 
    256             i=0;
    257             do {
    258                 ch=*src++;
    259                 if(!IS_BIDI_CONTROL_CHAR(ch)) {
    260                     ++i;
    261                 }
    262             } while(--length>0);
    263             src-=srcLength;
    264         }
    265 
    266         if(destSize<i) {
    267             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    268             return i;
    269         }
    270         destSize=i;
    271 
    272         /* preserve character integrity */
    273         do {
    274             /* i is always after the last code unit known to need to be kept in this segment */
    275             i=srcLength;
    276 
    277             /* collect code units for one base character */
    278             U16_PREV(src, 0, srcLength, c);
    279             if(options&UBIDI_KEEP_BASE_COMBINING) {
    280                 /* collect modifier letters for this base character */
    281                 while(srcLength>0 && IS_COMBINING(u_charType(c))) {
    282                     U16_PREV(src, 0, srcLength, c);
    283                 }
    284             }
    285 
    286             if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) {
    287                 /* do not copy this BiDi control character */
    288                 continue;
    289             }
    290 
    291             /* copy this "user character" */
    292             j=srcLength;
    293             if(options&UBIDI_DO_MIRRORING) {
    294                 /* mirror only the base character */
    295                 int32_t k=0;
    296                 c=u_charMirror(c);
    297                 U16_APPEND_UNSAFE(dest, k, c);
    298                 dest+=k;
    299                 j+=k;
    300             }
    301             while(j<i) {
    302                 *dest++=src[j++];
    303             }
    304         } while(srcLength>0);
    305         break;
    306     } /* end of switch */
    307 
    308     return destSize;
    309 }
    310 
    311 U_CAPI int32_t U_EXPORT2
    312 ubidi_writeReverse(const UChar *src, int32_t srcLength,
    313                    UChar *dest, int32_t destSize,
    314                    uint16_t options,
    315                    UErrorCode *pErrorCode) {
    316     int32_t destLength;
    317 
    318     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    319         return 0;
    320     }
    321 
    322     /* more error checking */
    323     if( src==NULL || srcLength<-1 ||
    324         destSize<0 || (destSize>0 && dest==NULL))
    325     {
    326         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    327         return 0;
    328     }
    329 
    330     /* do input and output overlap? */
    331     if( dest!=NULL &&
    332         ((src>=dest && src<dest+destSize) ||
    333          (dest>=src && dest<src+srcLength)))
    334     {
    335         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    336         return 0;
    337     }
    338 
    339     if(srcLength==-1) {
    340         srcLength=u_strlen(src);
    341     }
    342     if(srcLength>0) {
    343         destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode);
    344     } else {
    345         /* nothing to do */
    346         destLength=0;
    347     }
    348 
    349     return u_terminateUChars(dest, destSize, destLength, pErrorCode);
    350 }
    351 
    352 U_CAPI int32_t U_EXPORT2
    353 ubidi_writeReordered(UBiDi *pBiDi,
    354                      UChar *dest, int32_t destSize,
    355                      uint16_t options,
    356                      UErrorCode *pErrorCode) {
    357     const UChar *text;
    358     UChar *saveDest;
    359     int32_t length, destCapacity;
    360     int32_t run, runCount, logicalStart, runLength;
    361 
    362     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    363         return 0;
    364     }
    365 
    366     /* more error checking */
    367     if( pBiDi==NULL ||
    368         (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 ||
    369         destSize<0 || (destSize>0 && dest==NULL))
    370     {
    371         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    372         return 0;
    373     }
    374 
    375     /* do input and output overlap? */
    376     if( dest!=NULL &&
    377         ((text>=dest && text<dest+destSize) ||
    378          (dest>=text && dest<text+pBiDi->originalLength)))
    379     {
    380         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    381         return 0;
    382     }
    383 
    384     if(length==0) {
    385         /* nothing to do */
    386         return u_terminateUChars(dest, destSize, 0, pErrorCode);
    387     }
    388 
    389     runCount=ubidi_countRuns(pBiDi, pErrorCode);
    390     if(U_FAILURE(*pErrorCode)) {
    391         return 0;
    392     }
    393 
    394     /* destSize shrinks, later destination length=destCapacity-destSize */
    395     saveDest=dest;
    396     destCapacity=destSize;
    397 
    398     /*
    399      * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the
    400      * reordering mode (checked below) is appropriate.
    401      */
    402     if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
    403         options|=UBIDI_INSERT_LRM_FOR_NUMERIC;
    404         options&=~UBIDI_REMOVE_BIDI_CONTROLS;
    405     }
    406     /*
    407      * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS
    408      * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC.
    409      */
    410     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
    411         options|=UBIDI_REMOVE_BIDI_CONTROLS;
    412         options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
    413     }
    414     /*
    415      * If we do not perform the "inverse BiDi" algorithm, then we
    416      * don't need to insert any LRMs, and don't need to test for it.
    417      */
    418     if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) &&
    419        (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT)  &&
    420        (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
    421        (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) {
    422         options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
    423     }
    424     /*
    425      * Iterate through all visual runs and copy the run text segments to
    426      * the destination, according to the options.
    427      *
    428      * The tests for where to insert LRMs ignore the fact that there may be
    429      * BN codes or non-BMP code points at the beginning and end of a run;
    430      * they may insert LRMs unnecessarily but the tests are faster this way
    431      * (this would have to be improved for UTF-8).
    432      *
    433      * Note that the only errors that are set by doWriteXY() are buffer overflow
    434      * errors. Ignore them until the end, and continue for preflighting.
    435      */
    436     if(!(options&UBIDI_OUTPUT_REVERSE)) {
    437         /* forward output */
    438         if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
    439             /* do not insert BiDi controls */
    440             for(run=0; run<runCount; ++run) {
    441                 if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
    442                     runLength=doWriteForward(text+logicalStart, runLength,
    443                                              dest, destSize,
    444                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
    445                 } else {
    446                     runLength=doWriteReverse(text+logicalStart, runLength,
    447                                              dest, destSize,
    448                                              options, pErrorCode);
    449                 }
    450                 if(dest!=NULL) {
    451                   dest+=runLength;
    452                 }
    453                 destSize-=runLength;
    454             }
    455         } else {
    456             /* insert BiDi controls for "inverse BiDi" */
    457             const DirProp *dirProps=pBiDi->dirProps;
    458             const UChar *src;
    459             UChar uc;
    460             UBiDiDirection dir;
    461             int32_t markFlag;
    462 
    463             for(run=0; run<runCount; ++run) {
    464                 dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
    465                 src=text+logicalStart;
    466                 /* check if something relevant in insertPoints */
    467                 markFlag=pBiDi->runs[run].insertRemove;
    468                 if(markFlag<0) {        /* BiDi controls count */
    469                     markFlag=0;
    470                 }
    471 
    472                 if(UBIDI_LTR==dir) {
    473                     if((pBiDi->isInverse) &&
    474                        (/*run>0 &&*/ dirProps[logicalStart]!=L)) {
    475                         markFlag |= LRM_BEFORE;
    476                     }
    477                     if (markFlag & LRM_BEFORE) {
    478                         uc=LRM_CHAR;
    479                     }
    480                     else if (markFlag & RLM_BEFORE) {
    481                         uc=RLM_CHAR;
    482                     }
    483                     else  uc=0;
    484                     if(uc) {
    485                         if(destSize>0) {
    486                             *dest++=uc;
    487                         }
    488                         --destSize;
    489                     }
    490 
    491                     runLength=doWriteForward(src, runLength,
    492                                              dest, destSize,
    493                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
    494                     if(dest!=NULL) {
    495                       dest+=runLength;
    496                     }
    497                     destSize-=runLength;
    498 
    499                     if((pBiDi->isInverse) &&
    500                        (/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L)) {
    501                         markFlag |= LRM_AFTER;
    502                     }
    503                     if (markFlag & LRM_AFTER) {
    504                         uc=LRM_CHAR;
    505                     }
    506                     else if (markFlag & RLM_AFTER) {
    507                         uc=RLM_CHAR;
    508                     }
    509                     else  uc=0;
    510                     if(uc) {
    511                         if(destSize>0) {
    512                             *dest++=uc;
    513                         }
    514                         --destSize;
    515                     }
    516                 } else {                /* RTL run */
    517                     if((pBiDi->isInverse) &&
    518                        (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) {
    519                         markFlag |= RLM_BEFORE;
    520                     }
    521                     if (markFlag & LRM_BEFORE) {
    522                         uc=LRM_CHAR;
    523                     }
    524                     else if (markFlag & RLM_BEFORE) {
    525                         uc=RLM_CHAR;
    526                     }
    527                     else  uc=0;
    528                     if(uc) {
    529                         if(destSize>0) {
    530                             *dest++=uc;
    531                         }
    532                         --destSize;
    533                     }
    534 
    535                     runLength=doWriteReverse(src, runLength,
    536                                              dest, destSize,
    537                                              options, pErrorCode);
    538                     if(dest!=NULL) {
    539                       dest+=runLength;
    540                     }
    541                     destSize-=runLength;
    542 
    543                     if((pBiDi->isInverse) &&
    544                        (/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart])))) {
    545                         markFlag |= RLM_AFTER;
    546                     }
    547                     if (markFlag & LRM_AFTER) {
    548                         uc=LRM_CHAR;
    549                     }
    550                     else if (markFlag & RLM_AFTER) {
    551                         uc=RLM_CHAR;
    552                     }
    553                     else  uc=0;
    554                     if(uc) {
    555                         if(destSize>0) {
    556                             *dest++=uc;
    557                         }
    558                         --destSize;
    559                     }
    560                 }
    561             }
    562         }
    563     } else {
    564         /* reverse output */
    565         if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
    566             /* do not insert BiDi controls */
    567             for(run=runCount; --run>=0;) {
    568                 if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
    569                     runLength=doWriteReverse(text+logicalStart, runLength,
    570                                              dest, destSize,
    571                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
    572                 } else {
    573                     runLength=doWriteForward(text+logicalStart, runLength,
    574                                              dest, destSize,
    575                                              options, pErrorCode);
    576                 }
    577                 if(dest!=NULL) {
    578                   dest+=runLength;
    579                 }
    580                 destSize-=runLength;
    581             }
    582         } else {
    583             /* insert BiDi controls for "inverse BiDi" */
    584             const DirProp *dirProps=pBiDi->dirProps;
    585             const UChar *src;
    586             UBiDiDirection dir;
    587 
    588             for(run=runCount; --run>=0;) {
    589                 /* reverse output */
    590                 dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
    591                 src=text+logicalStart;
    592 
    593                 if(UBIDI_LTR==dir) {
    594                     if(/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L) {
    595                         if(destSize>0) {
    596                             *dest++=LRM_CHAR;
    597                         }
    598                         --destSize;
    599                     }
    600 
    601                     runLength=doWriteReverse(src, runLength,
    602                                              dest, destSize,
    603                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
    604                     if(dest!=NULL) {
    605                       dest+=runLength;
    606                     }
    607                     destSize-=runLength;
    608 
    609                     if(/*run>0 &&*/ dirProps[logicalStart]!=L) {
    610                         if(destSize>0) {
    611                             *dest++=LRM_CHAR;
    612                         }
    613                         --destSize;
    614                     }
    615                 } else {
    616                     if(/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart]))) {
    617                         if(destSize>0) {
    618                             *dest++=RLM_CHAR;
    619                         }
    620                         --destSize;
    621                     }
    622 
    623                     runLength=doWriteForward(src, runLength,
    624                                              dest, destSize,
    625                                              options, pErrorCode);
    626                     if(dest!=NULL) {
    627                       dest+=runLength;
    628                     }
    629                     destSize-=runLength;
    630 
    631                     if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) {
    632                         if(destSize>0) {
    633                             *dest++=RLM_CHAR;
    634                         }
    635                         --destSize;
    636                     }
    637                 }
    638             }
    639         }
    640     }
    641 
    642     return u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode);
    643 }
    644