Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  ubidi.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 1999jul27
     14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
     15 */
     16 
     17 #include "cmemory.h"
     18 #include "unicode/utypes.h"
     19 #include "unicode/ustring.h"
     20 #include "unicode/uchar.h"
     21 #include "unicode/ubidi.h"
     22 #include "ubidi_props.h"
     23 #include "ubidiimp.h"
     24 #include "uassert.h"
     25 
     26 /*
     27  * General implementation notes:
     28  *
     29  * Throughout the implementation, there are comments like (W2) that refer to
     30  * rules of the BiDi algorithm in its version 5, in this example to the second
     31  * rule of the resolution of weak types.
     32  *
     33  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
     34  * character according to UTF-16, the second UChar gets the directional property of
     35  * the entire character assigned, while the first one gets a BN, a boundary
     36  * neutral, type, which is ignored by most of the algorithm according to
     37  * rule (X9) and the implementation suggestions of the BiDi algorithm.
     38  *
     39  * Later, adjustWSLevels() will set the level for each BN to that of the
     40  * following character (UChar), which results in surrogate pairs getting the
     41  * same level on each of their surrogates.
     42  *
     43  * In a UTF-8 implementation, the same thing could be done: the last byte of
     44  * a multi-byte sequence would get the "real" property, while all previous
     45  * bytes of that sequence would get BN.
     46  *
     47  * It is not possible to assign all those parts of a character the same real
     48  * property because this would fail in the resolution of weak types with rules
     49  * that look at immediately surrounding types.
     50  *
     51  * As a related topic, this implementation does not remove Boundary Neutral
     52  * types from the input, but ignores them wherever this is relevant.
     53  * For example, the loop for the resolution of the weak types reads
     54  * types until it finds a non-BN.
     55  * Also, explicit embedding codes are neither changed into BN nor removed.
     56  * They are only treated the same way real BNs are.
     57  * As stated before, adjustWSLevels() takes care of them at the end.
     58  * For the purpose of conformance, the levels of all these codes
     59  * do not matter.
     60  *
     61  * Note that this implementation never modifies the dirProps
     62  * after the initial setup.
     63  *
     64  *
     65  * In this implementation, the resolution of weak types (Wn),
     66  * neutrals (Nn), and the assignment of the resolved level (In)
     67  * are all done in one single loop, in resolveImplicitLevels().
     68  * Changes of dirProp values are done on the fly, without writing
     69  * them back to the dirProps array.
     70  *
     71  *
     72  * This implementation contains code that allows to bypass steps of the
     73  * algorithm that are not needed on the specific paragraph
     74  * in order to speed up the most common cases considerably,
     75  * like text that is entirely LTR, or RTL text without numbers.
     76  *
     77  * Most of this is done by setting a bit for each directional property
     78  * in a flags variable and later checking for whether there are
     79  * any LTR characters or any RTL characters, or both, whether
     80  * there are any explicit embedding codes, etc.
     81  *
     82  * If the (Xn) steps are performed, then the flags are re-evaluated,
     83  * because they will then not contain the embedding codes any more
     84  * and will be adjusted for override codes, so that subsequently
     85  * more bypassing may be possible than what the initial flags suggested.
     86  *
     87  * If the text is not mixed-directional, then the
     88  * algorithm steps for the weak type resolution are not performed,
     89  * and all levels are set to the paragraph level.
     90  *
     91  * If there are no explicit embedding codes, then the (Xn) steps
     92  * are not performed.
     93  *
     94  * If embedding levels are supplied as a parameter, then all
     95  * explicit embedding codes are ignored, and the (Xn) steps
     96  * are not performed.
     97  *
     98  * White Space types could get the level of the run they belong to,
     99  * and are checked with a test of (flags&MASK_EMBEDDING) to
    100  * consider if the paragraph direction should be considered in
    101  * the flags variable.
    102  *
    103  * If there are no White Space types in the paragraph, then
    104  * (L1) is not necessary in adjustWSLevels().
    105  */
    106 
    107 /* to avoid some conditional statements, use tiny constant arrays */
    108 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
    109 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
    110 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
    111 
    112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
    113 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
    114 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
    115 
    116 /* UBiDi object management -------------------------------------------------- */
    117 
    118 U_CAPI UBiDi * U_EXPORT2
    119 ubidi_open(void)
    120 {
    121     UErrorCode errorCode=U_ZERO_ERROR;
    122     return ubidi_openSized(0, 0, &errorCode);
    123 }
    124 
    125 U_CAPI UBiDi * U_EXPORT2
    126 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
    127     UBiDi *pBiDi;
    128 
    129     /* check the argument values */
    130     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    131         return NULL;
    132     } else if(maxLength<0 || maxRunCount<0) {
    133         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    134         return NULL;    /* invalid arguments */
    135     }
    136 
    137     /* allocate memory for the object */
    138     pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
    139     if(pBiDi==NULL) {
    140         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    141         return NULL;
    142     }
    143 
    144     /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
    145     uprv_memset(pBiDi, 0, sizeof(UBiDi));
    146 
    147     /* get BiDi properties */
    148     pBiDi->bdp=ubidi_getSingleton();
    149 
    150     /* allocate memory for arrays as requested */
    151     if(maxLength>0) {
    152         if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
    153             !getInitialLevelsMemory(pBiDi, maxLength)
    154         ) {
    155             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    156         }
    157     } else {
    158         pBiDi->mayAllocateText=TRUE;
    159     }
    160 
    161     if(maxRunCount>0) {
    162         if(maxRunCount==1) {
    163             /* use simpleRuns[] */
    164             pBiDi->runsSize=sizeof(Run);
    165         } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
    166             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    167         }
    168     } else {
    169         pBiDi->mayAllocateRuns=TRUE;
    170     }
    171 
    172     if(U_SUCCESS(*pErrorCode)) {
    173         return pBiDi;
    174     } else {
    175         ubidi_close(pBiDi);
    176         return NULL;
    177     }
    178 }
    179 
    180 /*
    181  * We are allowed to allocate memory if memory==NULL or
    182  * mayAllocate==TRUE for each array that we need.
    183  * We also try to grow memory as needed if we
    184  * allocate it.
    185  *
    186  * Assume sizeNeeded>0.
    187  * If *pMemory!=NULL, then assume *pSize>0.
    188  *
    189  * ### this realloc() may unnecessarily copy the old data,
    190  * which we know we don't need any more;
    191  * is this the best way to do this??
    192  */
    193 U_CFUNC UBool
    194 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
    195     void **pMemory = (void **)bidiMem;
    196     /* check for existing memory */
    197     if(*pMemory==NULL) {
    198         /* we need to allocate memory */
    199         if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
    200             *pSize=sizeNeeded;
    201             return TRUE;
    202         } else {
    203             return FALSE;
    204         }
    205     } else {
    206         if(sizeNeeded<=*pSize) {
    207             /* there is already enough memory */
    208             return TRUE;
    209         }
    210         else if(!mayAllocate) {
    211             /* not enough memory, and we must not allocate */
    212             return FALSE;
    213         } else {
    214             /* we try to grow */
    215             void *memory;
    216             /* in most cases, we do not need the copy-old-data part of
    217              * realloc, but it is needed when adding runs using getRunsMemory()
    218              * in setParaRunsOnly()
    219              */
    220             if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
    221                 *pMemory=memory;
    222                 *pSize=sizeNeeded;
    223                 return TRUE;
    224             } else {
    225                 /* we failed to grow */
    226                 return FALSE;
    227             }
    228         }
    229     }
    230 }
    231 
    232 U_CAPI void U_EXPORT2
    233 ubidi_close(UBiDi *pBiDi) {
    234     if(pBiDi!=NULL) {
    235         pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
    236         if(pBiDi->dirPropsMemory!=NULL) {
    237             uprv_free(pBiDi->dirPropsMemory);
    238         }
    239         if(pBiDi->levelsMemory!=NULL) {
    240             uprv_free(pBiDi->levelsMemory);
    241         }
    242         if(pBiDi->runsMemory!=NULL) {
    243             uprv_free(pBiDi->runsMemory);
    244         }
    245         if(pBiDi->parasMemory!=NULL) {
    246             uprv_free(pBiDi->parasMemory);
    247         }
    248         if(pBiDi->insertPoints.points!=NULL) {
    249             uprv_free(pBiDi->insertPoints.points);
    250         }
    251 
    252         uprv_free(pBiDi);
    253     }
    254 }
    255 
    256 /* set to approximate "inverse BiDi" ---------------------------------------- */
    257 
    258 U_CAPI void U_EXPORT2
    259 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
    260     if(pBiDi!=NULL) {
    261         pBiDi->isInverse=isInverse;
    262         pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
    263                                           : UBIDI_REORDER_DEFAULT;
    264     }
    265 }
    266 
    267 U_CAPI UBool U_EXPORT2
    268 ubidi_isInverse(UBiDi *pBiDi) {
    269     if(pBiDi!=NULL) {
    270         return pBiDi->isInverse;
    271     } else {
    272         return FALSE;
    273     }
    274 }
    275 
    276 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
    277  * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
    278  * concept of RUNS_ONLY which is a double operation.
    279  * It could be advantageous to divide this into 3 concepts:
    280  * a) Operation: direct / inverse / RUNS_ONLY
    281  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
    282  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
    283  * This would allow combinations not possible today like RUNS_ONLY with
    284  * NUMBERS_SPECIAL.
    285  * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
    286  * REMOVE_CONTROLS for the inverse step.
    287  * Not all combinations would be supported, and probably not all do make sense.
    288  * This would need to document which ones are supported and what are the
    289  * fallbacks for unsupported combinations.
    290  */
    291 U_CAPI void U_EXPORT2
    292 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
    293     if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
    294                         && (reorderingMode < UBIDI_REORDER_COUNT)) {
    295         pBiDi->reorderingMode = reorderingMode;
    296         pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
    297     }
    298 }
    299 
    300 U_CAPI UBiDiReorderingMode U_EXPORT2
    301 ubidi_getReorderingMode(UBiDi *pBiDi) {
    302     if (pBiDi!=NULL) {
    303         return pBiDi->reorderingMode;
    304     } else {
    305         return UBIDI_REORDER_DEFAULT;
    306     }
    307 }
    308 
    309 U_CAPI void U_EXPORT2
    310 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
    311     if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
    312         reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
    313     }
    314     if (pBiDi!=NULL) {
    315         pBiDi->reorderingOptions=reorderingOptions;
    316     }
    317 }
    318 
    319 U_CAPI uint32_t U_EXPORT2
    320 ubidi_getReorderingOptions(UBiDi *pBiDi) {
    321     if (pBiDi!=NULL) {
    322         return pBiDi->reorderingOptions;
    323     } else {
    324         return 0;
    325     }
    326 }
    327 
    328 U_CAPI UBiDiDirection U_EXPORT2
    329 ubidi_getBaseDirection(const UChar *text,
    330 int32_t length){
    331 
    332     int32_t i;
    333     UChar32 uchar;
    334     UCharDirection dir;
    335 
    336     if( text==NULL || length<-1 ){
    337         return UBIDI_NEUTRAL;
    338     }
    339 
    340     if(length==-1) {
    341         length=u_strlen(text);
    342     }
    343 
    344     for( i = 0 ; i < length; ) {
    345         /* i is incremented by U16_NEXT */
    346         U16_NEXT(text, i, length, uchar);
    347         dir = u_charDirection(uchar);
    348         if( dir == U_LEFT_TO_RIGHT )
    349                 return UBIDI_LTR;
    350         if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
    351                 return UBIDI_RTL;
    352     }
    353     return UBIDI_NEUTRAL;
    354 }
    355 
    356 /* perform (P2)..(P3) ------------------------------------------------------- */
    357 
    358 /*
    359  * Get the directional properties for the text,
    360  * calculate the flags bit-set, and
    361  * determine the paragraph level if necessary.
    362  */
    363 static void
    364 getDirProps(UBiDi *pBiDi) {
    365     const UChar *text=pBiDi->text;
    366     DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
    367 
    368     int32_t i=0, i1, length=pBiDi->originalLength;
    369     Flags flags=0;      /* collect all directionalities in the text */
    370     UChar32 uchar;
    371     DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */
    372     UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
    373     /* for inverse BiDi, the default para level is set to RTL if there is a
    374        strong R or AL character at either end of the text                           */
    375     UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
    376             (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
    377              pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
    378     int32_t lastArabicPos=-1;
    379     int32_t controlCount=0;
    380     UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
    381                                        UBIDI_OPTION_REMOVE_CONTROLS);
    382 
    383     typedef enum {
    384          NOT_CONTEXTUAL,                /* 0: not contextual paraLevel */
    385          LOOKING_FOR_STRONG,            /* 1: looking for first strong char */
    386          FOUND_STRONG_CHAR              /* 2: found first strong char       */
    387     } State;
    388     State state;
    389     int32_t paraStart=0;                /* index of first char in paragraph */
    390     DirProp paraDir;                    /* == CONTEXT_RTL within paragraphs
    391                                            starting with strong R char      */
    392     DirProp lastStrongDir=0;            /* for default level & inverse BiDi */
    393     int32_t lastStrongLTR=0;            /* for STREAMING option             */
    394 
    395     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
    396         pBiDi->length=0;
    397         lastStrongLTR=0;
    398     }
    399     if(isDefaultLevel) {
    400         paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0;
    401         paraDir=paraDirDefault;
    402         lastStrongDir=paraDirDefault;
    403         state=LOOKING_FOR_STRONG;
    404     } else {
    405         state=NOT_CONTEXTUAL;
    406         paraDir=0;
    407     }
    408     /* count paragraphs and determine the paragraph level (P2..P3) */
    409     /*
    410      * see comment in ubidi.h:
    411      * the DEFAULT_XXX values are designed so that
    412      * their bit 0 alone yields the intended default
    413      */
    414     for( /* i=0 above */ ; i<length; ) {
    415         /* i is incremented by U16_NEXT */
    416         U16_NEXT(text, i, length, uchar);
    417         flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
    418         dirProps[i-1]=dirProp|paraDir;
    419         if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
    420             flags|=DIRPROP_FLAG(BN);
    421             dirProps[i-2]=(DirProp)(BN|paraDir);
    422         }
    423         if(state==LOOKING_FOR_STRONG) {
    424             if(dirProp==L) {
    425                 state=FOUND_STRONG_CHAR;
    426                 if(paraDir) {
    427                     paraDir=0;
    428                     for(i1=paraStart; i1<i; i1++) {
    429                         dirProps[i1]&=~CONTEXT_RTL;
    430                     }
    431                 }
    432                 continue;
    433             }
    434             if(dirProp==R || dirProp==AL) {
    435                 state=FOUND_STRONG_CHAR;
    436                 if(paraDir==0) {
    437                     paraDir=CONTEXT_RTL;
    438                     for(i1=paraStart; i1<i; i1++) {
    439                         dirProps[i1]|=CONTEXT_RTL;
    440                     }
    441                 }
    442                 continue;
    443             }
    444         }
    445         if(dirProp==L) {
    446             lastStrongDir=0;
    447             lastStrongLTR=i;            /* i is index to next character */
    448         }
    449         else if(dirProp==R) {
    450             lastStrongDir=CONTEXT_RTL;
    451         }
    452         else if(dirProp==AL) {
    453             lastStrongDir=CONTEXT_RTL;
    454             lastArabicPos=i-1;
    455         }
    456         else if(dirProp==B) {
    457             if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
    458                 pBiDi->length=i;        /* i is index to next character */
    459             }
    460             if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
    461                 for( ; paraStart<i; paraStart++) {
    462                     dirProps[paraStart]|=CONTEXT_RTL;
    463                 }
    464             }
    465             if(i<length) {              /* B not last char in text */
    466                 if(!((uchar==CR) && (text[i]==LF))) {
    467                     pBiDi->paraCount++;
    468                 }
    469                 if(isDefaultLevel) {
    470                     state=LOOKING_FOR_STRONG;
    471                     paraStart=i;        /* i is index to next character */
    472                     paraDir=paraDirDefault;
    473                     lastStrongDir=paraDirDefault;
    474                 }
    475             }
    476         }
    477         if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) {
    478             controlCount++;
    479         }
    480     }
    481     if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
    482         for(i1=paraStart; i1<length; i1++) {
    483             dirProps[i1]|=CONTEXT_RTL;
    484         }
    485     }
    486     if(isDefaultLevel) {
    487         pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0);
    488     }
    489     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
    490         if((lastStrongLTR>pBiDi->length) &&
    491            (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) {
    492             pBiDi->length = lastStrongLTR;
    493         }
    494         if(pBiDi->length<pBiDi->originalLength) {
    495             pBiDi->paraCount--;
    496         }
    497     }
    498     /* The following line does nothing new for contextual paraLevel, but is
    499        needed for absolute paraLevel.                               */
    500     flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    501 
    502     if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
    503         flags|=DIRPROP_FLAG(L);
    504     }
    505 
    506     pBiDi->controlCount = controlCount;
    507     pBiDi->flags=flags;
    508     pBiDi->lastArabicPos=lastArabicPos;
    509 }
    510 
    511 /* perform (X1)..(X9) ------------------------------------------------------- */
    512 
    513 /* determine if the text is mixed-directional or single-directional */
    514 static UBiDiDirection
    515 directionFromFlags(UBiDi *pBiDi) {
    516     Flags flags=pBiDi->flags;
    517     /* if the text contains AN and neutrals, then some neutrals may become RTL */
    518     if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
    519         return UBIDI_LTR;
    520     } else if(!(flags&MASK_LTR)) {
    521         return UBIDI_RTL;
    522     } else {
    523         return UBIDI_MIXED;
    524     }
    525 }
    526 
    527 /*
    528  * Resolve the explicit levels as specified by explicit embedding codes.
    529  * Recalculate the flags to have them reflect the real properties
    530  * after taking the explicit embeddings into account.
    531  *
    532  * The BiDi algorithm is designed to result in the same behavior whether embedding
    533  * levels are externally specified (from "styled text", supposedly the preferred
    534  * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
    535  * That is why (X9) instructs to remove all explicit codes (and BN).
    536  * However, in a real implementation, this removal of these codes and their index
    537  * positions in the plain text is undesirable since it would result in
    538  * reallocated, reindexed text.
    539  * Instead, this implementation leaves the codes in there and just ignores them
    540  * in the subsequent processing.
    541  * In order to get the same reordering behavior, positions with a BN or an
    542  * explicit embedding code just get the same level assigned as the last "real"
    543  * character.
    544  *
    545  * Some implementations, not this one, then overwrite some of these
    546  * directionality properties at "real" same-level-run boundaries by
    547  * L or R codes so that the resolution of weak types can be performed on the
    548  * entire paragraph at once instead of having to parse it once more and
    549  * perform that resolution on same-level-runs.
    550  * This limits the scope of the implicit rules in effectively
    551  * the same way as the run limits.
    552  *
    553  * Instead, this implementation does not modify these codes.
    554  * On one hand, the paragraph has to be scanned for same-level-runs, but
    555  * on the other hand, this saves another loop to reset these codes,
    556  * or saves making and modifying a copy of dirProps[].
    557  *
    558  *
    559  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
    560  *
    561  *
    562  * Handling the stack of explicit levels (Xn):
    563  *
    564  * With the BiDi stack of explicit levels,
    565  * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
    566  * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
    567  *
    568  * In order to have a correct push-pop semantics even in the case of overflows,
    569  * there are two overflow counters:
    570  * - countOver60 is incremented with each LRx at level 60
    571  * - from level 60, one RLx increases the level to 61
    572  * - countOver61 is incremented with each LRx and RLx at level 61
    573  *
    574  * Popping levels with PDF must work in the opposite order so that level 61
    575  * is correct at the correct point. Underflows (too many PDFs) must be checked.
    576  *
    577  * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
    578  */
    579 static UBiDiDirection
    580 resolveExplicitLevels(UBiDi *pBiDi) {
    581     const DirProp *dirProps=pBiDi->dirProps;
    582     UBiDiLevel *levels=pBiDi->levels;
    583     const UChar *text=pBiDi->text;
    584 
    585     int32_t i=0, length=pBiDi->length;
    586     Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
    587     DirProp dirProp;
    588     UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
    589 
    590     UBiDiDirection direction;
    591     int32_t paraIndex=0;
    592 
    593     /* determine if the text is mixed-directional or single-directional */
    594     direction=directionFromFlags(pBiDi);
    595 
    596     /* we may not need to resolve any explicit levels, but for multiple
    597        paragraphs we want to loop on all chars to set the para boundaries */
    598     if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) {
    599         /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
    600     } else if((pBiDi->paraCount==1) &&
    601               (!(flags&MASK_EXPLICIT) ||
    602                (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) {
    603         /* mixed, but all characters are at the same embedding level */
    604         /* or we are in "inverse BiDi" */
    605         /* and we don't have contextual multiple paragraphs with some B char */
    606         /* set all levels to the paragraph level */
    607         for(i=0; i<length; ++i) {
    608             levels[i]=level;
    609         }
    610     } else {
    611         /* continue to perform (Xn) */
    612 
    613         /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
    614         /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
    615         UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
    616 
    617         UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL];        /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
    618         uint32_t countOver60=0, countOver61=0;  /* count overflows of explicit levels */
    619 
    620         /* recalculate the flags */
    621         flags=0;
    622 
    623         for(i=0; i<length; ++i) {
    624             dirProp=NO_CONTEXT_RTL(dirProps[i]);
    625             switch(dirProp) {
    626             case LRE:
    627             case LRO:
    628                 /* (X3, X5) */
    629                 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
    630                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
    631                     stack[stackTop]=embeddingLevel;
    632                     ++stackTop;
    633                     embeddingLevel=newLevel;
    634                     if(dirProp==LRO) {
    635                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
    636                     }
    637                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE
    638                        since this has already been done for newLevel which is
    639                        the source for embeddingLevel.
    640                      */
    641                 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
    642                     ++countOver61;
    643                 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
    644                     ++countOver60;
    645                 }
    646                 flags|=DIRPROP_FLAG(BN);
    647                 break;
    648             case RLE:
    649             case RLO:
    650                 /* (X2, X4) */
    651                 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
    652                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
    653                     stack[stackTop]=embeddingLevel;
    654                     ++stackTop;
    655                     embeddingLevel=newLevel;
    656                     if(dirProp==RLO) {
    657                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
    658                     }
    659                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE
    660                        since this has already been done for newLevel which is
    661                        the source for embeddingLevel.
    662                      */
    663                 } else {
    664                     ++countOver61;
    665                 }
    666                 flags|=DIRPROP_FLAG(BN);
    667                 break;
    668             case PDF:
    669                 /* (X7) */
    670                 /* handle all the overflow cases first */
    671                 if(countOver61>0) {
    672                     --countOver61;
    673                 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
    674                     /* handle LRx overflows from level 60 */
    675                     --countOver60;
    676                 } else if(stackTop>0) {
    677                     /* this is the pop operation; it also pops level 61 while countOver60>0 */
    678                     --stackTop;
    679                     embeddingLevel=stack[stackTop];
    680                 /* } else { (underflow) */
    681                 }
    682                 flags|=DIRPROP_FLAG(BN);
    683                 break;
    684             case B:
    685                 stackTop=0;
    686                 countOver60=countOver61=0;
    687                 level=GET_PARALEVEL(pBiDi, i);
    688                 if((i+1)<length) {
    689                     embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
    690                     if(!((text[i]==CR) && (text[i+1]==LF))) {
    691                         pBiDi->paras[paraIndex++]=i+1;
    692                     }
    693                 }
    694                 flags|=DIRPROP_FLAG(B);
    695                 break;
    696             case BN:
    697                 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
    698                 /* they will get their levels set correctly in adjustWSLevels() */
    699                 flags|=DIRPROP_FLAG(BN);
    700                 break;
    701             default:
    702                 /* all other types get the "real" level */
    703                 if(level!=embeddingLevel) {
    704                     level=embeddingLevel;
    705                     if(level&UBIDI_LEVEL_OVERRIDE) {
    706                         flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
    707                     } else {
    708                         flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
    709                     }
    710                 }
    711                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
    712                     flags|=DIRPROP_FLAG(dirProp);
    713                 }
    714                 break;
    715             }
    716 
    717             /*
    718              * We need to set reasonable levels even on BN codes and
    719              * explicit codes because we will later look at same-level runs (X10).
    720              */
    721             levels[i]=level;
    722         }
    723         if(flags&MASK_EMBEDDING) {
    724             flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    725         }
    726         if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
    727             flags|=DIRPROP_FLAG(L);
    728         }
    729 
    730         /* subsequently, ignore the explicit codes and BN (X9) */
    731 
    732         /* again, determine if the text is mixed-directional or single-directional */
    733         pBiDi->flags=flags;
    734         direction=directionFromFlags(pBiDi);
    735     }
    736 
    737     return direction;
    738 }
    739 
    740 /*
    741  * Use a pre-specified embedding levels array:
    742  *
    743  * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
    744  * ignore all explicit codes (X9),
    745  * and check all the preset levels.
    746  *
    747  * Recalculate the flags to have them reflect the real properties
    748  * after taking the explicit embeddings into account.
    749  */
    750 static UBiDiDirection
    751 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
    752     const DirProp *dirProps=pBiDi->dirProps;
    753     DirProp dirProp;
    754     UBiDiLevel *levels=pBiDi->levels;
    755     const UChar *text=pBiDi->text;
    756 
    757     int32_t i, length=pBiDi->length;
    758     Flags flags=0;  /* collect all directionalities in the text */
    759     UBiDiLevel level;
    760     uint32_t paraIndex=0;
    761 
    762     for(i=0; i<length; ++i) {
    763         level=levels[i];
    764         dirProp=NO_CONTEXT_RTL(dirProps[i]);
    765         if(level&UBIDI_LEVEL_OVERRIDE) {
    766             /* keep the override flag in levels[i] but adjust the flags */
    767             level&=~UBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
    768             flags|=DIRPROP_FLAG_O(level);
    769         } else {
    770             /* set the flags */
    771             flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
    772         }
    773         if((level<GET_PARALEVEL(pBiDi, i) &&
    774             !((0==level)&&(dirProp==B))) ||
    775            (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
    776             /* level out of bounds */
    777             *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    778             return UBIDI_LTR;
    779         }
    780         if((dirProp==B) && ((i+1)<length)) {
    781             if(!((text[i]==CR) && (text[i+1]==LF))) {
    782                 pBiDi->paras[paraIndex++]=i+1;
    783             }
    784         }
    785     }
    786     if(flags&MASK_EMBEDDING) {
    787         flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    788     }
    789 
    790     /* determine if the text is mixed-directional or single-directional */
    791     pBiDi->flags=flags;
    792     return directionFromFlags(pBiDi);
    793 }
    794 
    795 /******************************************************************
    796  The Properties state machine table
    797 *******************************************************************
    798 
    799  All table cells are 8 bits:
    800       bits 0..4:  next state
    801       bits 5..7:  action to perform (if > 0)
    802 
    803  Cells may be of format "n" where n represents the next state
    804  (except for the rightmost column).
    805  Cells may also be of format "s(x,y)" where x represents an action
    806  to perform and y represents the next state.
    807 
    808 *******************************************************************
    809  Definitions and type for properties state table
    810 *******************************************************************
    811 */
    812 #define IMPTABPROPS_COLUMNS 14
    813 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
    814 #define GET_STATEPROPS(cell) ((cell)&0x1f)
    815 #define GET_ACTIONPROPS(cell) ((cell)>>5)
    816 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
    817 
    818 static const uint8_t groupProp[] =          /* dirProp regrouped */
    819 {
    820 /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
    821     0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
    822 };
    823 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
    824 
    825 /******************************************************************
    826 
    827       PROPERTIES  STATE  TABLE
    828 
    829  In table impTabProps,
    830       - the ON column regroups ON and WS
    831       - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
    832       - the Res column is the reduced property assigned to a run
    833 
    834  Action 1: process current run1, init new run1
    835         2: init new run2
    836         3: process run1, process run2, init new run1
    837         4: process run1, set run1=run2, init new run2
    838 
    839  Notes:
    840   1) This table is used in resolveImplicitLevels().
    841   2) This table triggers actions when there is a change in the Bidi
    842      property of incoming characters (action 1).
    843   3) Most such property sequences are processed immediately (in
    844      fact, passed to processPropertySeq().
    845   4) However, numbers are assembled as one sequence. This means
    846      that undefined situations (like CS following digits, until
    847      it is known if the next char will be a digit) are held until
    848      following chars define them.
    849      Example: digits followed by CS, then comes another CS or ON;
    850               the digits will be processed, then the CS assigned
    851               as the start of an ON sequence (action 3).
    852   5) There are cases where more than one sequence must be
    853      processed, for instance digits followed by CS followed by L:
    854      the digits must be processed as one sequence, and the CS
    855      must be processed as an ON sequence, all this before starting
    856      assembling chars for the opening L sequence.
    857 
    858 
    859 */
    860 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
    861 {
    862 /*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,  Res */
    863 /* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,  DirProp_ON },
    864 /* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),   DirProp_L },
    865 /* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),   DirProp_R },
    866 /* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,   DirProp_R },
    867 /* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),  DirProp_EN },
    868 /* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),  DirProp_AN },
    869 /* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),  DirProp_AN },
    870 /* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),  DirProp_ON },
    871 /* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),  DirProp_ON },
    872 /* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),  DirProp_ON },
    873 /*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),  DirProp_EN },
    874 /*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),  DirProp_EN },
    875 /*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),  DirProp_AN },
    876 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),  DirProp_AN },
    877 /*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),  DirProp_ON },
    878 /*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),   DirProp_S },
    879 /*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),   DirProp_S },
    880 /*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),   DirProp_B }
    881 };
    882 
    883 /*  we must undef macro s because the levels table have a different
    884  *  structure (4 bits for action and 4 bits for next state.
    885  */
    886 #undef s
    887 
    888 /******************************************************************
    889  The levels state machine tables
    890 *******************************************************************
    891 
    892  All table cells are 8 bits:
    893       bits 0..3:  next state
    894       bits 4..7:  action to perform (if > 0)
    895 
    896  Cells may be of format "n" where n represents the next state
    897  (except for the rightmost column).
    898  Cells may also be of format "s(x,y)" where x represents an action
    899  to perform and y represents the next state.
    900 
    901  This format limits each table to 16 states each and to 15 actions.
    902 
    903 *******************************************************************
    904  Definitions and type for levels state tables
    905 *******************************************************************
    906 */
    907 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
    908 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
    909 #define GET_STATE(cell) ((cell)&0x0f)
    910 #define GET_ACTION(cell) ((cell)>>4)
    911 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
    912 
    913 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
    914 typedef uint8_t ImpAct[];
    915 
    916 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
    917  * instead of having a pair of ImpTab and a pair of ImpAct.
    918  */
    919 typedef struct ImpTabPair {
    920     const void * pImpTab[2];
    921     const void * pImpAct[2];
    922 } ImpTabPair;
    923 
    924 /******************************************************************
    925 
    926       LEVELS  STATE  TABLES
    927 
    928  In all levels state tables,
    929       - state 0 is the initial state
    930       - the Res column is the increment to add to the text level
    931         for this property sequence.
    932 
    933  The impAct arrays for each table of a pair map the local action
    934  numbers of the table to the total list of actions. For instance,
    935  action 2 in a given table corresponds to the action number which
    936  appears in entry [2] of the impAct array for that table.
    937  The first entry of all impAct arrays must be 0.
    938 
    939  Action 1: init conditional sequence
    940         2: prepend conditional sequence to current sequence
    941         3: set ON sequence to new level - 1
    942         4: init EN/AN/ON sequence
    943         5: fix EN/AN/ON sequence followed by R
    944         6: set previous level sequence to level 2
    945 
    946  Notes:
    947   1) These tables are used in processPropertySeq(). The input
    948      is property sequences as determined by resolveImplicitLevels.
    949   2) Most such property sequences are processed immediately
    950      (levels are assigned).
    951   3) However, some sequences cannot be assigned a final level till
    952      one or more following sequences are received. For instance,
    953      ON following an R sequence within an even-level paragraph.
    954      If the following sequence is R, the ON sequence will be
    955      assigned basic run level+1, and so will the R sequence.
    956   4) S is generally handled like ON, since its level will be fixed
    957      to paragraph level in adjustWSLevels().
    958 
    959 */
    960 
    961 static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
    962 /*  In this table, conditional sequences receive the higher possible level
    963     until proven otherwise.
    964 */
    965 {
    966 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    967 /* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
    968 /* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
    969 /* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
    970 /* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
    971 /* 4 : R+ON       */ { s(2,0),     1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
    972 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
    973 };
    974 static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
    975 /*  In this table, conditional sequences receive the lower possible level
    976     until proven otherwise.
    977 */
    978 {
    979 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    980 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
    981 /* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
    982 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
    983 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
    984 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
    985 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
    986 };
    987 static const ImpAct impAct0 = {0,1,2,3,4,5,6};
    988 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
    989                                            &impTabR_DEFAULT},
    990                                           {&impAct0, &impAct0}};
    991 
    992 static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
    993 /*  In this table, conditional sequences receive the higher possible level
    994     until proven otherwise.
    995 */
    996 {
    997 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    998 /* 0 : init       */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  0 },
    999 /* 1 : L+EN/AN    */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  2 },
   1000 /* 2 : R          */ {     0 ,     2 ,    4 ,      4 , s(1,3),     0 ,     0 ,  1 },
   1001 /* 3 : R+ON       */ { s(2,0),     2 ,    4 ,      4 ,     3 ,     3 , s(2,0),  1 },
   1002 /* 4 : R+EN/AN    */ {     0 ,     2 ,    4 ,      4 , s(1,3), s(1,3),     0 ,  2 }
   1003   };
   1004 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
   1005                                                    &impTabR_DEFAULT},
   1006                                                   {&impAct0, &impAct0}};
   1007 
   1008 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
   1009 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
   1010     until proven that there is L or sor/eor on both sides. AN is handled like EN.
   1011 */
   1012 {
   1013 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1014 /* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
   1015 /* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
   1016 /* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
   1017 /* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
   1018 /* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
   1019 /* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
   1020 };
   1021 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
   1022 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
   1023     until proven that there is L on both sides. AN is handled like EN.
   1024 */
   1025 {
   1026 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1027 /* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1028 /* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
   1029 /* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
   1030 /* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
   1031 /* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
   1032 };
   1033 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
   1034                         {&impTabL_GROUP_NUMBERS_WITH_R,
   1035                          &impTabR_GROUP_NUMBERS_WITH_R},
   1036                         {&impAct0, &impAct0}};
   1037 
   1038 
   1039 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
   1040 /*  This table is identical to the Default LTR table except that EN and AN are
   1041     handled like L.
   1042 */
   1043 {
   1044 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1045 /* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
   1046 /* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
   1047 /* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
   1048 /* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
   1049 /* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
   1050 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
   1051 };
   1052 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
   1053 /*  This table is identical to the Default RTL table except that EN and AN are
   1054     handled like L.
   1055 */
   1056 {
   1057 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1058 /* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1059 /* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
   1060 /* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
   1061 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
   1062 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
   1063 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
   1064 };
   1065 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
   1066                         {&impTabL_INVERSE_NUMBERS_AS_L,
   1067                          &impTabR_INVERSE_NUMBERS_AS_L},
   1068                         {&impAct0, &impAct0}};
   1069 
   1070 static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
   1071 /*  In this table, conditional sequences receive the lower possible level
   1072     until proven otherwise.
   1073 */
   1074 {
   1075 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1076 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
   1077 /* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
   1078 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
   1079 /* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
   1080 /* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
   1081 /* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
   1082 /* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
   1083 };
   1084 static const ImpAct impAct1 = {0,1,11,12};
   1085 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
   1086  */
   1087 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
   1088                         {&impTabL_DEFAULT,
   1089                          &impTabR_INVERSE_LIKE_DIRECT},
   1090                         {&impAct0, &impAct1}};
   1091 
   1092 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
   1093 /*  The case handled in this table is (visually):  R EN L
   1094 */
   1095 {
   1096 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1097 /* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1098 /* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
   1099 /* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
   1100 /* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
   1101 /* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
   1102 /* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
   1103 /* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
   1104 };
   1105 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
   1106 /*  The cases handled in this table are (visually):  R EN L
   1107                                                      R L AN L
   1108 */
   1109 {
   1110 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1111 /* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1112 /* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
   1113 /* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
   1114 /* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
   1115 /* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
   1116 /* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
   1117 /* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
   1118 };
   1119 static const ImpAct impAct2 = {0,1,7,8,9,10};
   1120 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
   1121                         {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
   1122                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
   1123                         {&impAct0, &impAct2}};
   1124 
   1125 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
   1126                         {&impTabL_NUMBERS_SPECIAL,
   1127                          &impTabR_INVERSE_LIKE_DIRECT},
   1128                         {&impAct0, &impAct1}};
   1129 
   1130 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
   1131 /*  The case handled in this table is (visually):  R EN L
   1132 */
   1133 {
   1134 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1135 /* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1136 /* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
   1137 /* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
   1138 /* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
   1139 /* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
   1140 };
   1141 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
   1142                         {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
   1143                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
   1144                         {&impAct0, &impAct2}};
   1145 
   1146 #undef s
   1147 
   1148 typedef struct {
   1149     const ImpTab * pImpTab;             /* level table pointer          */
   1150     const ImpAct * pImpAct;             /* action map array             */
   1151     int32_t startON;                    /* start of ON sequence         */
   1152     int32_t startL2EN;                  /* start of level 2 sequence    */
   1153     int32_t lastStrongRTL;              /* index of last found R or AL  */
   1154     int32_t state;                      /* current state                */
   1155     UBiDiLevel runLevel;                /* run level before implicit solving */
   1156 } LevState;
   1157 
   1158 /*------------------------------------------------------------------------*/
   1159 
   1160 static void
   1161 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
   1162   /* param pos:     position where to insert
   1163      param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
   1164   */
   1165 {
   1166 #define FIRSTALLOC  10
   1167     Point point;
   1168     InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
   1169 
   1170     if (pInsertPoints->capacity == 0)
   1171     {
   1172         pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
   1173         if (pInsertPoints->points == NULL)
   1174         {
   1175             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
   1176             return;
   1177         }
   1178         pInsertPoints->capacity=FIRSTALLOC;
   1179     }
   1180     if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
   1181     {
   1182         void * savePoints=pInsertPoints->points;
   1183         pInsertPoints->points=uprv_realloc(pInsertPoints->points,
   1184                                            pInsertPoints->capacity*2*sizeof(Point));
   1185         if (pInsertPoints->points == NULL)
   1186         {
   1187             pInsertPoints->points=savePoints;
   1188             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
   1189             return;
   1190         }
   1191         else  pInsertPoints->capacity*=2;
   1192     }
   1193     point.pos=pos;
   1194     point.flag=flag;
   1195     pInsertPoints->points[pInsertPoints->size]=point;
   1196     pInsertPoints->size++;
   1197 #undef FIRSTALLOC
   1198 }
   1199 
   1200 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
   1201 
   1202 /*
   1203  * This implementation of the (Wn) rules applies all rules in one pass.
   1204  * In order to do so, it needs a look-ahead of typically 1 character
   1205  * (except for W5: sequences of ET) and keeps track of changes
   1206  * in a rule Wp that affect a later Wq (p<q).
   1207  *
   1208  * The (Nn) and (In) rules are also performed in that same single loop,
   1209  * but effectively one iteration behind for white space.
   1210  *
   1211  * Since all implicit rules are performed in one step, it is not necessary
   1212  * to actually store the intermediate directional properties in dirProps[].
   1213  */
   1214 
   1215 static void
   1216 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
   1217                    int32_t start, int32_t limit) {
   1218     uint8_t cell, oldStateSeq, actionSeq;
   1219     const ImpTab * pImpTab=pLevState->pImpTab;
   1220     const ImpAct * pImpAct=pLevState->pImpAct;
   1221     UBiDiLevel * levels=pBiDi->levels;
   1222     UBiDiLevel level, addLevel;
   1223     InsertPoints * pInsertPoints;
   1224     int32_t start0, k;
   1225 
   1226     start0=start;                           /* save original start position */
   1227     oldStateSeq=(uint8_t)pLevState->state;
   1228     cell=(*pImpTab)[oldStateSeq][_prop];
   1229     pLevState->state=GET_STATE(cell);       /* isolate the new state */
   1230     actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
   1231     addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
   1232 
   1233     if(actionSeq) {
   1234         switch(actionSeq) {
   1235         case 1:                         /* init ON seq */
   1236             pLevState->startON=start0;
   1237             break;
   1238 
   1239         case 2:                         /* prepend ON seq to current seq */
   1240             start=pLevState->startON;
   1241             break;
   1242 
   1243         case 3:                         /* L or S after possible relevant EN/AN */
   1244             /* check if we had EN after R/AL */
   1245             if (pLevState->startL2EN >= 0) {
   1246                 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
   1247             }
   1248             pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
   1249             /* check if we had any relevant EN/AN after R/AL */
   1250             pInsertPoints=&(pBiDi->insertPoints);
   1251             if ((pInsertPoints->capacity == 0) ||
   1252                 (pInsertPoints->size <= pInsertPoints->confirmed))
   1253             {
   1254                 /* nothing, just clean up */
   1255                 pLevState->lastStrongRTL=-1;
   1256                 /* check if we have a pending conditional segment */
   1257                 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
   1258                 if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
   1259                     start=pLevState->startON;   /* reset to basic run level */
   1260                 }
   1261                 if (_prop == DirProp_S)                /* add LRM before S */
   1262                 {
   1263                     addPoint(pBiDi, start0, LRM_BEFORE);
   1264                     pInsertPoints->confirmed=pInsertPoints->size;
   1265                 }
   1266                 break;
   1267             }
   1268             /* reset previous RTL cont to level for LTR text */
   1269             for (k=pLevState->lastStrongRTL+1; k<start0; k++)
   1270             {
   1271                 /* reset odd level, leave runLevel+2 as is */
   1272                 levels[k]=(levels[k] - 2) & ~1;
   1273             }
   1274             /* mark insert points as confirmed */
   1275             pInsertPoints->confirmed=pInsertPoints->size;
   1276             pLevState->lastStrongRTL=-1;
   1277             if (_prop == DirProp_S)            /* add LRM before S */
   1278             {
   1279                 addPoint(pBiDi, start0, LRM_BEFORE);
   1280                 pInsertPoints->confirmed=pInsertPoints->size;
   1281             }
   1282             break;
   1283 
   1284         case 4:                         /* R/AL after possible relevant EN/AN */
   1285             /* just clean up */
   1286             pInsertPoints=&(pBiDi->insertPoints);
   1287             if (pInsertPoints->capacity > 0)
   1288                 /* remove all non confirmed insert points */
   1289                 pInsertPoints->size=pInsertPoints->confirmed;
   1290             pLevState->startON=-1;
   1291             pLevState->startL2EN=-1;
   1292             pLevState->lastStrongRTL=limit - 1;
   1293             break;
   1294 
   1295         case 5:                         /* EN/AN after R/AL + possible cont */
   1296             /* check for real AN */
   1297             if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
   1298                 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
   1299             {
   1300                 /* real AN */
   1301                 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
   1302                 {
   1303                     /* just note the righmost digit as a strong RTL */
   1304                     pLevState->lastStrongRTL=limit - 1;
   1305                     break;
   1306                 }
   1307                 if (pLevState->startL2EN >= 0)  /* after EN, no AN */
   1308                 {
   1309                     addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
   1310                     pLevState->startL2EN=-2;
   1311                 }
   1312                 /* note AN */
   1313                 addPoint(pBiDi, start0, LRM_BEFORE);
   1314                 break;
   1315             }
   1316             /* if first EN/AN after R/AL */
   1317             if (pLevState->startL2EN == -1) {
   1318                 pLevState->startL2EN=start0;
   1319             }
   1320             break;
   1321 
   1322         case 6:                         /* note location of latest R/AL */
   1323             pLevState->lastStrongRTL=limit - 1;
   1324             pLevState->startON=-1;
   1325             break;
   1326 
   1327         case 7:                         /* L after R+ON/EN/AN */
   1328             /* include possible adjacent number on the left */
   1329             for (k=start0-1; k>=0 && !(levels[k]&1); k--);
   1330             if(k>=0) {
   1331                 addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
   1332                 pInsertPoints=&(pBiDi->insertPoints);
   1333                 pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
   1334             }
   1335             pLevState->startON=start0;
   1336             break;
   1337 
   1338         case 8:                         /* AN after L */
   1339             /* AN numbers between L text on both sides may be trouble. */
   1340             /* tentatively bracket with LRMs; will be confirmed if followed by L */
   1341             addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
   1342             addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
   1343             break;
   1344 
   1345         case 9:                         /* R after L+ON/EN/AN */
   1346             /* false alert, infirm LRMs around previous AN */
   1347             pInsertPoints=&(pBiDi->insertPoints);
   1348             pInsertPoints->size=pInsertPoints->confirmed;
   1349             if (_prop == DirProp_S)            /* add RLM before S */
   1350             {
   1351                 addPoint(pBiDi, start0, RLM_BEFORE);
   1352                 pInsertPoints->confirmed=pInsertPoints->size;
   1353             }
   1354             break;
   1355 
   1356         case 10:                        /* L after L+ON/AN */
   1357             level=pLevState->runLevel + addLevel;
   1358             for(k=pLevState->startON; k<start0; k++) {
   1359                 if (levels[k]<level)
   1360                     levels[k]=level;
   1361             }
   1362             pInsertPoints=&(pBiDi->insertPoints);
   1363             pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
   1364             pLevState->startON=start0;
   1365             break;
   1366 
   1367         case 11:                        /* L after L+ON+EN/AN/ON */
   1368             level=pLevState->runLevel;
   1369             for(k=start0-1; k>=pLevState->startON; k--) {
   1370                 if(levels[k]==level+3) {
   1371                     while(levels[k]==level+3) {
   1372                         levels[k--]-=2;
   1373                     }
   1374                     while(levels[k]==level) {
   1375                         k--;
   1376                     }
   1377                 }
   1378                 if(levels[k]==level+2) {
   1379                     levels[k]=level;
   1380                     continue;
   1381                 }
   1382                 levels[k]=level+1;
   1383             }
   1384             break;
   1385 
   1386         case 12:                        /* R after L+ON+EN/AN/ON */
   1387             level=pLevState->runLevel+1;
   1388             for(k=start0-1; k>=pLevState->startON; k--) {
   1389                 if(levels[k]>level) {
   1390                     levels[k]-=2;
   1391                 }
   1392             }
   1393             break;
   1394 
   1395         default:                        /* we should never get here */
   1396             U_ASSERT(FALSE);
   1397             break;
   1398         }
   1399     }
   1400     if((addLevel) || (start < start0)) {
   1401         level=pLevState->runLevel + addLevel;
   1402         for(k=start; k<limit; k++) {
   1403             levels[k]=level;
   1404         }
   1405     }
   1406 }
   1407 
   1408 static void
   1409 resolveImplicitLevels(UBiDi *pBiDi,
   1410                       int32_t start, int32_t limit,
   1411                       DirProp sor, DirProp eor) {
   1412     const DirProp *dirProps=pBiDi->dirProps;
   1413 
   1414     LevState levState;
   1415     int32_t i, start1, start2;
   1416     uint8_t oldStateImp, stateImp, actionImp;
   1417     uint8_t gprop, resProp, cell;
   1418     UBool inverseRTL;
   1419     DirProp nextStrongProp=R;
   1420     int32_t nextStrongPos=-1;
   1421 
   1422     levState.startON = -1;  /* silence gcc flow analysis */
   1423 
   1424     /* check for RTL inverse BiDi mode */
   1425     /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
   1426      * loop on the text characters from end to start.
   1427      * This would need a different properties state table (at least different
   1428      * actions) and different levels state tables (maybe very similar to the
   1429      * LTR corresponding ones.
   1430      */
   1431     inverseRTL=(UBool)
   1432         ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
   1433          (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
   1434           pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
   1435     /* initialize for levels state table */
   1436     levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
   1437     levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
   1438     levState.state=0;
   1439     levState.runLevel=pBiDi->levels[start];
   1440     levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
   1441     levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
   1442     processPropertySeq(pBiDi, &levState, sor, start, start);
   1443     /* initialize for property state table */
   1444     if(NO_CONTEXT_RTL(dirProps[start])==NSM) {
   1445         stateImp = 1 + sor;
   1446     } else {
   1447         stateImp=0;
   1448     }
   1449     start1=start;
   1450     start2=start;
   1451 
   1452     for(i=start; i<=limit; i++) {
   1453         if(i>=limit) {
   1454             gprop=eor;
   1455         } else {
   1456             DirProp prop, prop1;
   1457             prop=NO_CONTEXT_RTL(dirProps[i]);
   1458             if(inverseRTL) {
   1459                 if(prop==AL) {
   1460                     /* AL before EN does not make it AN */
   1461                     prop=R;
   1462                 } else if(prop==EN) {
   1463                     if(nextStrongPos<=i) {
   1464                         /* look for next strong char (L/R/AL) */
   1465                         int32_t j;
   1466                         nextStrongProp=R;   /* set default */
   1467                         nextStrongPos=limit;
   1468                         for(j=i+1; j<limit; j++) {
   1469                             prop1=NO_CONTEXT_RTL(dirProps[j]);
   1470                             if(prop1==L || prop1==R || prop1==AL) {
   1471                                 nextStrongProp=prop1;
   1472                                 nextStrongPos=j;
   1473                                 break;
   1474                             }
   1475                         }
   1476                     }
   1477                     if(nextStrongProp==AL) {
   1478                         prop=AN;
   1479                     }
   1480                 }
   1481             }
   1482             gprop=groupProp[prop];
   1483         }
   1484         oldStateImp=stateImp;
   1485         cell=impTabProps[oldStateImp][gprop];
   1486         stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
   1487         actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
   1488         if((i==limit) && (actionImp==0)) {
   1489             /* there is an unprocessed sequence if its property == eor   */
   1490             actionImp=1;                    /* process the last sequence */
   1491         }
   1492         if(actionImp) {
   1493             resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
   1494             switch(actionImp) {
   1495             case 1:             /* process current seq1, init new seq1 */
   1496                 processPropertySeq(pBiDi, &levState, resProp, start1, i);
   1497                 start1=i;
   1498                 break;
   1499             case 2:             /* init new seq2 */
   1500                 start2=i;
   1501                 break;
   1502             case 3:             /* process seq1, process seq2, init new seq1 */
   1503                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
   1504                 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
   1505                 start1=i;
   1506                 break;
   1507             case 4:             /* process seq1, set seq1=seq2, init new seq2 */
   1508                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
   1509                 start1=start2;
   1510                 start2=i;
   1511                 break;
   1512             default:            /* we should never get here */
   1513                 U_ASSERT(FALSE);
   1514                 break;
   1515             }
   1516         }
   1517     }
   1518     /* flush possible pending sequence, e.g. ON */
   1519     processPropertySeq(pBiDi, &levState, eor, limit, limit);
   1520 }
   1521 
   1522 /* perform (L1) and (X9) ---------------------------------------------------- */
   1523 
   1524 /*
   1525  * Reset the embedding levels for some non-graphic characters (L1).
   1526  * This function also sets appropriate levels for BN, and
   1527  * explicit embedding types that are supposed to have been removed
   1528  * from the paragraph in (X9).
   1529  */
   1530 static void
   1531 adjustWSLevels(UBiDi *pBiDi) {
   1532     const DirProp *dirProps=pBiDi->dirProps;
   1533     UBiDiLevel *levels=pBiDi->levels;
   1534     int32_t i;
   1535 
   1536     if(pBiDi->flags&MASK_WS) {
   1537         UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
   1538         Flags flag;
   1539 
   1540         i=pBiDi->trailingWSStart;
   1541         while(i>0) {
   1542             /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
   1543             while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) {
   1544                 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
   1545                     levels[i]=0;
   1546                 } else {
   1547                     levels[i]=GET_PARALEVEL(pBiDi, i);
   1548                 }
   1549             }
   1550 
   1551             /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
   1552             /* here, i+1 is guaranteed to be <length */
   1553             while(i>0) {
   1554                 flag=DIRPROP_FLAG_NC(dirProps[--i]);
   1555                 if(flag&MASK_BN_EXPLICIT) {
   1556                     levels[i]=levels[i+1];
   1557                 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
   1558                     levels[i]=0;
   1559                     break;
   1560                 } else if(flag&MASK_B_S) {
   1561                     levels[i]=GET_PARALEVEL(pBiDi, i);
   1562                     break;
   1563                 }
   1564             }
   1565         }
   1566     }
   1567 }
   1568 
   1569 #define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
   1570 #define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
   1571 static void
   1572 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
   1573                 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
   1574     void *runsOnlyMemory;
   1575     int32_t *visualMap;
   1576     UChar *visualText;
   1577     int32_t saveLength, saveTrailingWSStart;
   1578     const UBiDiLevel *levels;
   1579     UBiDiLevel *saveLevels;
   1580     UBiDiDirection saveDirection;
   1581     UBool saveMayAllocateText;
   1582     Run *runs;
   1583     int32_t visualLength, i, j, visualStart, logicalStart,
   1584             runCount, runLength, addedRuns, insertRemove,
   1585             start, limit, step, indexOddBit, logicalPos,
   1586             index0, index1;
   1587     uint32_t saveOptions;
   1588 
   1589     pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
   1590     if(length==0) {
   1591         ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
   1592         goto cleanup3;
   1593     }
   1594     /* obtain memory for mapping table and visual text */
   1595     runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
   1596     if(runsOnlyMemory==NULL) {
   1597         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1598         goto cleanup3;
   1599     }
   1600     visualMap=runsOnlyMemory;
   1601     visualText=(UChar *)&visualMap[length];
   1602     saveLevels=(UBiDiLevel *)&visualText[length];
   1603     saveOptions=pBiDi->reorderingOptions;
   1604     if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
   1605         pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
   1606         pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
   1607     }
   1608     paraLevel&=1;                       /* accept only 0 or 1 */
   1609     ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
   1610     if(U_FAILURE(*pErrorCode)) {
   1611         goto cleanup3;
   1612     }
   1613     /* we cannot access directly pBiDi->levels since it is not yet set if
   1614      * direction is not MIXED
   1615      */
   1616     levels=ubidi_getLevels(pBiDi, pErrorCode);
   1617     uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
   1618     saveTrailingWSStart=pBiDi->trailingWSStart;
   1619     saveLength=pBiDi->length;
   1620     saveDirection=pBiDi->direction;
   1621 
   1622     /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
   1623      * the visual map and the dirProps array to drive the second call
   1624      * to ubidi_setPara (but must make provision for possible removal of
   1625      * BiDi controls.  Alternatively, only use the dirProps array via
   1626      * customized classifier callback.
   1627      */
   1628     visualLength=ubidi_writeReordered(pBiDi, visualText, length,
   1629                                       UBIDI_DO_MIRRORING, pErrorCode);
   1630     ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
   1631     if(U_FAILURE(*pErrorCode)) {
   1632         goto cleanup2;
   1633     }
   1634     pBiDi->reorderingOptions=saveOptions;
   1635 
   1636     pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
   1637     paraLevel^=1;
   1638     /* Because what we did with reorderingOptions, visualText may be shorter
   1639      * than the original text. But we don't want the levels memory to be
   1640      * reallocated shorter than the original length, since we need to restore
   1641      * the levels as after the first call to ubidi_setpara() before returning.
   1642      * We will force mayAllocateText to FALSE before the second call to
   1643      * ubidi_setpara(), and will restore it afterwards.
   1644      */
   1645     saveMayAllocateText=pBiDi->mayAllocateText;
   1646     pBiDi->mayAllocateText=FALSE;
   1647     ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
   1648     pBiDi->mayAllocateText=saveMayAllocateText;
   1649     ubidi_getRuns(pBiDi, pErrorCode);
   1650     if(U_FAILURE(*pErrorCode)) {
   1651         goto cleanup1;
   1652     }
   1653     /* check if some runs must be split, count how many splits */
   1654     addedRuns=0;
   1655     runCount=pBiDi->runCount;
   1656     runs=pBiDi->runs;
   1657     visualStart=0;
   1658     for(i=0; i<runCount; i++, visualStart+=runLength) {
   1659         runLength=runs[i].visualLimit-visualStart;
   1660         if(runLength<2) {
   1661             continue;
   1662         }
   1663         logicalStart=GET_INDEX(runs[i].logicalStart);
   1664         for(j=logicalStart+1; j<logicalStart+runLength; j++) {
   1665             index0=visualMap[j];
   1666             index1=visualMap[j-1];
   1667             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
   1668                 addedRuns++;
   1669             }
   1670         }
   1671     }
   1672     if(addedRuns) {
   1673         if(getRunsMemory(pBiDi, runCount+addedRuns)) {
   1674             if(runCount==1) {
   1675                 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
   1676                 pBiDi->runsMemory[0]=runs[0];
   1677             }
   1678             runs=pBiDi->runs=pBiDi->runsMemory;
   1679             pBiDi->runCount+=addedRuns;
   1680         } else {
   1681             goto cleanup1;
   1682         }
   1683     }
   1684     /* split runs which are not consecutive in source text */
   1685     for(i=runCount-1; i>=0; i--) {
   1686         runLength= i==0 ? runs[0].visualLimit :
   1687                           runs[i].visualLimit-runs[i-1].visualLimit;
   1688         logicalStart=runs[i].logicalStart;
   1689         indexOddBit=GET_ODD_BIT(logicalStart);
   1690         logicalStart=GET_INDEX(logicalStart);
   1691         if(runLength<2) {
   1692             if(addedRuns) {
   1693                 runs[i+addedRuns]=runs[i];
   1694             }
   1695             logicalPos=visualMap[logicalStart];
   1696             runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   1697                                             saveLevels[logicalPos]^indexOddBit);
   1698             continue;
   1699         }
   1700         if(indexOddBit) {
   1701             start=logicalStart;
   1702             limit=logicalStart+runLength-1;
   1703             step=1;
   1704         } else {
   1705             start=logicalStart+runLength-1;
   1706             limit=logicalStart;
   1707             step=-1;
   1708         }
   1709         for(j=start; j!=limit; j+=step) {
   1710             index0=visualMap[j];
   1711             index1=visualMap[j+step];
   1712             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
   1713                 logicalPos=BIDI_MIN(visualMap[start], index0);
   1714                 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   1715                                             saveLevels[logicalPos]^indexOddBit);
   1716                 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
   1717                 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
   1718                 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
   1719                 runs[i+addedRuns].insertRemove=insertRemove;
   1720                 runs[i].insertRemove&=~insertRemove;
   1721                 start=j+step;
   1722                 addedRuns--;
   1723             }
   1724         }
   1725         if(addedRuns) {
   1726             runs[i+addedRuns]=runs[i];
   1727         }
   1728         logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
   1729         runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   1730                                             saveLevels[logicalPos]^indexOddBit);
   1731     }
   1732 
   1733   cleanup1:
   1734     /* restore initial paraLevel */
   1735     pBiDi->paraLevel^=1;
   1736   cleanup2:
   1737     /* restore real text */
   1738     pBiDi->text=text;
   1739     pBiDi->length=saveLength;
   1740     pBiDi->originalLength=length;
   1741     pBiDi->direction=saveDirection;
   1742     /* the saved levels should never excess levelsSize, but we check anyway */
   1743     if(saveLength>pBiDi->levelsSize) {
   1744         saveLength=pBiDi->levelsSize;
   1745     }
   1746     uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
   1747     pBiDi->trailingWSStart=saveTrailingWSStart;
   1748     /* free memory for mapping table and visual text */
   1749     uprv_free(runsOnlyMemory);
   1750     if(pBiDi->runCount>1) {
   1751         pBiDi->direction=UBIDI_MIXED;
   1752     }
   1753   cleanup3:
   1754     pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
   1755 }
   1756 
   1757 /* ubidi_setPara ------------------------------------------------------------ */
   1758 
   1759 U_CAPI void U_EXPORT2
   1760 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
   1761               UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
   1762               UErrorCode *pErrorCode) {
   1763     UBiDiDirection direction;
   1764 
   1765     /* check the argument values */
   1766     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   1767     if(pBiDi==NULL || text==NULL || length<-1 ||
   1768        (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
   1769         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1770         return;
   1771     }
   1772 
   1773     if(length==-1) {
   1774         length=u_strlen(text);
   1775     }
   1776 
   1777     /* special treatment for RUNS_ONLY mode */
   1778     if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
   1779         setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
   1780         return;
   1781     }
   1782 
   1783     /* initialize the UBiDi structure */
   1784     pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
   1785     pBiDi->text=text;
   1786     pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
   1787     pBiDi->paraLevel=paraLevel;
   1788     pBiDi->direction=UBIDI_LTR;
   1789     pBiDi->paraCount=1;
   1790 
   1791     pBiDi->dirProps=NULL;
   1792     pBiDi->levels=NULL;
   1793     pBiDi->runs=NULL;
   1794     pBiDi->insertPoints.size=0;         /* clean up from last call */
   1795     pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
   1796 
   1797     /*
   1798      * Save the original paraLevel if contextual; otherwise, set to 0.
   1799      */
   1800     if(IS_DEFAULT_LEVEL(paraLevel)) {
   1801         pBiDi->defaultParaLevel=paraLevel;
   1802     } else {
   1803         pBiDi->defaultParaLevel=0;
   1804     }
   1805 
   1806     if(length==0) {
   1807         /*
   1808          * For an empty paragraph, create a UBiDi object with the paraLevel and
   1809          * the flags and the direction set but without allocating zero-length arrays.
   1810          * There is nothing more to do.
   1811          */
   1812         if(IS_DEFAULT_LEVEL(paraLevel)) {
   1813             pBiDi->paraLevel&=1;
   1814             pBiDi->defaultParaLevel=0;
   1815         }
   1816         if(paraLevel&1) {
   1817             pBiDi->flags=DIRPROP_FLAG(R);
   1818             pBiDi->direction=UBIDI_RTL;
   1819         } else {
   1820             pBiDi->flags=DIRPROP_FLAG(L);
   1821             pBiDi->direction=UBIDI_LTR;
   1822         }
   1823 
   1824         pBiDi->runCount=0;
   1825         pBiDi->paraCount=0;
   1826         pBiDi->pParaBiDi=pBiDi;         /* mark successful setPara */
   1827         return;
   1828     }
   1829 
   1830     pBiDi->runCount=-1;
   1831 
   1832     /*
   1833      * Get the directional properties,
   1834      * the flags bit-set, and
   1835      * determine the paragraph level if necessary.
   1836      */
   1837     if(getDirPropsMemory(pBiDi, length)) {
   1838         pBiDi->dirProps=pBiDi->dirPropsMemory;
   1839         getDirProps(pBiDi);
   1840     } else {
   1841         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1842         return;
   1843     }
   1844     /* the processed length may have changed if UBIDI_OPTION_STREAMING */
   1845     length= pBiDi->length;
   1846     pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
   1847     /* allocate paras memory */
   1848     if(pBiDi->paraCount>1) {
   1849         if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) {
   1850             pBiDi->paras=pBiDi->parasMemory;
   1851             pBiDi->paras[pBiDi->paraCount-1]=length;
   1852         } else {
   1853             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1854             return;
   1855         }
   1856     } else {
   1857         /* initialize paras for single paragraph */
   1858         pBiDi->paras=pBiDi->simpleParas;
   1859         pBiDi->simpleParas[0]=length;
   1860     }
   1861 
   1862     /* are explicit levels specified? */
   1863     if(embeddingLevels==NULL) {
   1864         /* no: determine explicit levels according to the (Xn) rules */\
   1865         if(getLevelsMemory(pBiDi, length)) {
   1866             pBiDi->levels=pBiDi->levelsMemory;
   1867             direction=resolveExplicitLevels(pBiDi);
   1868         } else {
   1869             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1870             return;
   1871         }
   1872     } else {
   1873         /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
   1874         pBiDi->levels=embeddingLevels;
   1875         direction=checkExplicitLevels(pBiDi, pErrorCode);
   1876         if(U_FAILURE(*pErrorCode)) {
   1877             return;
   1878         }
   1879     }
   1880 
   1881     /*
   1882      * The steps after (X9) in the UBiDi algorithm are performed only if
   1883      * the paragraph text has mixed directionality!
   1884      */
   1885     pBiDi->direction=direction;
   1886     switch(direction) {
   1887     case UBIDI_LTR:
   1888         /* make sure paraLevel is even */
   1889         pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
   1890 
   1891         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
   1892         pBiDi->trailingWSStart=0;
   1893         break;
   1894     case UBIDI_RTL:
   1895         /* make sure paraLevel is odd */
   1896         pBiDi->paraLevel|=1;
   1897 
   1898         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
   1899         pBiDi->trailingWSStart=0;
   1900         break;
   1901     default:
   1902         /*
   1903          *  Choose the right implicit state table
   1904          */
   1905         switch(pBiDi->reorderingMode) {
   1906         case UBIDI_REORDER_DEFAULT:
   1907             pBiDi->pImpTabPair=&impTab_DEFAULT;
   1908             break;
   1909         case UBIDI_REORDER_NUMBERS_SPECIAL:
   1910             pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
   1911             break;
   1912         case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
   1913             pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
   1914             break;
   1915         case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
   1916             pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
   1917             break;
   1918         case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
   1919             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
   1920                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
   1921             } else {
   1922                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
   1923             }
   1924             break;
   1925         case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
   1926             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
   1927                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
   1928             } else {
   1929                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
   1930             }
   1931             break;
   1932         default:
   1933             /* we should never get here */
   1934             U_ASSERT(FALSE);
   1935             break;
   1936         }
   1937         /*
   1938          * If there are no external levels specified and there
   1939          * are no significant explicit level codes in the text,
   1940          * then we can treat the entire paragraph as one run.
   1941          * Otherwise, we need to perform the following rules on runs of
   1942          * the text with the same embedding levels. (X10)
   1943          * "Significant" explicit level codes are ones that actually
   1944          * affect non-BN characters.
   1945          * Examples for "insignificant" ones are empty embeddings
   1946          * LRE-PDF, LRE-RLE-PDF-PDF, etc.
   1947          */
   1948         if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
   1949                                    !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
   1950             resolveImplicitLevels(pBiDi, 0, length,
   1951                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
   1952                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
   1953         } else {
   1954             /* sor, eor: start and end types of same-level-run */
   1955             UBiDiLevel *levels=pBiDi->levels;
   1956             int32_t start, limit=0;
   1957             UBiDiLevel level, nextLevel;
   1958             DirProp sor, eor;
   1959 
   1960             /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
   1961             level=GET_PARALEVEL(pBiDi, 0);
   1962             nextLevel=levels[0];
   1963             if(level<nextLevel) {
   1964                 eor=GET_LR_FROM_LEVEL(nextLevel);
   1965             } else {
   1966                 eor=GET_LR_FROM_LEVEL(level);
   1967             }
   1968 
   1969             do {
   1970                 /* determine start and limit of the run (end points just behind the run) */
   1971 
   1972                 /* the values for this run's start are the same as for the previous run's end */
   1973                 start=limit;
   1974                 level=nextLevel;
   1975                 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) {
   1976                     /* except if this is a new paragraph, then set sor = para level */
   1977                     sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
   1978                 } else {
   1979                     sor=eor;
   1980                 }
   1981 
   1982                 /* search for the limit of this run */
   1983                 while(++limit<length && levels[limit]==level) {}
   1984 
   1985                 /* get the correct level of the next run */
   1986                 if(limit<length) {
   1987                     nextLevel=levels[limit];
   1988                 } else {
   1989                     nextLevel=GET_PARALEVEL(pBiDi, length-1);
   1990                 }
   1991 
   1992                 /* determine eor from max(level, nextLevel); sor is last run's eor */
   1993                 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
   1994                     eor=GET_LR_FROM_LEVEL(nextLevel);
   1995                 } else {
   1996                     eor=GET_LR_FROM_LEVEL(level);
   1997                 }
   1998 
   1999                 /* if the run consists of overridden directional types, then there
   2000                    are no implicit types to be resolved */
   2001                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
   2002                     resolveImplicitLevels(pBiDi, start, limit, sor, eor);
   2003                 } else {
   2004                     /* remove the UBIDI_LEVEL_OVERRIDE flags */
   2005                     do {
   2006                         levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
   2007                     } while(start<limit);
   2008                 }
   2009             } while(limit<length);
   2010         }
   2011         /* check if we got any memory shortage while adding insert points */
   2012         if (U_FAILURE(pBiDi->insertPoints.errorCode))
   2013         {
   2014             *pErrorCode=pBiDi->insertPoints.errorCode;
   2015             return;
   2016         }
   2017         /* reset the embedding levels for some non-graphic characters (L1), (X9) */
   2018         adjustWSLevels(pBiDi);
   2019         break;
   2020     }
   2021     /* add RLM for inverse Bidi with contextual orientation resolving
   2022      * to RTL which would not round-trip otherwise
   2023      */
   2024     if((pBiDi->defaultParaLevel>0) &&
   2025        (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
   2026        ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
   2027         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
   2028         int32_t i, j, start, last;
   2029         DirProp dirProp;
   2030         for(i=0; i<pBiDi->paraCount; i++) {
   2031             last=pBiDi->paras[i]-1;
   2032             if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) {
   2033                 continue;           /* LTR paragraph */
   2034             }
   2035             start= i==0 ? 0 : pBiDi->paras[i - 1];
   2036             for(j=last; j>=start; j--) {
   2037                 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]);
   2038                 if(dirProp==L) {
   2039                     if(j<last) {
   2040                         while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) {
   2041                             last--;
   2042                         }
   2043                     }
   2044                     addPoint(pBiDi, last, RLM_BEFORE);
   2045                     break;
   2046                 }
   2047                 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
   2048                     break;
   2049                 }
   2050             }
   2051         }
   2052     }
   2053 
   2054     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
   2055         pBiDi->resultLength -= pBiDi->controlCount;
   2056     } else {
   2057         pBiDi->resultLength += pBiDi->insertPoints.size;
   2058     }
   2059     pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
   2060 }
   2061 
   2062 U_CAPI void U_EXPORT2
   2063 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
   2064     if(pBiDi!=NULL) {
   2065         pBiDi->orderParagraphsLTR=orderParagraphsLTR;
   2066     }
   2067 }
   2068 
   2069 U_CAPI UBool U_EXPORT2
   2070 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
   2071     if(pBiDi!=NULL) {
   2072         return pBiDi->orderParagraphsLTR;
   2073     } else {
   2074         return FALSE;
   2075     }
   2076 }
   2077 
   2078 U_CAPI UBiDiDirection U_EXPORT2
   2079 ubidi_getDirection(const UBiDi *pBiDi) {
   2080     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2081         return pBiDi->direction;
   2082     } else {
   2083         return UBIDI_LTR;
   2084     }
   2085 }
   2086 
   2087 U_CAPI const UChar * U_EXPORT2
   2088 ubidi_getText(const UBiDi *pBiDi) {
   2089     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2090         return pBiDi->text;
   2091     } else {
   2092         return NULL;
   2093     }
   2094 }
   2095 
   2096 U_CAPI int32_t U_EXPORT2
   2097 ubidi_getLength(const UBiDi *pBiDi) {
   2098     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2099         return pBiDi->originalLength;
   2100     } else {
   2101         return 0;
   2102     }
   2103 }
   2104 
   2105 U_CAPI int32_t U_EXPORT2
   2106 ubidi_getProcessedLength(const UBiDi *pBiDi) {
   2107     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2108         return pBiDi->length;
   2109     } else {
   2110         return 0;
   2111     }
   2112 }
   2113 
   2114 U_CAPI int32_t U_EXPORT2
   2115 ubidi_getResultLength(const UBiDi *pBiDi) {
   2116     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2117         return pBiDi->resultLength;
   2118     } else {
   2119         return 0;
   2120     }
   2121 }
   2122 
   2123 /* paragraphs API functions ------------------------------------------------- */
   2124 
   2125 U_CAPI UBiDiLevel U_EXPORT2
   2126 ubidi_getParaLevel(const UBiDi *pBiDi) {
   2127     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2128         return pBiDi->paraLevel;
   2129     } else {
   2130         return 0;
   2131     }
   2132 }
   2133 
   2134 U_CAPI int32_t U_EXPORT2
   2135 ubidi_countParagraphs(UBiDi *pBiDi) {
   2136     if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
   2137         return 0;
   2138     } else {
   2139         return pBiDi->paraCount;
   2140     }
   2141 }
   2142 
   2143 U_CAPI void U_EXPORT2
   2144 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
   2145                           int32_t *pParaStart, int32_t *pParaLimit,
   2146                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
   2147     int32_t paraStart;
   2148 
   2149     /* check the argument values */
   2150     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2151     RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
   2152     RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
   2153 
   2154     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
   2155     if(paraIndex) {
   2156         paraStart=pBiDi->paras[paraIndex-1];
   2157     } else {
   2158         paraStart=0;
   2159     }
   2160     if(pParaStart!=NULL) {
   2161         *pParaStart=paraStart;
   2162     }
   2163     if(pParaLimit!=NULL) {
   2164         *pParaLimit=pBiDi->paras[paraIndex];
   2165     }
   2166     if(pParaLevel!=NULL) {
   2167         *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
   2168     }
   2169 }
   2170 
   2171 U_CAPI int32_t U_EXPORT2
   2172 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
   2173                           int32_t *pParaStart, int32_t *pParaLimit,
   2174                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
   2175     uint32_t paraIndex;
   2176 
   2177     /* check the argument values */
   2178     /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
   2179     RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
   2180     RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
   2181     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
   2182     RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
   2183 
   2184     for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++);
   2185     ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
   2186     return paraIndex;
   2187 }
   2188 
   2189 U_CAPI void U_EXPORT2
   2190 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
   2191                        const void *newContext, UBiDiClassCallback **oldFn,
   2192                        const void **oldContext, UErrorCode *pErrorCode)
   2193 {
   2194     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2195     if(pBiDi==NULL) {
   2196         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2197         return;
   2198     }
   2199     if( oldFn )
   2200     {
   2201         *oldFn = pBiDi->fnClassCallback;
   2202     }
   2203     if( oldContext )
   2204     {
   2205         *oldContext = pBiDi->coClassCallback;
   2206     }
   2207     pBiDi->fnClassCallback = newFn;
   2208     pBiDi->coClassCallback = newContext;
   2209 }
   2210 
   2211 U_CAPI void U_EXPORT2
   2212 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
   2213 {
   2214     if(pBiDi==NULL) {
   2215         return;
   2216     }
   2217     if( fn )
   2218     {
   2219         *fn = pBiDi->fnClassCallback;
   2220     }
   2221     if( context )
   2222     {
   2223         *context = pBiDi->coClassCallback;
   2224     }
   2225 }
   2226 
   2227 U_CAPI UCharDirection U_EXPORT2
   2228 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
   2229 {
   2230     UCharDirection dir;
   2231 
   2232     if( pBiDi->fnClassCallback == NULL ||
   2233         (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
   2234     {
   2235         return ubidi_getClass(pBiDi->bdp, c);
   2236     } else {
   2237         return dir;
   2238     }
   2239 }
   2240 
   2241