Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2013, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  ubidi.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 1999jul27
     14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
     15 *
     16 */
     17 
     18 #include "cmemory.h"
     19 #include "unicode/utypes.h"
     20 #include "unicode/ustring.h"
     21 #include "unicode/uchar.h"
     22 #include "unicode/ubidi.h"
     23 #include "unicode/utf16.h"
     24 #include "ubidi_props.h"
     25 #include "ubidiimp.h"
     26 #include "uassert.h"
     27 
     28 /*
     29  * General implementation notes:
     30  *
     31  * Throughout the implementation, there are comments like (W2) that refer to
     32  * rules of the BiDi algorithm in its version 5, in this example to the second
     33  * rule of the resolution of weak types.
     34  *
     35  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
     36  * character according to UTF-16, the second UChar gets the directional property of
     37  * the entire character assigned, while the first one gets a BN, a boundary
     38  * neutral, type, which is ignored by most of the algorithm according to
     39  * rule (X9) and the implementation suggestions of the BiDi algorithm.
     40  *
     41  * Later, adjustWSLevels() will set the level for each BN to that of the
     42  * following character (UChar), which results in surrogate pairs getting the
     43  * same level on each of their surrogates.
     44  *
     45  * In a UTF-8 implementation, the same thing could be done: the last byte of
     46  * a multi-byte sequence would get the "real" property, while all previous
     47  * bytes of that sequence would get BN.
     48  *
     49  * It is not possible to assign all those parts of a character the same real
     50  * property because this would fail in the resolution of weak types with rules
     51  * that look at immediately surrounding types.
     52  *
     53  * As a related topic, this implementation does not remove Boundary Neutral
     54  * types from the input, but ignores them wherever this is relevant.
     55  * For example, the loop for the resolution of the weak types reads
     56  * types until it finds a non-BN.
     57  * Also, explicit embedding codes are neither changed into BN nor removed.
     58  * They are only treated the same way real BNs are.
     59  * As stated before, adjustWSLevels() takes care of them at the end.
     60  * For the purpose of conformance, the levels of all these codes
     61  * do not matter.
     62  *
     63  * Note that this implementation never modifies the dirProps
     64  * after the initial setup, except for FSI which is changed to either
     65  * LRI or RLI in getDirProps(), and paired brackets which may be changed
     66  * to L or R according to N0.
     67  *
     68  *
     69  * In this implementation, the resolution of weak types (Wn),
     70  * neutrals (Nn), and the assignment of the resolved level (In)
     71  * are all done in one single loop, in resolveImplicitLevels().
     72  * Changes of dirProp values are done on the fly, without writing
     73  * them back to the dirProps array.
     74  *
     75  *
     76  * This implementation contains code that allows to bypass steps of the
     77  * algorithm that are not needed on the specific paragraph
     78  * in order to speed up the most common cases considerably,
     79  * like text that is entirely LTR, or RTL text without numbers.
     80  *
     81  * Most of this is done by setting a bit for each directional property
     82  * in a flags variable and later checking for whether there are
     83  * any LTR characters or any RTL characters, or both, whether
     84  * there are any explicit embedding codes, etc.
     85  *
     86  * If the (Xn) steps are performed, then the flags are re-evaluated,
     87  * because they will then not contain the embedding codes any more
     88  * and will be adjusted for override codes, so that subsequently
     89  * more bypassing may be possible than what the initial flags suggested.
     90  *
     91  * If the text is not mixed-directional, then the
     92  * algorithm steps for the weak type resolution are not performed,
     93  * and all levels are set to the paragraph level.
     94  *
     95  * If there are no explicit embedding codes, then the (Xn) steps
     96  * are not performed.
     97  *
     98  * If embedding levels are supplied as a parameter, then all
     99  * explicit embedding codes are ignored, and the (Xn) steps
    100  * are not performed.
    101  *
    102  * White Space types could get the level of the run they belong to,
    103  * and are checked with a test of (flags&MASK_EMBEDDING) to
    104  * consider if the paragraph direction should be considered in
    105  * the flags variable.
    106  *
    107  * If there are no White Space types in the paragraph, then
    108  * (L1) is not necessary in adjustWSLevels().
    109  */
    110 
    111 /* to avoid some conditional statements, use tiny constant arrays */
    112 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
    113 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
    114 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
    115 
    116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
    117 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
    118 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
    119 
    120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
    121 
    122 /* UBiDi object management -------------------------------------------------- */
    123 
    124 U_CAPI UBiDi * U_EXPORT2
    125 ubidi_open(void)
    126 {
    127     UErrorCode errorCode=U_ZERO_ERROR;
    128     return ubidi_openSized(0, 0, &errorCode);
    129 }
    130 
    131 U_CAPI UBiDi * U_EXPORT2
    132 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
    133     UBiDi *pBiDi;
    134 
    135     /* check the argument values */
    136     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    137         return NULL;
    138     } else if(maxLength<0 || maxRunCount<0) {
    139         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    140         return NULL;    /* invalid arguments */
    141     }
    142 
    143     /* allocate memory for the object */
    144     pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
    145     if(pBiDi==NULL) {
    146         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    147         return NULL;
    148     }
    149 
    150     /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
    151     uprv_memset(pBiDi, 0, sizeof(UBiDi));
    152 
    153     /* get BiDi properties */
    154     pBiDi->bdp=ubidi_getSingleton();
    155 
    156     /* allocate memory for arrays as requested */
    157     if(maxLength>0) {
    158         if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
    159             !getInitialLevelsMemory(pBiDi, maxLength)
    160         ) {
    161             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    162         }
    163     } else {
    164         pBiDi->mayAllocateText=TRUE;
    165     }
    166 
    167     if(maxRunCount>0) {
    168         if(maxRunCount==1) {
    169             /* use simpleRuns[] */
    170             pBiDi->runsSize=sizeof(Run);
    171         } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
    172             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    173         }
    174     } else {
    175         pBiDi->mayAllocateRuns=TRUE;
    176     }
    177 
    178     if(U_SUCCESS(*pErrorCode)) {
    179         return pBiDi;
    180     } else {
    181         ubidi_close(pBiDi);
    182         return NULL;
    183     }
    184 }
    185 
    186 /*
    187  * We are allowed to allocate memory if memory==NULL or
    188  * mayAllocate==TRUE for each array that we need.
    189  * We also try to grow memory as needed if we
    190  * allocate it.
    191  *
    192  * Assume sizeNeeded>0.
    193  * If *pMemory!=NULL, then assume *pSize>0.
    194  *
    195  * ### this realloc() may unnecessarily copy the old data,
    196  * which we know we don't need any more;
    197  * is this the best way to do this??
    198  */
    199 U_CFUNC UBool
    200 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
    201     void **pMemory = (void **)bidiMem;
    202     /* check for existing memory */
    203     if(*pMemory==NULL) {
    204         /* we need to allocate memory */
    205         if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
    206             *pSize=sizeNeeded;
    207             return TRUE;
    208         } else {
    209             return FALSE;
    210         }
    211     } else {
    212         if(sizeNeeded<=*pSize) {
    213             /* there is already enough memory */
    214             return TRUE;
    215         }
    216         else if(!mayAllocate) {
    217             /* not enough memory, and we must not allocate */
    218             return FALSE;
    219         } else {
    220             /* we try to grow */
    221             void *memory;
    222             /* in most cases, we do not need the copy-old-data part of
    223              * realloc, but it is needed when adding runs using getRunsMemory()
    224              * in setParaRunsOnly()
    225              */
    226             if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
    227                 *pMemory=memory;
    228                 *pSize=sizeNeeded;
    229                 return TRUE;
    230             } else {
    231                 /* we failed to grow */
    232                 return FALSE;
    233             }
    234         }
    235     }
    236 }
    237 
    238 U_CAPI void U_EXPORT2
    239 ubidi_close(UBiDi *pBiDi) {
    240     if(pBiDi!=NULL) {
    241         pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
    242         if(pBiDi->dirPropsMemory!=NULL) {
    243             uprv_free(pBiDi->dirPropsMemory);
    244         }
    245         if(pBiDi->levelsMemory!=NULL) {
    246             uprv_free(pBiDi->levelsMemory);
    247         }
    248         if(pBiDi->openingsMemory!=NULL) {
    249             uprv_free(pBiDi->openingsMemory);
    250         }
    251         if(pBiDi->parasMemory!=NULL) {
    252             uprv_free(pBiDi->parasMemory);
    253         }
    254         if(pBiDi->runsMemory!=NULL) {
    255             uprv_free(pBiDi->runsMemory);
    256         }
    257         if(pBiDi->isolatesMemory!=NULL) {
    258             uprv_free(pBiDi->isolatesMemory);
    259         }
    260         if(pBiDi->insertPoints.points!=NULL) {
    261             uprv_free(pBiDi->insertPoints.points);
    262         }
    263 
    264         uprv_free(pBiDi);
    265     }
    266 }
    267 
    268 /* set to approximate "inverse BiDi" ---------------------------------------- */
    269 
    270 U_CAPI void U_EXPORT2
    271 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
    272     if(pBiDi!=NULL) {
    273         pBiDi->isInverse=isInverse;
    274         pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
    275                                           : UBIDI_REORDER_DEFAULT;
    276     }
    277 }
    278 
    279 U_CAPI UBool U_EXPORT2
    280 ubidi_isInverse(UBiDi *pBiDi) {
    281     if(pBiDi!=NULL) {
    282         return pBiDi->isInverse;
    283     } else {
    284         return FALSE;
    285     }
    286 }
    287 
    288 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
    289  * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
    290  * concept of RUNS_ONLY which is a double operation.
    291  * It could be advantageous to divide this into 3 concepts:
    292  * a) Operation: direct / inverse / RUNS_ONLY
    293  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
    294  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
    295  * This would allow combinations not possible today like RUNS_ONLY with
    296  * NUMBERS_SPECIAL.
    297  * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
    298  * REMOVE_CONTROLS for the inverse step.
    299  * Not all combinations would be supported, and probably not all do make sense.
    300  * This would need to document which ones are supported and what are the
    301  * fallbacks for unsupported combinations.
    302  */
    303 U_CAPI void U_EXPORT2
    304 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
    305     if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
    306                         && (reorderingMode < UBIDI_REORDER_COUNT)) {
    307         pBiDi->reorderingMode = reorderingMode;
    308         pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
    309     }
    310 }
    311 
    312 U_CAPI UBiDiReorderingMode U_EXPORT2
    313 ubidi_getReorderingMode(UBiDi *pBiDi) {
    314     if (pBiDi!=NULL) {
    315         return pBiDi->reorderingMode;
    316     } else {
    317         return UBIDI_REORDER_DEFAULT;
    318     }
    319 }
    320 
    321 U_CAPI void U_EXPORT2
    322 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
    323     if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
    324         reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
    325     }
    326     if (pBiDi!=NULL) {
    327         pBiDi->reorderingOptions=reorderingOptions;
    328     }
    329 }
    330 
    331 U_CAPI uint32_t U_EXPORT2
    332 ubidi_getReorderingOptions(UBiDi *pBiDi) {
    333     if (pBiDi!=NULL) {
    334         return pBiDi->reorderingOptions;
    335     } else {
    336         return 0;
    337     }
    338 }
    339 
    340 U_CAPI UBiDiDirection U_EXPORT2
    341 ubidi_getBaseDirection(const UChar *text,
    342 int32_t length){
    343 
    344     int32_t i;
    345     UChar32 uchar;
    346     UCharDirection dir;
    347 
    348     if( text==NULL || length<-1 ){
    349         return UBIDI_NEUTRAL;
    350     }
    351 
    352     if(length==-1) {
    353         length=u_strlen(text);
    354     }
    355 
    356     for( i = 0 ; i < length; ) {
    357         /* i is incremented by U16_NEXT */
    358         U16_NEXT(text, i, length, uchar);
    359         dir = u_charDirection(uchar);
    360         if( dir == U_LEFT_TO_RIGHT )
    361                 return UBIDI_LTR;
    362         if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
    363                 return UBIDI_RTL;
    364     }
    365     return UBIDI_NEUTRAL;
    366 }
    367 
    368 /* perform (P2)..(P3) ------------------------------------------------------- */
    369 
    370 /**
    371  * Returns the directionality of the first strong character
    372  * after the last B in prologue, if any.
    373  * Requires prologue!=null.
    374  */
    375 static DirProp
    376 firstL_R_AL(UBiDi *pBiDi) {
    377     const UChar *text=pBiDi->prologue;
    378     int32_t length=pBiDi->proLength;
    379     int32_t i;
    380     UChar32 uchar;
    381     DirProp dirProp, result=ON;
    382     for(i=0; i<length; ) {
    383         /* i is incremented by U16_NEXT */
    384         U16_NEXT(text, i, length, uchar);
    385         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
    386         if(result==ON) {
    387             if(dirProp==L || dirProp==R || dirProp==AL) {
    388                 result=dirProp;
    389             }
    390         } else {
    391             if(dirProp==B) {
    392                 result=ON;
    393             }
    394         }
    395     }
    396     return result;
    397 }
    398 
    399 /*
    400  * Check that there are enough entries in the array pointed to by pBiDi->paras
    401  */
    402 static UBool
    403 checkParaCount(UBiDi *pBiDi) {
    404     int32_t count=pBiDi->paraCount;
    405     if(pBiDi->paras==pBiDi->simpleParas) {
    406         if(count<=SIMPLE_PARAS_SIZE)
    407             return TRUE;
    408         if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2))
    409             return FALSE;
    410         pBiDi->paras=pBiDi->parasMemory;
    411         uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para));
    412         return TRUE;
    413     }
    414     if(!getInitialParasMemory(pBiDi, count * 2))
    415         return FALSE;
    416     pBiDi->paras=pBiDi->parasMemory;
    417     return TRUE;
    418 }
    419 
    420 /*
    421  * Get the directional properties for the text, calculate the flags bit-set, and
    422  * determine the paragraph level if necessary (in pBiDi->paras[i].level).
    423  * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
    424  */
    425 static UBool
    426 getDirProps(UBiDi *pBiDi) {
    427     const UChar *text=pBiDi->text;
    428     DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
    429 
    430     int32_t i=0, originalLength=pBiDi->originalLength;
    431     Flags flags=0;      /* collect all directionalities in the text */
    432     UChar32 uchar;
    433     DirProp dirProp=0, defaultParaLevel=0;  /* initialize to avoid compiler warnings */
    434     UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
    435     /* for inverse BiDi, the default para level is set to RTL if there is a
    436        strong R or AL character at either end of the text                            */
    437     UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
    438             (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
    439              pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
    440     int32_t lastArabicPos=-1;
    441     int32_t controlCount=0;
    442     UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
    443                                        UBIDI_OPTION_REMOVE_CONTROLS);
    444 
    445     typedef enum {
    446          NOT_SEEKING_STRONG,            /* 0: not contextual paraLevel, not after FSI */
    447          SEEKING_STRONG_FOR_PARA,       /* 1: looking for first strong char in para */
    448          SEEKING_STRONG_FOR_FSI,        /* 2: looking for first strong after FSI */
    449          LOOKING_FOR_PDI                /* 3: found strong after FSI, looking for PDI */
    450     } State;
    451     State state;
    452     DirProp lastStrong=ON;              /* for default level & inverse BiDi */
    453     /* The following stacks are used to manage isolate sequences. Those
    454        sequences may be nested, but obviously never more deeply than the
    455        maximum explicit embedding level.
    456        lastStack is the index of the last used entry in the stack. A value of -1
    457        means that there is no open isolate sequence.
    458        lastStack is reset to -1 on paragraph boundaries. */
    459     /* The following stack contains the position of the initiator of
    460        each open isolate sequence */
    461     int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
    462     /* The following stack contains the last known state before
    463        encountering the initiator of an isolate sequence */
    464     int8_t  previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
    465     int32_t stackLast=-1;
    466 
    467     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
    468         pBiDi->length=0;
    469     defaultParaLevel=pBiDi->paraLevel&1;
    470     if(isDefaultLevel) {
    471         pBiDi->paras[0].level=defaultParaLevel;
    472         lastStrong=defaultParaLevel;
    473         if(pBiDi->proLength>0 &&                    /* there is a prologue */
    474            (dirProp=firstL_R_AL(pBiDi))!=ON) {  /* with a strong character */
    475             if(dirProp==L)
    476                 pBiDi->paras[0].level=0;    /* set the default para level */
    477             else
    478                 pBiDi->paras[0].level=1;    /* set the default para level */
    479             state=NOT_SEEKING_STRONG;
    480         } else {
    481             state=SEEKING_STRONG_FOR_PARA;
    482         }
    483     } else {
    484         pBiDi->paras[0].level=pBiDi->paraLevel;
    485         state=NOT_SEEKING_STRONG;
    486     }
    487     /* count paragraphs and determine the paragraph level (P2..P3) */
    488     /*
    489      * see comment in ubidi.h:
    490      * the UBIDI_DEFAULT_XXX values are designed so that
    491      * their bit 0 alone yields the intended default
    492      */
    493     for( /* i=0 above */ ; i<originalLength; ) {
    494         /* i is incremented by U16_NEXT */
    495         U16_NEXT(text, i, originalLength, uchar);
    496         flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
    497         dirProps[i-1]=dirProp;
    498         if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
    499             flags|=DIRPROP_FLAG(BN);
    500             dirProps[i-2]=BN;
    501         }
    502         if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
    503             controlCount++;
    504         if(dirProp==L) {
    505             if(state==SEEKING_STRONG_FOR_PARA) {
    506                 pBiDi->paras[pBiDi->paraCount-1].level=0;
    507                 state=NOT_SEEKING_STRONG;
    508             }
    509             else if(state==SEEKING_STRONG_FOR_FSI) {
    510                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
    511                     dirProps[isolateStartStack[stackLast]]=LRI;
    512                     flags|=DIRPROP_FLAG(LRI);
    513                 }
    514                 state=LOOKING_FOR_PDI;
    515             }
    516             lastStrong=L;
    517             continue;
    518         }
    519         if(dirProp==R || dirProp==AL) {
    520             if(state==SEEKING_STRONG_FOR_PARA) {
    521                 pBiDi->paras[pBiDi->paraCount-1].level=1;
    522                 state=NOT_SEEKING_STRONG;
    523             }
    524             else if(state==SEEKING_STRONG_FOR_FSI) {
    525                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
    526                     dirProps[isolateStartStack[stackLast]]=RLI;
    527                     flags|=DIRPROP_FLAG(RLI);
    528                 }
    529                 state=LOOKING_FOR_PDI;
    530             }
    531             lastStrong=R;
    532             if(dirProp==AL)
    533                 lastArabicPos=i-1;
    534             continue;
    535         }
    536         if(dirProp>=FSI && dirProp<=RLI) {  /* FSI, LRI or RLI */
    537             stackLast++;
    538             if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
    539                 isolateStartStack[stackLast]=i-1;
    540                 previousStateStack[stackLast]=state;
    541             }
    542             if(dirProp==FSI)
    543                 state=SEEKING_STRONG_FOR_FSI;
    544             else
    545                 state=LOOKING_FOR_PDI;
    546             continue;
    547         }
    548         if(dirProp==PDI) {
    549             if(state==SEEKING_STRONG_FOR_FSI) {
    550                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
    551                     dirProps[isolateStartStack[stackLast]]=LRI;
    552                     flags|=DIRPROP_FLAG(LRI);
    553                 }
    554             }
    555             if(stackLast>=0) {
    556                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
    557                     state=previousStateStack[stackLast];
    558                 stackLast--;
    559             }
    560             continue;
    561         }
    562         if(dirProp==B) {
    563             if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
    564                 continue;
    565             pBiDi->paras[pBiDi->paraCount-1].limit=i;
    566             if(isDefaultLevelInverse && lastStrong==R)
    567                 pBiDi->paras[pBiDi->paraCount-1].level=1;
    568             if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
    569                 /* When streaming, we only process whole paragraphs
    570                    thus some updates are only done on paragraph boundaries */
    571                 pBiDi->length=i;        /* i is index to next character */
    572                 pBiDi->controlCount=controlCount;
    573             }
    574             if(i<originalLength) {              /* B not last char in text */
    575                 pBiDi->paraCount++;
    576                 if(checkParaCount(pBiDi)==FALSE)    /* not enough memory for a new para entry */
    577                     return FALSE;
    578                 if(isDefaultLevel) {
    579                     pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
    580                     state=SEEKING_STRONG_FOR_PARA;
    581                     lastStrong=defaultParaLevel;
    582                 } else {
    583                     pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
    584                     state=NOT_SEEKING_STRONG;
    585                 }
    586                 stackLast=-1;
    587             }
    588             continue;
    589         }
    590     }
    591     /* Ignore still open isolate sequences with overflow */
    592     if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
    593         stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
    594         if(dirProps[previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL]]!=FSI)
    595             state=LOOKING_FOR_PDI;
    596     }
    597     /* Resolve direction of still unresolved open FSI sequences */
    598     while(stackLast>=0) {
    599         if(state==SEEKING_STRONG_FOR_FSI) {
    600             dirProps[isolateStartStack[stackLast]]=LRI;
    601             flags|=DIRPROP_FLAG(LRI);
    602         }
    603         state=previousStateStack[stackLast];
    604         stackLast--;
    605     }
    606     /* When streaming, ignore text after the last paragraph separator */
    607     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
    608         if(pBiDi->length<originalLength)
    609             pBiDi->paraCount--;
    610     } else {
    611         pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
    612         pBiDi->controlCount=controlCount;
    613     }
    614     /* For inverse bidi, default para direction is RTL if there is
    615        a strong R or AL at either end of the paragraph */
    616     if(isDefaultLevelInverse && lastStrong==R) {
    617         pBiDi->paras[pBiDi->paraCount-1].level=1;
    618     }
    619     if(isDefaultLevel) {
    620         pBiDi->paraLevel=pBiDi->paras[0].level;
    621     }
    622     /* The following is needed to resolve the text direction for default level
    623        paragraphs containing no strong character */
    624     for(i=0; i<pBiDi->paraCount; i++)
    625         flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
    626 
    627     if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
    628         flags|=DIRPROP_FLAG(L);
    629     }
    630     pBiDi->flags=flags;
    631     pBiDi->lastArabicPos=lastArabicPos;
    632     return TRUE;
    633 }
    634 
    635 /* determine the paragraph level at position index */
    636 U_CFUNC UBiDiLevel
    637 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
    638     int32_t i;
    639     for(i=0; i<pBiDi->paraCount; i++)
    640         if(pindex<pBiDi->paras[i].limit)
    641             break;
    642     if(i>=pBiDi->paraCount)
    643         i=pBiDi->paraCount-1;
    644     return (UBiDiLevel)(pBiDi->paras[i].level);
    645 }
    646 
    647 /* Functions for handling paired brackets ----------------------------------- */
    648 
    649 /* In the isoRuns array, the first entry is used for text outside of any
    650    isolate sequence.  Higher entries are used for each more deeply nested
    651    isolate sequence. isoRunLast is the index of the last used entry.  The
    652    openings array is used to note the data of opening brackets not yet
    653    matched by a closing bracket, or matched but still susceptible to change
    654    level.
    655    Each isoRun entry contains the index of the first and
    656    one-after-last openings entries for pending opening brackets it
    657    contains.  The next openings entry to use is the one-after-last of the
    658    most deeply nested isoRun entry.
    659    isoRun entries also contain their current embedding level and the last
    660    encountered strong character, since these will be needed to resolve
    661    the level of paired brackets.  */
    662 
    663 static void
    664 bracketInit(UBiDi *pBiDi, BracketData *bd) {
    665     bd->pBiDi=pBiDi;
    666     bd->isoRunLast=0;
    667     bd->isoRuns[0].start=0;
    668     bd->isoRuns[0].limit=0;
    669     bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
    670     bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1;
    671     bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0;
    672     if(pBiDi->openingsMemory) {
    673         bd->openings=pBiDi->openingsMemory;
    674         bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
    675     } else {
    676         bd->openings=bd->simpleOpenings;
    677         bd->openingsCount=SIMPLE_OPENINGS_SIZE;
    678     }
    679     bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
    680                          bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
    681 }
    682 
    683 /* paragraph boundary */
    684 static void
    685 bracketProcessB(BracketData *bd, UBiDiLevel level) {
    686     bd->isoRunLast=0;
    687     bd->isoRuns[0].limit=0;
    688     bd->isoRuns[0].level=level;
    689     bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=level&1;
    690     bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0;
    691 }
    692 
    693 /* LRE, LRO, RLE, RLO, PDF */
    694 static void
    695 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
    696                        UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
    697     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    698     DirProp *dirProps=bd->pBiDi->dirProps;
    699     if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO)  /* after an isolate */
    700         return;
    701     if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)>
    702        (contextLevel&~UBIDI_LEVEL_OVERRIDE))        /* not a PDF */
    703         contextLevel=embeddingLevel;
    704     pLastIsoRun->limit=pLastIsoRun->start;
    705     pLastIsoRun->level=embeddingLevel;
    706     pLastIsoRun->lastStrong=pLastIsoRun->contextDir=contextLevel&1;
    707     pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=lastCcPos;
    708 }
    709 
    710 /* LRI or RLI */
    711 static void
    712 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
    713     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    714     int16_t lastLimit;
    715     lastLimit=pLastIsoRun->limit;
    716     bd->isoRunLast++;
    717     pLastIsoRun++;
    718     pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
    719     pLastIsoRun->level=level;
    720     pLastIsoRun->lastStrong=pLastIsoRun->contextDir=level&1;
    721     pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=0;
    722 }
    723 
    724 /* PDI */
    725 static void
    726 bracketProcessPDI(BracketData *bd) {
    727     bd->isoRunLast--;
    728 }
    729 
    730 /* newly found opening bracket: create an openings entry */
    731 static UBool                            /* return TRUE if success */
    732 bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
    733     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    734     Opening *pOpening;
    735     if(pLastIsoRun->limit>=bd->openingsCount) {  /* no available new entry */
    736         UBiDi *pBiDi=bd->pBiDi;
    737         if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
    738             return FALSE;
    739         if(bd->openings==bd->simpleOpenings)
    740             uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
    741                         SIMPLE_OPENINGS_SIZE * sizeof(Opening));
    742         bd->openings=pBiDi->openingsMemory;     /* may have changed */
    743         bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
    744     }
    745     pOpening=&bd->openings[pLastIsoRun->limit];
    746     pOpening->position=position;
    747     pOpening->match=match;
    748     pOpening->contextDir=pLastIsoRun->contextDir;
    749     pOpening->contextPos=pLastIsoRun->contextPos;
    750     pOpening->flags=0;
    751     pLastIsoRun->limit++;
    752     return TRUE;
    753 }
    754 
    755 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
    756 static void
    757 fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
    758     /* This function calls itself recursively */
    759     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    760     Opening *qOpening;
    761     DirProp *dirProps=bd->pBiDi->dirProps;
    762     int32_t k, openingPosition, closingPosition;
    763     for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
    764         if(qOpening->match>=0)      /* not an N0c match */
    765             continue;
    766         if(newPropPosition<qOpening->contextPos)
    767             break;
    768         if(newPropPosition>=qOpening->position)
    769             continue;
    770         if(newProp==qOpening->contextDir)
    771             break;
    772         openingPosition=qOpening->position;
    773         dirProps[openingPosition]=dirProps[newPropPosition];
    774         closingPosition=-(qOpening->match);
    775         dirProps[closingPosition]= newProp; /* can never be AL */
    776         qOpening->match=0;                  /* prevent further changes */
    777         fixN0c(bd, k, openingPosition, newProp);
    778         fixN0c(bd, k, closingPosition, newProp);
    779     }
    780 }
    781 
    782 /* handle strong characters, digits and candidates for closing brackets */
    783 static UBool                            /* return TRUE if success */
    784 bracketProcessChar(BracketData *bd, int32_t position, DirProp dirProp) {
    785     IsoRun *pLastIsoRun;
    786     Opening *pOpening, *qOpening;
    787     DirProp *dirProps, newProp;
    788     UBiDiDirection direction;
    789     uint16_t flag;
    790     int32_t i, k;
    791     UBool stable;
    792     UChar c, match;
    793     dirProps=bd->pBiDi->dirProps;
    794     if(DIRPROP_FLAG(dirProp)&MASK_STRONG_EN_AN) { /* L, R, AL, EN or AN */
    795         pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    796         /* AN after R or AL becomes R or AL; after L or L+AN, it is kept as-is */
    797         if(dirProp==AN && (pLastIsoRun->lastStrong==R || pLastIsoRun->lastStrong==AL))
    798             dirProp=pLastIsoRun->lastStrong;
    799         /* EN after L or L+AN becomes L; after R or AL, it becomes R or AL */
    800         if(dirProp==EN) {
    801             if(pLastIsoRun->lastStrong==L || pLastIsoRun->lastStrong==AN) {
    802                 dirProp=L;
    803                 if(!bd->isNumbersSpecial)
    804                     dirProps[position]=ENL;
    805             }
    806             else {
    807                 dirProp=pLastIsoRun->lastStrong;    /* may be R or AL */
    808                 if(!bd->isNumbersSpecial)
    809                     dirProps[position]= dirProp==AL ? AN : ENR;
    810             }
    811         }
    812         pLastIsoRun->lastStrong=dirProp;
    813         pLastIsoRun->contextDir=DIR_FROM_STRONG(dirProp);
    814         pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=position;
    815         if(dirProp==AL || dirProp==AN)
    816             dirProp=R;
    817         flag=DIRPROP_FLAG(dirProp);
    818         /* strong characters found after an unmatched opening bracket
    819            must be noted for possibly applying N0b */
    820         for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
    821             bd->openings[i].flags|=flag;
    822         return TRUE;
    823     }
    824     if(dirProp!=ON)
    825         return TRUE;
    826     /* First see if it is a matching closing bracket. Hopefully, this is more
    827        efficient than checking if it is a closing bracket at all */
    828     c=bd->pBiDi->text[position];
    829     pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    830     for(i=pLastIsoRun->limit-1; i>=pLastIsoRun->start; i--) {
    831         if(bd->openings[i].match!=c)
    832             continue;
    833         /* We have a match */
    834         pOpening=&bd->openings[i];
    835         direction=pLastIsoRun->level&1;
    836         stable=TRUE;            /* assume stable until proved otherwise */
    837 
    838         /* The stable flag is set when brackets are paired and their
    839            level is resolved and cannot be changed by what will be
    840            found later in the source string.
    841            An unstable match can occur only when applying N0c, where
    842            the resolved level depends on the preceding context, and
    843            this context may be affected by text occurring later.
    844            Example: RTL paragraph containing:  abc[(latin) HEBREW]
    845            When the closing parenthesis is encountered, it appears
    846            that N0c1 must be applied since 'abc' sets an opposite
    847            direction context and both parentheses receive level 2.
    848            However, when the closing square bracket is processed,
    849            N0b applies because of 'HEBREW' being included within the
    850            brackets, thus the square brackets are treated like R and
    851            receive level 1. However, this changes the preceding
    852            context of the opening parenthesis, and it now appears
    853            that N0c2 must be applied to the parentheses rather than
    854            N0c1. */
    855 
    856         if((direction==0 && pOpening->flags&FOUND_L) ||
    857            (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
    858             newProp=direction;
    859         }
    860         else if(pOpening->flags&(FOUND_L|FOUND_R)) {    /* N0c */
    861             if(direction!=pOpening->contextDir) {
    862                 newProp=pOpening->contextDir;           /* N0c1 */
    863                 /* it is stable if there is no preceding text or in
    864                    conditions too complicated and not worth checking */
    865                 stable=(i==pLastIsoRun->start);
    866             }
    867             else
    868                 newProp=direction;                      /* N0c2 */
    869         }
    870         else {
    871             newProp=BN;                                 /* N0d */
    872         }
    873         if(newProp!=BN) {
    874             dirProps[pOpening->position]=newProp;
    875             dirProps[position]=newProp;
    876             pLastIsoRun->contextDir=newProp;
    877             pLastIsoRun->contextPos=position;
    878         }
    879         /* Update nested N0c pairs that may be affected */
    880         if(newProp==direction)
    881             fixN0c(bd, i, pOpening->position, newProp);
    882         if(stable) {
    883             pLastIsoRun->limit=i;   /* forget any brackets nested within this pair */
    884             /* remove lower located synonyms if any */
    885             while(pLastIsoRun->limit>pLastIsoRun->start &&
    886                   bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
    887                 pLastIsoRun->limit--;
    888         }
    889         else {
    890             pOpening->match=-position;
    891             /* neutralize lower located synonyms if any */
    892             k=i-1;
    893             while(k>=pLastIsoRun->start &&
    894                   bd->openings[k].position==pOpening->position)
    895                 bd->openings[k--].match=0;
    896             /* neutralize any unmatched opening between the current pair;
    897                this will also neutralize higher located synonyms if any */
    898             for(k=i+1; k<pLastIsoRun->limit; k++) {
    899                 qOpening=&bd->openings[k];
    900                 if(qOpening->position>=position)
    901                     break;
    902                 if(qOpening->match>0)
    903                     qOpening->match=0;
    904             }
    905         }
    906         return TRUE;
    907     }
    908     /* We get here only if the ON character was not a matching closing bracket */
    909     /* Now see if it is an opening bracket */
    910     match=u_getBidiPairedBracket(c);    /* get the matching char */
    911     if(match==c)                        /* if no matching char */
    912         return TRUE;
    913     if(ubidi_getPairedBracketType(bd->pBiDi->bdp, c)!=U_BPT_OPEN)
    914         return TRUE;                    /* not an opening bracket */
    915     /* special case: process synonyms
    916        create an opening entry for each synonym */
    917     if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
    918         if(!bracketAddOpening(bd, 0x3009, position))
    919             return FALSE;
    920     }
    921     else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
    922         if(!bracketAddOpening(bd, 0x232A, position))
    923             return FALSE;
    924     }
    925     return bracketAddOpening(bd, match, position);
    926 }
    927 
    928 /* perform (X1)..(X9) ------------------------------------------------------- */
    929 
    930 /* determine if the text is mixed-directional or single-directional */
    931 static UBiDiDirection
    932 directionFromFlags(UBiDi *pBiDi) {
    933     Flags flags=pBiDi->flags;
    934     /* if the text contains AN and neutrals, then some neutrals may become RTL */
    935     if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
    936         return UBIDI_LTR;
    937     } else if(!(flags&MASK_LTR)) {
    938         return UBIDI_RTL;
    939     } else {
    940         return UBIDI_MIXED;
    941     }
    942 }
    943 
    944 /*
    945  * Resolve the explicit levels as specified by explicit embedding codes.
    946  * Recalculate the flags to have them reflect the real properties
    947  * after taking the explicit embeddings into account.
    948  *
    949  * The BiDi algorithm is designed to result in the same behavior whether embedding
    950  * levels are externally specified (from "styled text", supposedly the preferred
    951  * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
    952  * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
    953  * However, in a real implementation, the removal of these codes and their index
    954  * positions in the plain text is undesirable since it would result in
    955  * reallocated, reindexed text.
    956  * Instead, this implementation leaves the codes in there and just ignores them
    957  * in the subsequent processing.
    958  * In order to get the same reordering behavior, positions with a BN or a not-isolate
    959  * explicit embedding code just get the same level assigned as the last "real"
    960  * character.
    961  *
    962  * Some implementations, not this one, then overwrite some of these
    963  * directionality properties at "real" same-level-run boundaries by
    964  * L or R codes so that the resolution of weak types can be performed on the
    965  * entire paragraph at once instead of having to parse it once more and
    966  * perform that resolution on same-level-runs.
    967  * This limits the scope of the implicit rules in effectively
    968  * the same way as the run limits.
    969  *
    970  * Instead, this implementation does not modify these codes, except for
    971  * paired brackets whose properties (ON) may be replaced by L or R.
    972  * On one hand, the paragraph has to be scanned for same-level-runs, but
    973  * on the other hand, this saves another loop to reset these codes,
    974  * or saves making and modifying a copy of dirProps[].
    975  *
    976  *
    977  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
    978  *
    979  *
    980  * Handling the stack of explicit levels (Xn):
    981  *
    982  * With the BiDi stack of explicit levels, as pushed with each
    983  * LRE, RLE, LRO, RLO, LRI, RLI and FSO and popped with each PDF and PDI,
    984  * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
    985  *
    986  * In order to have a correct push-pop semantics even in the case of overflows,
    987  * overflow counters and a valid isolate counter are used as described in UAX#9
    988  * section 3.3.2 "Explicit Levels and Directions".
    989  *
    990  * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
    991  */
    992 static UBiDiDirection
    993 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
    994     DirProp *dirProps=pBiDi->dirProps;
    995     UBiDiLevel *levels=pBiDi->levels;
    996     const UChar *text=pBiDi->text;
    997 
    998     int32_t i=0, length=pBiDi->length;
    999     Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
   1000     DirProp dirProp;
   1001     UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
   1002     UBiDiDirection direction;
   1003     pBiDi->isolateCount=0;
   1004 
   1005     if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
   1006 
   1007     /* determine if the text is mixed-directional or single-directional */
   1008     direction=directionFromFlags(pBiDi);
   1009 
   1010     /* we may not need to resolve any explicit levels */
   1011     if((direction!=UBIDI_MIXED)) {
   1012         /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
   1013         return direction;
   1014     }
   1015     if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
   1016         /* inverse BiDi: mixed, but all characters are at the same embedding level */
   1017         /* set all levels to the paragraph level */
   1018         int32_t paraIndex, start, limit;
   1019         for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
   1020             if(paraIndex==0)
   1021                 start=0;
   1022             else
   1023                 start=pBiDi->paras[paraIndex-1].limit;
   1024             limit=pBiDi->paras[paraIndex].limit;
   1025             level=pBiDi->paras[paraIndex].level;
   1026             for(i=start; i<limit; i++)
   1027                 levels[i]=level;
   1028         }
   1029         return direction;   /* no bracket matching for inverse BiDi */
   1030     }
   1031     if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
   1032         /* no embeddings, set all levels to the paragraph level */
   1033         /* we still have to perform bracket matching */
   1034         int32_t paraIndex, start, limit;
   1035         BracketData bracketData;
   1036         bracketInit(pBiDi, &bracketData);
   1037         for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
   1038             if(paraIndex==0)
   1039                 start=0;
   1040             else
   1041                 start=pBiDi->paras[paraIndex-1].limit;
   1042             limit=pBiDi->paras[paraIndex].limit;
   1043             level=pBiDi->paras[paraIndex].level;
   1044             for(i=start; i<limit; i++) {
   1045                 levels[i]=level;
   1046                 dirProp=dirProps[i];
   1047                 if(dirProp==B) {
   1048                     if((i+1)<length) {
   1049                         if(text[i]==CR && text[i+1]==LF)
   1050                             continue;   /* skip CR when followed by LF */
   1051                         bracketProcessB(&bracketData, level);
   1052                     }
   1053                     continue;
   1054                 }
   1055                 if(!bracketProcessChar(&bracketData, i, dirProp)) {
   1056                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1057                     return UBIDI_LTR;
   1058                 }
   1059             }
   1060         }
   1061         return direction;
   1062     }
   1063     {
   1064         /* continue to perform (Xn) */
   1065 
   1066         /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
   1067         /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
   1068         UBiDiLevel embeddingLevel=level, newLevel;
   1069         UBiDiLevel previousLevel=level;     /* previous level for regular (not CC) characters */
   1070         int32_t lastCcPos=0;                /* index of last effective LRx,RLx, PDx */
   1071 
   1072         uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2];   /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
   1073                                                         but we need one more entry as base */
   1074         uint32_t stackLast=0;
   1075         int32_t overflowIsolateCount=0;
   1076         int32_t overflowEmbeddingCount=0;
   1077         int32_t validIsolateCount=0;
   1078         BracketData bracketData;
   1079         bracketInit(pBiDi, &bracketData);
   1080         stack[0]=level;     /* initialize base entry to para level, no override, no isolate */
   1081 
   1082         /* recalculate the flags */
   1083         flags=0;
   1084 
   1085         for(i=0; i<length; ++i) {
   1086             dirProp=dirProps[i];
   1087             switch(dirProp) {
   1088             case LRE:
   1089             case RLE:
   1090             case LRO:
   1091             case RLO:
   1092                 /* (X2, X3, X4, X5) */
   1093                 flags|=DIRPROP_FLAG(BN);
   1094                 if (dirProp==LRE || dirProp==LRO)
   1095                     newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
   1096                 else
   1097                     newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
   1098                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
   1099                                                          overflowEmbeddingCount==0) {
   1100                     lastCcPos=i;
   1101                     embeddingLevel=newLevel;
   1102                     if(dirProp==LRO || dirProp==RLO)
   1103                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
   1104                     stackLast++;
   1105                     stack[stackLast]=embeddingLevel;
   1106                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
   1107                        since this has already been done for newLevel which is
   1108                        the source for embeddingLevel.
   1109                      */
   1110                 } else {
   1111                     dirProps[i]|=IGNORE_CC;
   1112                     if(overflowIsolateCount==0)
   1113                         overflowEmbeddingCount++;
   1114                 }
   1115                 break;
   1116             case PDF:
   1117                 /* (X7) */
   1118                 flags|=DIRPROP_FLAG(BN);
   1119                 /* handle all the overflow cases first */
   1120                 if(overflowIsolateCount) {
   1121                     dirProps[i]|=IGNORE_CC;
   1122                     break;
   1123                 }
   1124                 if(overflowEmbeddingCount) {
   1125                     dirProps[i]|=IGNORE_CC;
   1126                     overflowEmbeddingCount--;
   1127                     break;
   1128                 }
   1129                 if(stackLast>0 && stack[stackLast]<ISOLATE) {   /* not an isolate entry */
   1130                     lastCcPos=i;
   1131                     stackLast--;
   1132                     embeddingLevel=(UBiDiLevel)stack[stackLast];
   1133                 } else
   1134                     dirProps[i]|=IGNORE_CC;
   1135                 break;
   1136             case LRI:
   1137             case RLI:
   1138                 if(embeddingLevel!=previousLevel) {
   1139                     bracketProcessBoundary(&bracketData, lastCcPos,
   1140                                            previousLevel, embeddingLevel);
   1141                     previousLevel=embeddingLevel;
   1142                 }
   1143                 /* (X5a, X5b) */
   1144                 flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
   1145                 level=embeddingLevel;
   1146                 if(dirProp==LRI)
   1147                     newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
   1148                 else
   1149                     newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
   1150                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
   1151                                                          overflowEmbeddingCount==0) {
   1152                     lastCcPos=i;
   1153                     previousLevel=embeddingLevel;
   1154                     validIsolateCount++;
   1155                     if(validIsolateCount>pBiDi->isolateCount)
   1156                         pBiDi->isolateCount=validIsolateCount;
   1157                     embeddingLevel=newLevel;
   1158                     stackLast++;
   1159                     stack[stackLast]=embeddingLevel+ISOLATE;
   1160                     bracketProcessLRI_RLI(&bracketData, embeddingLevel);
   1161                 } else {
   1162                     dirProps[i]|=IGNORE_CC;
   1163                     overflowIsolateCount++;
   1164                 }
   1165                 break;
   1166             case PDI:
   1167                 if(embeddingLevel!=previousLevel) {
   1168                     bracketProcessBoundary(&bracketData, lastCcPos,
   1169                                            previousLevel, embeddingLevel);
   1170                 }
   1171                 /* (X6a) */
   1172                 if(overflowIsolateCount) {
   1173                     dirProps[i]|=IGNORE_CC;
   1174                     overflowIsolateCount--;
   1175                 }
   1176                 else if(validIsolateCount) {
   1177                     lastCcPos=i;
   1178                     overflowEmbeddingCount=0;
   1179                     while(stack[stackLast]<ISOLATE) /* pop embedding entries */
   1180                         stackLast--;                /* until the last isolate entry */
   1181                     stackLast--;                    /* pop also the last isolate entry */
   1182                     validIsolateCount--;
   1183                     bracketProcessPDI(&bracketData);
   1184                 } else
   1185                     dirProps[i]|=IGNORE_CC;
   1186                 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
   1187                 previousLevel=level=embeddingLevel;
   1188                 flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
   1189                 break;
   1190             case B:
   1191                 level=GET_PARALEVEL(pBiDi, i);
   1192                 if((i+1)<length) {
   1193                     if(text[i]==CR && text[i+1]==LF)
   1194                         break;          /* skip CR when followed by LF */
   1195                     overflowEmbeddingCount=overflowIsolateCount=0;
   1196                     validIsolateCount=0;
   1197                     stackLast=0;
   1198                     stack[0]=level; /* initialize base entry to para level, no override, no isolate */
   1199                     previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
   1200                     bracketProcessB(&bracketData, embeddingLevel);
   1201                 }
   1202                 flags|=DIRPROP_FLAG(B);
   1203                 break;
   1204             case BN:
   1205                 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
   1206                 /* they will get their levels set correctly in adjustWSLevels() */
   1207                 flags|=DIRPROP_FLAG(BN);
   1208                 break;
   1209             default:
   1210                 /* all other types get the "real" level */
   1211                 level=embeddingLevel;
   1212                 if(embeddingLevel!=previousLevel) {
   1213                     bracketProcessBoundary(&bracketData, lastCcPos,
   1214                                            previousLevel, embeddingLevel);
   1215                     previousLevel=embeddingLevel;
   1216                 }
   1217                 if(level&UBIDI_LEVEL_OVERRIDE)
   1218                     flags|=DIRPROP_FLAG_LR(level);
   1219                 else
   1220                     flags|=DIRPROP_FLAG(dirProp);
   1221                 if(!bracketProcessChar(&bracketData, i, dirProp))
   1222                     return -1;
   1223                 break;
   1224             }
   1225 
   1226             /*
   1227              * We need to set reasonable levels even on BN codes and
   1228              * explicit codes because we will later look at same-level runs (X10).
   1229              */
   1230             levels[i]=level;
   1231             if(i>0 && levels[i-1]!=level) {
   1232                 flags|=DIRPROP_FLAG_MULTI_RUNS;
   1233                 if(level&UBIDI_LEVEL_OVERRIDE)
   1234                     flags|=DIRPROP_FLAG_O(level);
   1235                 else
   1236                     flags|=DIRPROP_FLAG_E(level);
   1237             }
   1238             if(DIRPROP_FLAG(dirProp)&MASK_ISO)
   1239                 level=embeddingLevel;
   1240         }
   1241         if(flags&MASK_EMBEDDING) {
   1242             flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
   1243         }
   1244         if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
   1245             flags|=DIRPROP_FLAG(L);
   1246         }
   1247 
   1248         /* subsequently, ignore the explicit codes and BN (X9) */
   1249 
   1250         /* again, determine if the text is mixed-directional or single-directional */
   1251         pBiDi->flags=flags;
   1252         direction=directionFromFlags(pBiDi);
   1253     }
   1254     return direction;
   1255 }
   1256 
   1257 /*
   1258  * Use a pre-specified embedding levels array:
   1259  *
   1260  * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
   1261  * ignore all explicit codes (X9),
   1262  * and check all the preset levels.
   1263  *
   1264  * Recalculate the flags to have them reflect the real properties
   1265  * after taking the explicit embeddings into account.
   1266  */
   1267 static UBiDiDirection
   1268 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
   1269     DirProp *dirProps=pBiDi->dirProps;
   1270     DirProp dirProp;
   1271     UBiDiLevel *levels=pBiDi->levels;
   1272     int32_t isolateCount=0;
   1273 
   1274     int32_t i, length=pBiDi->length;
   1275     Flags flags=0;  /* collect all directionalities in the text */
   1276     UBiDiLevel level;
   1277     pBiDi->isolateCount=0;
   1278 
   1279     for(i=0; i<length; ++i) {
   1280         level=levels[i];
   1281         dirProp=dirProps[i];
   1282         if(dirProp==LRI || dirProp==RLI) {
   1283             isolateCount++;
   1284             if(isolateCount>pBiDi->isolateCount)
   1285                 pBiDi->isolateCount=isolateCount;
   1286         }
   1287         else if(dirProp==PDI)
   1288             isolateCount--;
   1289         else if(dirProp==B)
   1290             isolateCount=0;
   1291         if(level&UBIDI_LEVEL_OVERRIDE) {
   1292             /* keep the override flag in levels[i] but adjust the flags */
   1293             level&=~UBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
   1294             flags|=DIRPROP_FLAG_O(level);
   1295         } else {
   1296             /* set the flags */
   1297             flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
   1298         }
   1299         if((level<GET_PARALEVEL(pBiDi, i) &&
   1300             !((0==level)&&(dirProp==B))) ||
   1301            (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
   1302             /* level out of bounds */
   1303             *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1304             return UBIDI_LTR;
   1305         }
   1306     }
   1307     if(flags&MASK_EMBEDDING) {
   1308         flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
   1309     }
   1310 
   1311     /* determine if the text is mixed-directional or single-directional */
   1312     pBiDi->flags=flags;
   1313     return directionFromFlags(pBiDi);
   1314 }
   1315 
   1316 /******************************************************************
   1317  The Properties state machine table
   1318 *******************************************************************
   1319 
   1320  All table cells are 8 bits:
   1321       bits 0..4:  next state
   1322       bits 5..7:  action to perform (if > 0)
   1323 
   1324  Cells may be of format "n" where n represents the next state
   1325  (except for the rightmost column).
   1326  Cells may also be of format "s(x,y)" where x represents an action
   1327  to perform and y represents the next state.
   1328 
   1329 *******************************************************************
   1330  Definitions and type for properties state table
   1331 *******************************************************************
   1332 */
   1333 #define IMPTABPROPS_COLUMNS 16
   1334 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
   1335 #define GET_STATEPROPS(cell) ((cell)&0x1f)
   1336 #define GET_ACTIONPROPS(cell) ((cell)>>5)
   1337 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
   1338 
   1339 static const uint8_t groupProp[] =          /* dirProp regrouped */
   1340 {
   1341 /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
   1342     0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
   1343 };
   1344 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
   1345 
   1346 /******************************************************************
   1347 
   1348       PROPERTIES  STATE  TABLE
   1349 
   1350  In table impTabProps,
   1351       - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
   1352       - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
   1353       - the Res column is the reduced property assigned to a run
   1354 
   1355  Action 1: process current run1, init new run1
   1356         2: init new run2
   1357         3: process run1, process run2, init new run1
   1358         4: process run1, set run1=run2, init new run2
   1359 
   1360  Notes:
   1361   1) This table is used in resolveImplicitLevels().
   1362   2) This table triggers actions when there is a change in the Bidi
   1363      property of incoming characters (action 1).
   1364   3) Most such property sequences are processed immediately (in
   1365      fact, passed to processPropertySeq().
   1366   4) However, numbers are assembled as one sequence. This means
   1367      that undefined situations (like CS following digits, until
   1368      it is known if the next char will be a digit) are held until
   1369      following chars define them.
   1370      Example: digits followed by CS, then comes another CS or ON;
   1371               the digits will be processed, then the CS assigned
   1372               as the start of an ON sequence (action 3).
   1373   5) There are cases where more than one sequence must be
   1374      processed, for instance digits followed by CS followed by L:
   1375      the digits must be processed as one sequence, and the CS
   1376      must be processed as an ON sequence, all this before starting
   1377      assembling chars for the opening L sequence.
   1378 
   1379 
   1380 */
   1381 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
   1382 {
   1383 /*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
   1384 /* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
   1385 /* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
   1386 /* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
   1387 /* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
   1388 /* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
   1389 /* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
   1390 /* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
   1391 /* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
   1392 /* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
   1393 /* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
   1394 /*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
   1395 /*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
   1396 /*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
   1397 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
   1398 /*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
   1399 /*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
   1400 /*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
   1401 /*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
   1402 /*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
   1403 /*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
   1404 /*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
   1405 /*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
   1406 /*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
   1407 /*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
   1408 };
   1409 
   1410 /*  we must undef macro s because the levels table have a different
   1411  *  structure (4 bits for action and 4 bits for next state.
   1412  */
   1413 #undef s
   1414 
   1415 /******************************************************************
   1416  The levels state machine tables
   1417 *******************************************************************
   1418 
   1419  All table cells are 8 bits:
   1420       bits 0..3:  next state
   1421       bits 4..7:  action to perform (if > 0)
   1422 
   1423  Cells may be of format "n" where n represents the next state
   1424  (except for the rightmost column).
   1425  Cells may also be of format "s(x,y)" where x represents an action
   1426  to perform and y represents the next state.
   1427 
   1428  This format limits each table to 16 states each and to 15 actions.
   1429 
   1430 *******************************************************************
   1431  Definitions and type for levels state tables
   1432 *******************************************************************
   1433 */
   1434 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
   1435 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
   1436 #define GET_STATE(cell) ((cell)&0x0f)
   1437 #define GET_ACTION(cell) ((cell)>>4)
   1438 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
   1439 
   1440 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
   1441 typedef uint8_t ImpAct[];
   1442 
   1443 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
   1444  * instead of having a pair of ImpTab and a pair of ImpAct.
   1445  */
   1446 typedef struct ImpTabPair {
   1447     const void * pImpTab[2];
   1448     const void * pImpAct[2];
   1449 } ImpTabPair;
   1450 
   1451 /******************************************************************
   1452 
   1453       LEVELS  STATE  TABLES
   1454 
   1455  In all levels state tables,
   1456       - state 0 is the initial state
   1457       - the Res column is the increment to add to the text level
   1458         for this property sequence.
   1459 
   1460  The impAct arrays for each table of a pair map the local action
   1461  numbers of the table to the total list of actions. For instance,
   1462  action 2 in a given table corresponds to the action number which
   1463  appears in entry [2] of the impAct array for that table.
   1464  The first entry of all impAct arrays must be 0.
   1465 
   1466  Action 1: init conditional sequence
   1467         2: prepend conditional sequence to current sequence
   1468         3: set ON sequence to new level - 1
   1469         4: init EN/AN/ON sequence
   1470         5: fix EN/AN/ON sequence followed by R
   1471         6: set previous level sequence to level 2
   1472 
   1473  Notes:
   1474   1) These tables are used in processPropertySeq(). The input
   1475      is property sequences as determined by resolveImplicitLevels.
   1476   2) Most such property sequences are processed immediately
   1477      (levels are assigned).
   1478   3) However, some sequences cannot be assigned a final level till
   1479      one or more following sequences are received. For instance,
   1480      ON following an R sequence within an even-level paragraph.
   1481      If the following sequence is R, the ON sequence will be
   1482      assigned basic run level+1, and so will the R sequence.
   1483   4) S is generally handled like ON, since its level will be fixed
   1484      to paragraph level in adjustWSLevels().
   1485 
   1486 */
   1487 
   1488 static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
   1489 /*  In this table, conditional sequences receive the higher possible level
   1490     until proven otherwise.
   1491 */
   1492 {
   1493 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1494 /* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
   1495 /* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
   1496 /* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
   1497 /* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
   1498 /* 4 : R+ON       */ { s(2,0),     1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
   1499 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
   1500 };
   1501 static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
   1502 /*  In this table, conditional sequences receive the lower possible level
   1503     until proven otherwise.
   1504 */
   1505 {
   1506 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1507 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
   1508 /* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
   1509 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
   1510 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
   1511 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
   1512 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
   1513 };
   1514 static const ImpAct impAct0 = {0,1,2,3,4,5,6};
   1515 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
   1516                                            &impTabR_DEFAULT},
   1517                                           {&impAct0, &impAct0}};
   1518 
   1519 static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
   1520 /*  In this table, conditional sequences receive the higher possible level
   1521     until proven otherwise.
   1522 */
   1523 {
   1524 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1525 /* 0 : init       */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  0 },
   1526 /* 1 : L+EN/AN    */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  2 },
   1527 /* 2 : R          */ {     0 ,     2 ,    4 ,      4 , s(1,3),     0 ,     0 ,  1 },
   1528 /* 3 : R+ON       */ { s(2,0),     2 ,    4 ,      4 ,     3 ,     3 , s(2,0),  1 },
   1529 /* 4 : R+EN/AN    */ {     0 ,     2 ,    4 ,      4 , s(1,3), s(1,3),     0 ,  2 }
   1530   };
   1531 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
   1532                                                    &impTabR_DEFAULT},
   1533                                                   {&impAct0, &impAct0}};
   1534 
   1535 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
   1536 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
   1537     until proven that there is L or sor/eor on both sides. AN is handled like EN.
   1538 */
   1539 {
   1540 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1541 /* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
   1542 /* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
   1543 /* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
   1544 /* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
   1545 /* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
   1546 /* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
   1547 };
   1548 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
   1549 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
   1550     until proven that there is L on both sides. AN is handled like EN.
   1551 */
   1552 {
   1553 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1554 /* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1555 /* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
   1556 /* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
   1557 /* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
   1558 /* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
   1559 };
   1560 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
   1561                         {&impTabL_GROUP_NUMBERS_WITH_R,
   1562                          &impTabR_GROUP_NUMBERS_WITH_R},
   1563                         {&impAct0, &impAct0}};
   1564 
   1565 
   1566 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
   1567 /*  This table is identical to the Default LTR table except that EN and AN are
   1568     handled like L.
   1569 */
   1570 {
   1571 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1572 /* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
   1573 /* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
   1574 /* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
   1575 /* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
   1576 /* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
   1577 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
   1578 };
   1579 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
   1580 /*  This table is identical to the Default RTL table except that EN and AN are
   1581     handled like L.
   1582 */
   1583 {
   1584 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1585 /* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1586 /* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
   1587 /* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
   1588 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
   1589 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
   1590 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
   1591 };
   1592 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
   1593                         {&impTabL_INVERSE_NUMBERS_AS_L,
   1594                          &impTabR_INVERSE_NUMBERS_AS_L},
   1595                         {&impAct0, &impAct0}};
   1596 
   1597 static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
   1598 /*  In this table, conditional sequences receive the lower possible level
   1599     until proven otherwise.
   1600 */
   1601 {
   1602 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1603 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
   1604 /* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
   1605 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
   1606 /* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
   1607 /* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
   1608 /* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
   1609 /* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
   1610 };
   1611 static const ImpAct impAct1 = {0,1,11,12};
   1612 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
   1613  */
   1614 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
   1615                         {&impTabL_DEFAULT,
   1616                          &impTabR_INVERSE_LIKE_DIRECT},
   1617                         {&impAct0, &impAct1}};
   1618 
   1619 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
   1620 /*  The case handled in this table is (visually):  R EN L
   1621 */
   1622 {
   1623 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1624 /* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1625 /* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
   1626 /* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
   1627 /* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
   1628 /* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
   1629 /* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
   1630 /* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
   1631 };
   1632 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
   1633 /*  The cases handled in this table are (visually):  R EN L
   1634                                                      R L AN L
   1635 */
   1636 {
   1637 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1638 /* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1639 /* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
   1640 /* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
   1641 /* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
   1642 /* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
   1643 /* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
   1644 /* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
   1645 };
   1646 static const ImpAct impAct2 = {0,1,7,8,9,10};
   1647 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
   1648                         {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
   1649                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
   1650                         {&impAct0, &impAct2}};
   1651 
   1652 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
   1653                         {&impTabL_NUMBERS_SPECIAL,
   1654                          &impTabR_INVERSE_LIKE_DIRECT},
   1655                         {&impAct0, &impAct1}};
   1656 
   1657 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
   1658 /*  The case handled in this table is (visually):  R EN L
   1659 */
   1660 {
   1661 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1662 /* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1663 /* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
   1664 /* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
   1665 /* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
   1666 /* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
   1667 };
   1668 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
   1669                         {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
   1670                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
   1671                         {&impAct0, &impAct2}};
   1672 
   1673 #undef s
   1674 
   1675 typedef struct {
   1676     const ImpTab * pImpTab;             /* level table pointer          */
   1677     const ImpAct * pImpAct;             /* action map array             */
   1678     int32_t startON;                    /* start of ON sequence         */
   1679     int32_t startL2EN;                  /* start of level 2 sequence    */
   1680     int32_t lastStrongRTL;              /* index of last found R or AL  */
   1681     int32_t state;                      /* current state                */
   1682     int32_t runStart;                   /* start position of the run    */
   1683     UBiDiLevel runLevel;                /* run level before implicit solving */
   1684 } LevState;
   1685 
   1686 /*------------------------------------------------------------------------*/
   1687 
   1688 static void
   1689 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
   1690   /* param pos:     position where to insert
   1691      param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
   1692   */
   1693 {
   1694 #define FIRSTALLOC  10
   1695     Point point;
   1696     InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
   1697 
   1698     if (pInsertPoints->capacity == 0)
   1699     {
   1700         pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
   1701         if (pInsertPoints->points == NULL)
   1702         {
   1703             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
   1704             return;
   1705         }
   1706         pInsertPoints->capacity=FIRSTALLOC;
   1707     }
   1708     if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
   1709     {
   1710         void * savePoints=pInsertPoints->points;
   1711         pInsertPoints->points=uprv_realloc(pInsertPoints->points,
   1712                                            pInsertPoints->capacity*2*sizeof(Point));
   1713         if (pInsertPoints->points == NULL)
   1714         {
   1715             pInsertPoints->points=savePoints;
   1716             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
   1717             return;
   1718         }
   1719         else  pInsertPoints->capacity*=2;
   1720     }
   1721     point.pos=pos;
   1722     point.flag=flag;
   1723     pInsertPoints->points[pInsertPoints->size]=point;
   1724     pInsertPoints->size++;
   1725 #undef FIRSTALLOC
   1726 }
   1727 
   1728 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
   1729 
   1730 /*
   1731  * This implementation of the (Wn) rules applies all rules in one pass.
   1732  * In order to do so, it needs a look-ahead of typically 1 character
   1733  * (except for W5: sequences of ET) and keeps track of changes
   1734  * in a rule Wp that affect a later Wq (p<q).
   1735  *
   1736  * The (Nn) and (In) rules are also performed in that same single loop,
   1737  * but effectively one iteration behind for white space.
   1738  *
   1739  * Since all implicit rules are performed in one step, it is not necessary
   1740  * to actually store the intermediate directional properties in dirProps[].
   1741  */
   1742 
   1743 static void
   1744 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
   1745                    int32_t start, int32_t limit) {
   1746     uint8_t cell, oldStateSeq, actionSeq;
   1747     const ImpTab * pImpTab=pLevState->pImpTab;
   1748     const ImpAct * pImpAct=pLevState->pImpAct;
   1749     UBiDiLevel * levels=pBiDi->levels;
   1750     UBiDiLevel level, addLevel;
   1751     InsertPoints * pInsertPoints;
   1752     int32_t start0, k;
   1753 
   1754     start0=start;                           /* save original start position */
   1755     oldStateSeq=(uint8_t)pLevState->state;
   1756     cell=(*pImpTab)[oldStateSeq][_prop];
   1757     pLevState->state=GET_STATE(cell);       /* isolate the new state */
   1758     actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
   1759     addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
   1760 
   1761     if(actionSeq) {
   1762         switch(actionSeq) {
   1763         case 1:                         /* init ON seq */
   1764             pLevState->startON=start0;
   1765             break;
   1766 
   1767         case 2:                         /* prepend ON seq to current seq */
   1768             start=pLevState->startON;
   1769             break;
   1770 
   1771         case 3:                         /* L or S after possible relevant EN/AN */
   1772             /* check if we had EN after R/AL */
   1773             if (pLevState->startL2EN >= 0) {
   1774                 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
   1775             }
   1776             pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
   1777             /* check if we had any relevant EN/AN after R/AL */
   1778             pInsertPoints=&(pBiDi->insertPoints);
   1779             if ((pInsertPoints->capacity == 0) ||
   1780                 (pInsertPoints->size <= pInsertPoints->confirmed))
   1781             {
   1782                 /* nothing, just clean up */
   1783                 pLevState->lastStrongRTL=-1;
   1784                 /* check if we have a pending conditional segment */
   1785                 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
   1786                 if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
   1787                     start=pLevState->startON;   /* reset to basic run level */
   1788                 }
   1789                 if (_prop == DirProp_S)                /* add LRM before S */
   1790                 {
   1791                     addPoint(pBiDi, start0, LRM_BEFORE);
   1792                     pInsertPoints->confirmed=pInsertPoints->size;
   1793                 }
   1794                 break;
   1795             }
   1796             /* reset previous RTL cont to level for LTR text */
   1797             for (k=pLevState->lastStrongRTL+1; k<start0; k++)
   1798             {
   1799                 /* reset odd level, leave runLevel+2 as is */
   1800                 levels[k]=(levels[k] - 2) & ~1;
   1801             }
   1802             /* mark insert points as confirmed */
   1803             pInsertPoints->confirmed=pInsertPoints->size;
   1804             pLevState->lastStrongRTL=-1;
   1805             if (_prop == DirProp_S)            /* add LRM before S */
   1806             {
   1807                 addPoint(pBiDi, start0, LRM_BEFORE);
   1808                 pInsertPoints->confirmed=pInsertPoints->size;
   1809             }
   1810             break;
   1811 
   1812         case 4:                         /* R/AL after possible relevant EN/AN */
   1813             /* just clean up */
   1814             pInsertPoints=&(pBiDi->insertPoints);
   1815             if (pInsertPoints->capacity > 0)
   1816                 /* remove all non confirmed insert points */
   1817                 pInsertPoints->size=pInsertPoints->confirmed;
   1818             pLevState->startON=-1;
   1819             pLevState->startL2EN=-1;
   1820             pLevState->lastStrongRTL=limit - 1;
   1821             break;
   1822 
   1823         case 5:                         /* EN/AN after R/AL + possible cont */
   1824             /* check for real AN */
   1825             if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
   1826                 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
   1827             {
   1828                 /* real AN */
   1829                 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
   1830                 {
   1831                     /* just note the righmost digit as a strong RTL */
   1832                     pLevState->lastStrongRTL=limit - 1;
   1833                     break;
   1834                 }
   1835                 if (pLevState->startL2EN >= 0)  /* after EN, no AN */
   1836                 {
   1837                     addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
   1838                     pLevState->startL2EN=-2;
   1839                 }
   1840                 /* note AN */
   1841                 addPoint(pBiDi, start0, LRM_BEFORE);
   1842                 break;
   1843             }
   1844             /* if first EN/AN after R/AL */
   1845             if (pLevState->startL2EN == -1) {
   1846                 pLevState->startL2EN=start0;
   1847             }
   1848             break;
   1849 
   1850         case 6:                         /* note location of latest R/AL */
   1851             pLevState->lastStrongRTL=limit - 1;
   1852             pLevState->startON=-1;
   1853             break;
   1854 
   1855         case 7:                         /* L after R+ON/EN/AN */
   1856             /* include possible adjacent number on the left */
   1857             for (k=start0-1; k>=0 && !(levels[k]&1); k--);
   1858             if(k>=0) {
   1859                 addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
   1860                 pInsertPoints=&(pBiDi->insertPoints);
   1861                 pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
   1862             }
   1863             pLevState->startON=start0;
   1864             break;
   1865 
   1866         case 8:                         /* AN after L */
   1867             /* AN numbers between L text on both sides may be trouble. */
   1868             /* tentatively bracket with LRMs; will be confirmed if followed by L */
   1869             addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
   1870             addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
   1871             break;
   1872 
   1873         case 9:                         /* R after L+ON/EN/AN */
   1874             /* false alert, infirm LRMs around previous AN */
   1875             pInsertPoints=&(pBiDi->insertPoints);
   1876             pInsertPoints->size=pInsertPoints->confirmed;
   1877             if (_prop == DirProp_S)            /* add RLM before S */
   1878             {
   1879                 addPoint(pBiDi, start0, RLM_BEFORE);
   1880                 pInsertPoints->confirmed=pInsertPoints->size;
   1881             }
   1882             break;
   1883 
   1884         case 10:                        /* L after L+ON/AN */
   1885             level=pLevState->runLevel + addLevel;
   1886             for(k=pLevState->startON; k<start0; k++) {
   1887                 if (levels[k]<level)
   1888                     levels[k]=level;
   1889             }
   1890             pInsertPoints=&(pBiDi->insertPoints);
   1891             pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
   1892             pLevState->startON=start0;
   1893             break;
   1894 
   1895         case 11:                        /* L after L+ON+EN/AN/ON */
   1896             level=pLevState->runLevel;
   1897             for(k=start0-1; k>=pLevState->startON; k--) {
   1898                 if(levels[k]==level+3) {
   1899                     while(levels[k]==level+3) {
   1900                         levels[k--]-=2;
   1901                     }
   1902                     while(levels[k]==level) {
   1903                         k--;
   1904                     }
   1905                 }
   1906                 if(levels[k]==level+2) {
   1907                     levels[k]=level;
   1908                     continue;
   1909                 }
   1910                 levels[k]=level+1;
   1911             }
   1912             break;
   1913 
   1914         case 12:                        /* R after L+ON+EN/AN/ON */
   1915             level=pLevState->runLevel+1;
   1916             for(k=start0-1; k>=pLevState->startON; k--) {
   1917                 if(levels[k]>level) {
   1918                     levels[k]-=2;
   1919                 }
   1920             }
   1921             break;
   1922 
   1923         default:                        /* we should never get here */
   1924             U_ASSERT(FALSE);
   1925             break;
   1926         }
   1927     }
   1928     if((addLevel) || (start < start0)) {
   1929         level=pLevState->runLevel + addLevel;
   1930         if(start>=pLevState->runStart) {
   1931             for(k=start; k<limit; k++) {
   1932                 levels[k]=level;
   1933             }
   1934         } else {
   1935             DirProp *dirProps=pBiDi->dirProps, dirProp;
   1936             int32_t isolateCount=0;
   1937             for(k=start; k<limit; k++) {
   1938                 dirProp=dirProps[k];
   1939                 if(dirProp==PDI)
   1940                     isolateCount--;
   1941                 if(isolateCount==0)
   1942                     levels[k]=level;
   1943                 if(dirProp==LRI || dirProp==RLI)
   1944                     isolateCount++;
   1945             }
   1946         }
   1947     }
   1948 }
   1949 
   1950 /**
   1951  * Returns the directionality of the last strong character at the end of the prologue, if any.
   1952  * Requires prologue!=null.
   1953  */
   1954 static DirProp
   1955 lastL_R_AL(UBiDi *pBiDi) {
   1956     const UChar *text=pBiDi->prologue;
   1957     int32_t length=pBiDi->proLength;
   1958     int32_t i;
   1959     UChar32 uchar;
   1960     DirProp dirProp;
   1961     for(i=length; i>0; ) {
   1962         /* i is decremented by U16_PREV */
   1963         U16_PREV(text, 0, i, uchar);
   1964         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
   1965         if(dirProp==L) {
   1966             return DirProp_L;
   1967         }
   1968         if(dirProp==R || dirProp==AL) {
   1969             return DirProp_R;
   1970         }
   1971         if(dirProp==B) {
   1972             return DirProp_ON;
   1973         }
   1974     }
   1975     return DirProp_ON;
   1976 }
   1977 
   1978 /**
   1979  * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
   1980  * Requires epilogue!=null.
   1981  */
   1982 static DirProp
   1983 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
   1984     const UChar *text=pBiDi->epilogue;
   1985     int32_t length=pBiDi->epiLength;
   1986     int32_t i;
   1987     UChar32 uchar;
   1988     DirProp dirProp;
   1989     for(i=0; i<length; ) {
   1990         /* i is incremented by U16_NEXT */
   1991         U16_NEXT(text, i, length, uchar);
   1992         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
   1993         if(dirProp==L) {
   1994             return DirProp_L;
   1995         }
   1996         if(dirProp==R || dirProp==AL) {
   1997             return DirProp_R;
   1998         }
   1999         if(dirProp==EN) {
   2000             return DirProp_EN;
   2001         }
   2002         if(dirProp==AN) {
   2003             return DirProp_AN;
   2004         }
   2005     }
   2006     return DirProp_ON;
   2007 }
   2008 
   2009 static void
   2010 resolveImplicitLevels(UBiDi *pBiDi,
   2011                       int32_t start, int32_t limit,
   2012                       DirProp sor, DirProp eor) {
   2013     const DirProp *dirProps=pBiDi->dirProps;
   2014     DirProp dirProp;
   2015     LevState levState;
   2016     int32_t i, start1, start2;
   2017     uint16_t oldStateImp, stateImp, actionImp;
   2018     uint8_t gprop, resProp, cell;
   2019     UBool inverseRTL;
   2020     DirProp nextStrongProp=R;
   2021     int32_t nextStrongPos=-1;
   2022 
   2023     /* check for RTL inverse BiDi mode */
   2024     /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
   2025      * loop on the text characters from end to start.
   2026      * This would need a different properties state table (at least different
   2027      * actions) and different levels state tables (maybe very similar to the
   2028      * LTR corresponding ones.
   2029      */
   2030     inverseRTL=(UBool)
   2031         ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
   2032          (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
   2033           pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
   2034 
   2035     /* initialize for property and levels state tables */
   2036     levState.startON=-1;
   2037     levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
   2038     levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
   2039     levState.runStart=start;
   2040     levState.runLevel=pBiDi->levels[start];
   2041     levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
   2042     levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
   2043     if(start==0 && pBiDi->proLength>0) {
   2044         DirProp lastStrong=lastL_R_AL(pBiDi);
   2045         if(lastStrong!=DirProp_ON) {
   2046             sor=lastStrong;
   2047         }
   2048     }
   2049     /* The isolates[] entries contain enough information to
   2050        resume the bidi algorithm in the same state as it was
   2051        when it was interrupted by an isolate sequence. */
   2052     if(dirProps[start]==PDI) {
   2053         start1=pBiDi->isolates[pBiDi->isolateCount].start1;
   2054         stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
   2055         levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
   2056         pBiDi->isolateCount--;
   2057     } else {
   2058         start1=start;
   2059         if(dirProps[start]==NSM)
   2060             stateImp = 1 + sor;
   2061         else
   2062             stateImp=0;
   2063         levState.state=0;
   2064         processPropertySeq(pBiDi, &levState, sor, start, start);
   2065     }
   2066     start2=start;
   2067 
   2068     for(i=start; i<=limit; i++) {
   2069         if(i>=limit) {
   2070             if(limit>start) {
   2071                 dirProp=pBiDi->dirProps[limit-1];
   2072                 if(dirProp==LRI || dirProp==RLI)
   2073                     break;  /* no forced closing for sequence ending with LRI/RLI */
   2074             }
   2075             gprop=eor;
   2076         } else {
   2077             DirProp prop, prop1;
   2078             prop=PURE_DIRPROP(dirProps[i]);
   2079             if(inverseRTL) {
   2080                 if(prop==AL) {
   2081                     /* AL before EN does not make it AN */
   2082                     prop=R;
   2083                 } else if(prop==EN) {
   2084                     if(nextStrongPos<=i) {
   2085                         /* look for next strong char (L/R/AL) */
   2086                         int32_t j;
   2087                         nextStrongProp=R;   /* set default */
   2088                         nextStrongPos=limit;
   2089                         for(j=i+1; j<limit; j++) {
   2090                             prop1=dirProps[j];
   2091                             if(prop1==L || prop1==R || prop1==AL) {
   2092                                 nextStrongProp=prop1;
   2093                                 nextStrongPos=j;
   2094                                 break;
   2095                             }
   2096                         }
   2097                     }
   2098                     if(nextStrongProp==AL) {
   2099                         prop=AN;
   2100                     }
   2101                 }
   2102             }
   2103             gprop=groupProp[prop];
   2104         }
   2105         oldStateImp=stateImp;
   2106         cell=impTabProps[oldStateImp][gprop];
   2107         stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
   2108         actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
   2109         if((i==limit) && (actionImp==0)) {
   2110             /* there is an unprocessed sequence if its property == eor   */
   2111             actionImp=1;                    /* process the last sequence */
   2112         }
   2113         if(actionImp) {
   2114             resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
   2115             switch(actionImp) {
   2116             case 1:             /* process current seq1, init new seq1 */
   2117                 processPropertySeq(pBiDi, &levState, resProp, start1, i);
   2118                 start1=i;
   2119                 break;
   2120             case 2:             /* init new seq2 */
   2121                 start2=i;
   2122                 break;
   2123             case 3:             /* process seq1, process seq2, init new seq1 */
   2124                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
   2125                 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
   2126                 start1=i;
   2127                 break;
   2128             case 4:             /* process seq1, set seq1=seq2, init new seq2 */
   2129                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
   2130                 start1=start2;
   2131                 start2=i;
   2132                 break;
   2133             default:            /* we should never get here */
   2134                 U_ASSERT(FALSE);
   2135                 break;
   2136             }
   2137         }
   2138     }
   2139 
   2140     /* flush possible pending sequence, e.g. ON */
   2141     if(limit==pBiDi->length && pBiDi->epiLength>0) {
   2142         DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
   2143         if(firstStrong!=DirProp_ON) {
   2144             eor=firstStrong;
   2145         }
   2146     }
   2147 
   2148     dirProp=dirProps[limit-1];
   2149     if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
   2150         pBiDi->isolateCount++;
   2151         pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
   2152         pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
   2153         pBiDi->isolates[pBiDi->isolateCount].start1=start1;
   2154     }
   2155     else
   2156         processPropertySeq(pBiDi, &levState, eor, limit, limit);
   2157 }
   2158 
   2159 /* perform (L1) and (X9) ---------------------------------------------------- */
   2160 
   2161 /*
   2162  * Reset the embedding levels for some non-graphic characters (L1).
   2163  * This function also sets appropriate levels for BN, and
   2164  * explicit embedding types that are supposed to have been removed
   2165  * from the paragraph in (X9).
   2166  */
   2167 static void
   2168 adjustWSLevels(UBiDi *pBiDi) {
   2169     const DirProp *dirProps=pBiDi->dirProps;
   2170     UBiDiLevel *levels=pBiDi->levels;
   2171     int32_t i;
   2172 
   2173     if(pBiDi->flags&MASK_WS) {
   2174         UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
   2175         Flags flag;
   2176 
   2177         i=pBiDi->trailingWSStart;
   2178         while(i>0) {
   2179             /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
   2180             while(i>0 && (flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])))&MASK_WS) {
   2181                 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
   2182                     levels[i]=0;
   2183                 } else {
   2184                     levels[i]=GET_PARALEVEL(pBiDi, i);
   2185                 }
   2186             }
   2187 
   2188             /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
   2189             /* here, i+1 is guaranteed to be <length */
   2190             while(i>0) {
   2191                 flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i]));
   2192                 if(flag&MASK_BN_EXPLICIT) {
   2193                     levels[i]=levels[i+1];
   2194                 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
   2195                     levels[i]=0;
   2196                     break;
   2197                 } else if(flag&MASK_B_S) {
   2198                     levels[i]=GET_PARALEVEL(pBiDi, i);
   2199                     break;
   2200                 }
   2201             }
   2202         }
   2203     }
   2204 }
   2205 
   2206 U_CAPI void U_EXPORT2
   2207 ubidi_setContext(UBiDi *pBiDi,
   2208                  const UChar *prologue, int32_t proLength,
   2209                  const UChar *epilogue, int32_t epiLength,
   2210                  UErrorCode *pErrorCode) {
   2211     /* check the argument values */
   2212     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2213     if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
   2214        (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
   2215         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2216         return;
   2217     }
   2218 
   2219     if(proLength==-1) {
   2220         pBiDi->proLength=u_strlen(prologue);
   2221     } else {
   2222         pBiDi->proLength=proLength;
   2223     }
   2224     if(epiLength==-1) {
   2225         pBiDi->epiLength=u_strlen(epilogue);
   2226     } else {
   2227         pBiDi->epiLength=epiLength;
   2228     }
   2229     pBiDi->prologue=prologue;
   2230     pBiDi->epilogue=epilogue;
   2231 }
   2232 
   2233 static void
   2234 setParaSuccess(UBiDi *pBiDi) {
   2235     pBiDi->proLength=0;                 /* forget the last context */
   2236     pBiDi->epiLength=0;
   2237     pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
   2238 }
   2239 
   2240 #define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
   2241 #define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
   2242 
   2243 static void
   2244 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
   2245                 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
   2246     void *runsOnlyMemory;
   2247     int32_t *visualMap;
   2248     UChar *visualText;
   2249     int32_t saveLength, saveTrailingWSStart;
   2250     const UBiDiLevel *levels;
   2251     UBiDiLevel *saveLevels;
   2252     UBiDiDirection saveDirection;
   2253     UBool saveMayAllocateText;
   2254     Run *runs;
   2255     int32_t visualLength, i, j, visualStart, logicalStart,
   2256             runCount, runLength, addedRuns, insertRemove,
   2257             start, limit, step, indexOddBit, logicalPos,
   2258             index0, index1;
   2259     uint32_t saveOptions;
   2260 
   2261     pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
   2262     if(length==0) {
   2263         ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
   2264         goto cleanup3;
   2265     }
   2266     /* obtain memory for mapping table and visual text */
   2267     runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
   2268     if(runsOnlyMemory==NULL) {
   2269         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2270         goto cleanup3;
   2271     }
   2272     visualMap=runsOnlyMemory;
   2273     visualText=(UChar *)&visualMap[length];
   2274     saveLevels=(UBiDiLevel *)&visualText[length];
   2275     saveOptions=pBiDi->reorderingOptions;
   2276     if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
   2277         pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
   2278         pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
   2279     }
   2280     paraLevel&=1;                       /* accept only 0 or 1 */
   2281     ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
   2282     if(U_FAILURE(*pErrorCode)) {
   2283         goto cleanup3;
   2284     }
   2285     /* we cannot access directly pBiDi->levels since it is not yet set if
   2286      * direction is not MIXED
   2287      */
   2288     levels=ubidi_getLevels(pBiDi, pErrorCode);
   2289     uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
   2290     saveTrailingWSStart=pBiDi->trailingWSStart;
   2291     saveLength=pBiDi->length;
   2292     saveDirection=pBiDi->direction;
   2293 
   2294     /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
   2295      * the visual map and the dirProps array to drive the second call
   2296      * to ubidi_setPara (but must make provision for possible removal of
   2297      * BiDi controls.  Alternatively, only use the dirProps array via
   2298      * customized classifier callback.
   2299      */
   2300     visualLength=ubidi_writeReordered(pBiDi, visualText, length,
   2301                                       UBIDI_DO_MIRRORING, pErrorCode);
   2302     ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
   2303     if(U_FAILURE(*pErrorCode)) {
   2304         goto cleanup2;
   2305     }
   2306     pBiDi->reorderingOptions=saveOptions;
   2307 
   2308     pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
   2309     paraLevel^=1;
   2310     /* Because what we did with reorderingOptions, visualText may be shorter
   2311      * than the original text. But we don't want the levels memory to be
   2312      * reallocated shorter than the original length, since we need to restore
   2313      * the levels as after the first call to ubidi_setpara() before returning.
   2314      * We will force mayAllocateText to FALSE before the second call to
   2315      * ubidi_setpara(), and will restore it afterwards.
   2316      */
   2317     saveMayAllocateText=pBiDi->mayAllocateText;
   2318     pBiDi->mayAllocateText=FALSE;
   2319     ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
   2320     pBiDi->mayAllocateText=saveMayAllocateText;
   2321     ubidi_getRuns(pBiDi, pErrorCode);
   2322     if(U_FAILURE(*pErrorCode)) {
   2323         goto cleanup1;
   2324     }
   2325     /* check if some runs must be split, count how many splits */
   2326     addedRuns=0;
   2327     runCount=pBiDi->runCount;
   2328     runs=pBiDi->runs;
   2329     visualStart=0;
   2330     for(i=0; i<runCount; i++, visualStart+=runLength) {
   2331         runLength=runs[i].visualLimit-visualStart;
   2332         if(runLength<2) {
   2333             continue;
   2334         }
   2335         logicalStart=GET_INDEX(runs[i].logicalStart);
   2336         for(j=logicalStart+1; j<logicalStart+runLength; j++) {
   2337             index0=visualMap[j];
   2338             index1=visualMap[j-1];
   2339             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
   2340                 addedRuns++;
   2341             }
   2342         }
   2343     }
   2344     if(addedRuns) {
   2345         if(getRunsMemory(pBiDi, runCount+addedRuns)) {
   2346             if(runCount==1) {
   2347                 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
   2348                 pBiDi->runsMemory[0]=runs[0];
   2349             }
   2350             runs=pBiDi->runs=pBiDi->runsMemory;
   2351             pBiDi->runCount+=addedRuns;
   2352         } else {
   2353             goto cleanup1;
   2354         }
   2355     }
   2356     /* split runs which are not consecutive in source text */
   2357     for(i=runCount-1; i>=0; i--) {
   2358         runLength= i==0 ? runs[0].visualLimit :
   2359                           runs[i].visualLimit-runs[i-1].visualLimit;
   2360         logicalStart=runs[i].logicalStart;
   2361         indexOddBit=GET_ODD_BIT(logicalStart);
   2362         logicalStart=GET_INDEX(logicalStart);
   2363         if(runLength<2) {
   2364             if(addedRuns) {
   2365                 runs[i+addedRuns]=runs[i];
   2366             }
   2367             logicalPos=visualMap[logicalStart];
   2368             runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   2369                                             saveLevels[logicalPos]^indexOddBit);
   2370             continue;
   2371         }
   2372         if(indexOddBit) {
   2373             start=logicalStart;
   2374             limit=logicalStart+runLength-1;
   2375             step=1;
   2376         } else {
   2377             start=logicalStart+runLength-1;
   2378             limit=logicalStart;
   2379             step=-1;
   2380         }
   2381         for(j=start; j!=limit; j+=step) {
   2382             index0=visualMap[j];
   2383             index1=visualMap[j+step];
   2384             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
   2385                 logicalPos=BIDI_MIN(visualMap[start], index0);
   2386                 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   2387                                             saveLevels[logicalPos]^indexOddBit);
   2388                 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
   2389                 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
   2390                 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
   2391                 runs[i+addedRuns].insertRemove=insertRemove;
   2392                 runs[i].insertRemove&=~insertRemove;
   2393                 start=j+step;
   2394                 addedRuns--;
   2395             }
   2396         }
   2397         if(addedRuns) {
   2398             runs[i+addedRuns]=runs[i];
   2399         }
   2400         logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
   2401         runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   2402                                             saveLevels[logicalPos]^indexOddBit);
   2403     }
   2404 
   2405   cleanup1:
   2406     /* restore initial paraLevel */
   2407     pBiDi->paraLevel^=1;
   2408   cleanup2:
   2409     /* restore real text */
   2410     pBiDi->text=text;
   2411     pBiDi->length=saveLength;
   2412     pBiDi->originalLength=length;
   2413     pBiDi->direction=saveDirection;
   2414     /* the saved levels should never excess levelsSize, but we check anyway */
   2415     if(saveLength>pBiDi->levelsSize) {
   2416         saveLength=pBiDi->levelsSize;
   2417     }
   2418     uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
   2419     pBiDi->trailingWSStart=saveTrailingWSStart;
   2420     /* free memory for mapping table and visual text */
   2421     uprv_free(runsOnlyMemory);
   2422     if(pBiDi->runCount>1) {
   2423         pBiDi->direction=UBIDI_MIXED;
   2424     }
   2425   cleanup3:
   2426     pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
   2427 }
   2428 
   2429 /* ubidi_setPara ------------------------------------------------------------ */
   2430 
   2431 U_CAPI void U_EXPORT2
   2432 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
   2433               UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
   2434               UErrorCode *pErrorCode) {
   2435     UBiDiDirection direction;
   2436 
   2437     /* check the argument values */
   2438     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2439     if(pBiDi==NULL || text==NULL || length<-1 ||
   2440        (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
   2441         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2442         return;
   2443     }
   2444 
   2445     if(length==-1) {
   2446         length=u_strlen(text);
   2447     }
   2448 
   2449     /* special treatment for RUNS_ONLY mode */
   2450     if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
   2451         setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
   2452         return;
   2453     }
   2454 
   2455     /* initialize the UBiDi structure */
   2456     pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
   2457     pBiDi->text=text;
   2458     pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
   2459     pBiDi->paraLevel=paraLevel;
   2460     pBiDi->direction=paraLevel&1;
   2461     pBiDi->paraCount=1;
   2462 
   2463     pBiDi->dirProps=NULL;
   2464     pBiDi->levels=NULL;
   2465     pBiDi->runs=NULL;
   2466     pBiDi->insertPoints.size=0;         /* clean up from last call */
   2467     pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
   2468 
   2469     /*
   2470      * Save the original paraLevel if contextual; otherwise, set to 0.
   2471      */
   2472     pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
   2473 
   2474     if(length==0) {
   2475         /*
   2476          * For an empty paragraph, create a UBiDi object with the paraLevel and
   2477          * the flags and the direction set but without allocating zero-length arrays.
   2478          * There is nothing more to do.
   2479          */
   2480         if(IS_DEFAULT_LEVEL(paraLevel)) {
   2481             pBiDi->paraLevel&=1;
   2482             pBiDi->defaultParaLevel=0;
   2483         }
   2484         pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
   2485         pBiDi->runCount=0;
   2486         pBiDi->paraCount=0;
   2487         setParaSuccess(pBiDi);          /* mark successful setPara */
   2488         return;
   2489     }
   2490 
   2491     pBiDi->runCount=-1;
   2492 
   2493     /* allocate paras memory */
   2494     if(pBiDi->parasMemory)
   2495         pBiDi->paras=pBiDi->parasMemory;
   2496     else
   2497         pBiDi->paras=pBiDi->simpleParas;
   2498 
   2499     /*
   2500      * Get the directional properties,
   2501      * the flags bit-set, and
   2502      * determine the paragraph level if necessary.
   2503      */
   2504     if(getDirPropsMemory(pBiDi, length)) {
   2505         pBiDi->dirProps=pBiDi->dirPropsMemory;
   2506         if(!getDirProps(pBiDi)) {
   2507             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2508             return;
   2509         }
   2510     } else {
   2511         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2512         return;
   2513     }
   2514     /* the processed length may have changed if UBIDI_OPTION_STREAMING */
   2515     length= pBiDi->length;
   2516     pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
   2517 
   2518     /* are explicit levels specified? */
   2519     if(embeddingLevels==NULL) {
   2520         /* no: determine explicit levels according to the (Xn) rules */\
   2521         if(getLevelsMemory(pBiDi, length)) {
   2522             pBiDi->levels=pBiDi->levelsMemory;
   2523             direction=resolveExplicitLevels(pBiDi, pErrorCode);
   2524             if(U_FAILURE(*pErrorCode)) {
   2525                 return;
   2526             }
   2527         } else {
   2528             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2529             return;
   2530         }
   2531     } else {
   2532         /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
   2533         pBiDi->levels=embeddingLevels;
   2534         direction=checkExplicitLevels(pBiDi, pErrorCode);
   2535         if(U_FAILURE(*pErrorCode)) {
   2536             return;
   2537         }
   2538     }
   2539 
   2540     /* allocate isolate memory */
   2541     if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE)
   2542         pBiDi->isolates=pBiDi->simpleIsolates;
   2543     else
   2544         if(pBiDi->isolateCount<=pBiDi->isolatesSize)
   2545             pBiDi->isolates=pBiDi->isolatesMemory;
   2546         else {
   2547             if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
   2548                 pBiDi->isolates=pBiDi->isolatesMemory;
   2549             } else {
   2550                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2551                 return;
   2552             }
   2553         }
   2554     pBiDi->isolateCount=-1;             /* current isolates stack entry == none */
   2555 
   2556     /*
   2557      * The steps after (X9) in the UBiDi algorithm are performed only if
   2558      * the paragraph text has mixed directionality!
   2559      */
   2560     pBiDi->direction=direction;
   2561     switch(direction) {
   2562     case UBIDI_LTR:
   2563         /* make sure paraLevel is even */
   2564         pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
   2565 
   2566         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
   2567         pBiDi->trailingWSStart=0;
   2568         break;
   2569     case UBIDI_RTL:
   2570         /* make sure paraLevel is odd */
   2571         pBiDi->paraLevel|=1;
   2572 
   2573         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
   2574         pBiDi->trailingWSStart=0;
   2575         break;
   2576     default:
   2577         /*
   2578          *  Choose the right implicit state table
   2579          */
   2580         switch(pBiDi->reorderingMode) {
   2581         case UBIDI_REORDER_DEFAULT:
   2582             pBiDi->pImpTabPair=&impTab_DEFAULT;
   2583             break;
   2584         case UBIDI_REORDER_NUMBERS_SPECIAL:
   2585             pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
   2586             break;
   2587         case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
   2588             pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
   2589             break;
   2590         case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
   2591             pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
   2592             break;
   2593         case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
   2594             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
   2595                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
   2596             } else {
   2597                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
   2598             }
   2599             break;
   2600         case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
   2601             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
   2602                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
   2603             } else {
   2604                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
   2605             }
   2606             break;
   2607         default:
   2608             /* we should never get here */
   2609             U_ASSERT(FALSE);
   2610             break;
   2611         }
   2612         /*
   2613          * If there are no external levels specified and there
   2614          * are no significant explicit level codes in the text,
   2615          * then we can treat the entire paragraph as one run.
   2616          * Otherwise, we need to perform the following rules on runs of
   2617          * the text with the same embedding levels. (X10)
   2618          * "Significant" explicit level codes are ones that actually
   2619          * affect non-BN characters.
   2620          * Examples for "insignificant" ones are empty embeddings
   2621          * LRE-PDF, LRE-RLE-PDF-PDF, etc.
   2622          */
   2623         if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
   2624                                    !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
   2625             resolveImplicitLevels(pBiDi, 0, length,
   2626                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
   2627                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
   2628         } else {
   2629             /* sor, eor: start and end types of same-level-run */
   2630             UBiDiLevel *levels=pBiDi->levels;
   2631             int32_t start, limit=0;
   2632             UBiDiLevel level, nextLevel;
   2633             DirProp sor, eor;
   2634 
   2635             /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
   2636             level=GET_PARALEVEL(pBiDi, 0);
   2637             nextLevel=levels[0];
   2638             if(level<nextLevel) {
   2639                 eor=GET_LR_FROM_LEVEL(nextLevel);
   2640             } else {
   2641                 eor=GET_LR_FROM_LEVEL(level);
   2642             }
   2643 
   2644             do {
   2645                 /* determine start and limit of the run (end points just behind the run) */
   2646 
   2647                 /* the values for this run's start are the same as for the previous run's end */
   2648                 start=limit;
   2649                 level=nextLevel;
   2650                 if((start>0) && (pBiDi->dirProps[start-1]==B)) {
   2651                     /* except if this is a new paragraph, then set sor = para level */
   2652                     sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
   2653                 } else {
   2654                     sor=eor;
   2655                 }
   2656 
   2657                 /* search for the limit of this run */
   2658                 while(++limit<length && levels[limit]==level) {}
   2659 
   2660                 /* get the correct level of the next run */
   2661                 if(limit<length) {
   2662                     nextLevel=levels[limit];
   2663                 } else {
   2664                     nextLevel=GET_PARALEVEL(pBiDi, length-1);
   2665                 }
   2666 
   2667                 /* determine eor from max(level, nextLevel); sor is last run's eor */
   2668                 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
   2669                     eor=GET_LR_FROM_LEVEL(nextLevel);
   2670                 } else {
   2671                     eor=GET_LR_FROM_LEVEL(level);
   2672                 }
   2673 
   2674                 /* if the run consists of overridden directional types, then there
   2675                    are no implicit types to be resolved */
   2676                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
   2677                     resolveImplicitLevels(pBiDi, start, limit, sor, eor);
   2678                 } else {
   2679                     /* remove the UBIDI_LEVEL_OVERRIDE flags */
   2680                     do {
   2681                         levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
   2682                     } while(start<limit);
   2683                 }
   2684             } while(limit<length);
   2685         }
   2686         /* check if we got any memory shortage while adding insert points */
   2687         if (U_FAILURE(pBiDi->insertPoints.errorCode))
   2688         {
   2689             *pErrorCode=pBiDi->insertPoints.errorCode;
   2690             return;
   2691         }
   2692         /* reset the embedding levels for some non-graphic characters (L1), (X9) */
   2693         adjustWSLevels(pBiDi);
   2694         break;
   2695     }
   2696     /* add RLM for inverse Bidi with contextual orientation resolving
   2697      * to RTL which would not round-trip otherwise
   2698      */
   2699     if((pBiDi->defaultParaLevel>0) &&
   2700        (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
   2701        ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
   2702         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
   2703         int32_t i, j, start, last;
   2704         UBiDiLevel level;
   2705         DirProp dirProp;
   2706         for(i=0; i<pBiDi->paraCount; i++) {
   2707             last=(pBiDi->paras[i].limit)-1;
   2708             level=pBiDi->paras[i].level;
   2709             if(level==0)
   2710                 continue;           /* LTR paragraph */
   2711             start= i==0 ? 0 : pBiDi->paras[i-1].limit;
   2712             for(j=last; j>=start; j--) {
   2713                 dirProp=pBiDi->dirProps[j];
   2714                 if(dirProp==L) {
   2715                     if(j<last) {
   2716                         while(pBiDi->dirProps[last]==B) {
   2717                             last--;
   2718                         }
   2719                     }
   2720                     addPoint(pBiDi, last, RLM_BEFORE);
   2721                     break;
   2722                 }
   2723                 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
   2724                     break;
   2725                 }
   2726             }
   2727         }
   2728     }
   2729 
   2730     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
   2731         pBiDi->resultLength -= pBiDi->controlCount;
   2732     } else {
   2733         pBiDi->resultLength += pBiDi->insertPoints.size;
   2734     }
   2735     setParaSuccess(pBiDi);              /* mark successful setPara */
   2736 }
   2737 
   2738 U_CAPI void U_EXPORT2
   2739 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
   2740     if(pBiDi!=NULL) {
   2741         pBiDi->orderParagraphsLTR=orderParagraphsLTR;
   2742     }
   2743 }
   2744 
   2745 U_CAPI UBool U_EXPORT2
   2746 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
   2747     if(pBiDi!=NULL) {
   2748         return pBiDi->orderParagraphsLTR;
   2749     } else {
   2750         return FALSE;
   2751     }
   2752 }
   2753 
   2754 U_CAPI UBiDiDirection U_EXPORT2
   2755 ubidi_getDirection(const UBiDi *pBiDi) {
   2756     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2757         return pBiDi->direction;
   2758     } else {
   2759         return UBIDI_LTR;
   2760     }
   2761 }
   2762 
   2763 U_CAPI const UChar * U_EXPORT2
   2764 ubidi_getText(const UBiDi *pBiDi) {
   2765     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2766         return pBiDi->text;
   2767     } else {
   2768         return NULL;
   2769     }
   2770 }
   2771 
   2772 U_CAPI int32_t U_EXPORT2
   2773 ubidi_getLength(const UBiDi *pBiDi) {
   2774     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2775         return pBiDi->originalLength;
   2776     } else {
   2777         return 0;
   2778     }
   2779 }
   2780 
   2781 U_CAPI int32_t U_EXPORT2
   2782 ubidi_getProcessedLength(const UBiDi *pBiDi) {
   2783     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2784         return pBiDi->length;
   2785     } else {
   2786         return 0;
   2787     }
   2788 }
   2789 
   2790 U_CAPI int32_t U_EXPORT2
   2791 ubidi_getResultLength(const UBiDi *pBiDi) {
   2792     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2793         return pBiDi->resultLength;
   2794     } else {
   2795         return 0;
   2796     }
   2797 }
   2798 
   2799 /* paragraphs API functions ------------------------------------------------- */
   2800 
   2801 U_CAPI UBiDiLevel U_EXPORT2
   2802 ubidi_getParaLevel(const UBiDi *pBiDi) {
   2803     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2804         return pBiDi->paraLevel;
   2805     } else {
   2806         return 0;
   2807     }
   2808 }
   2809 
   2810 U_CAPI int32_t U_EXPORT2
   2811 ubidi_countParagraphs(UBiDi *pBiDi) {
   2812     if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
   2813         return 0;
   2814     } else {
   2815         return pBiDi->paraCount;
   2816     }
   2817 }
   2818 
   2819 U_CAPI void U_EXPORT2
   2820 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
   2821                           int32_t *pParaStart, int32_t *pParaLimit,
   2822                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
   2823     int32_t paraStart;
   2824 
   2825     /* check the argument values */
   2826     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2827     RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
   2828     RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
   2829 
   2830     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
   2831     if(paraIndex) {
   2832         paraStart=pBiDi->paras[paraIndex-1].limit;
   2833     } else {
   2834         paraStart=0;
   2835     }
   2836     if(pParaStart!=NULL) {
   2837         *pParaStart=paraStart;
   2838     }
   2839     if(pParaLimit!=NULL) {
   2840         *pParaLimit=pBiDi->paras[paraIndex].limit;
   2841     }
   2842     if(pParaLevel!=NULL) {
   2843         *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
   2844     }
   2845 }
   2846 
   2847 U_CAPI int32_t U_EXPORT2
   2848 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
   2849                           int32_t *pParaStart, int32_t *pParaLimit,
   2850                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
   2851     int32_t paraIndex;
   2852 
   2853     /* check the argument values */
   2854     /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
   2855     RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
   2856     RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
   2857     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
   2858     RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
   2859 
   2860     for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
   2861     ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
   2862     return paraIndex;
   2863 }
   2864 
   2865 U_CAPI void U_EXPORT2
   2866 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
   2867                        const void *newContext, UBiDiClassCallback **oldFn,
   2868                        const void **oldContext, UErrorCode *pErrorCode)
   2869 {
   2870     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2871     if(pBiDi==NULL) {
   2872         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2873         return;
   2874     }
   2875     if( oldFn )
   2876     {
   2877         *oldFn = pBiDi->fnClassCallback;
   2878     }
   2879     if( oldContext )
   2880     {
   2881         *oldContext = pBiDi->coClassCallback;
   2882     }
   2883     pBiDi->fnClassCallback = newFn;
   2884     pBiDi->coClassCallback = newContext;
   2885 }
   2886 
   2887 U_CAPI void U_EXPORT2
   2888 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
   2889 {
   2890     if(pBiDi==NULL) {
   2891         return;
   2892     }
   2893     if( fn )
   2894     {
   2895         *fn = pBiDi->fnClassCallback;
   2896     }
   2897     if( context )
   2898     {
   2899         *context = pBiDi->coClassCallback;
   2900     }
   2901 }
   2902 
   2903 U_CAPI UCharDirection U_EXPORT2
   2904 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
   2905 {
   2906     UCharDirection dir;
   2907 
   2908     if( pBiDi->fnClassCallback == NULL ||
   2909         (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
   2910     {
   2911         dir = ubidi_getClass(pBiDi->bdp, c);
   2912     }
   2913     if(dir >= U_CHAR_DIRECTION_COUNT) {
   2914         dir = ON;
   2915     }
   2916     return dir;
   2917 }
   2918