Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  ubidi.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 1999jul27
     14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
     15 */
     16 
     17 #include "cmemory.h"
     18 #include "unicode/utypes.h"
     19 #include "unicode/ustring.h"
     20 #include "unicode/uchar.h"
     21 #include "unicode/ubidi.h"
     22 #include "ubidi_props.h"
     23 #include "ubidiimp.h"
     24 #include "uassert.h"
     25 
     26 /*
     27  * General implementation notes:
     28  *
     29  * Throughout the implementation, there are comments like (W2) that refer to
     30  * rules of the BiDi algorithm in its version 5, in this example to the second
     31  * rule of the resolution of weak types.
     32  *
     33  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
     34  * character according to UTF-16, the second UChar gets the directional property of
     35  * the entire character assigned, while the first one gets a BN, a boundary
     36  * neutral, type, which is ignored by most of the algorithm according to
     37  * rule (X9) and the implementation suggestions of the BiDi algorithm.
     38  *
     39  * Later, adjustWSLevels() will set the level for each BN to that of the
     40  * following character (UChar), which results in surrogate pairs getting the
     41  * same level on each of their surrogates.
     42  *
     43  * In a UTF-8 implementation, the same thing could be done: the last byte of
     44  * a multi-byte sequence would get the "real" property, while all previous
     45  * bytes of that sequence would get BN.
     46  *
     47  * It is not possible to assign all those parts of a character the same real
     48  * property because this would fail in the resolution of weak types with rules
     49  * that look at immediately surrounding types.
     50  *
     51  * As a related topic, this implementation does not remove Boundary Neutral
     52  * types from the input, but ignores them wherever this is relevant.
     53  * For example, the loop for the resolution of the weak types reads
     54  * types until it finds a non-BN.
     55  * Also, explicit embedding codes are neither changed into BN nor removed.
     56  * They are only treated the same way real BNs are.
     57  * As stated before, adjustWSLevels() takes care of them at the end.
     58  * For the purpose of conformance, the levels of all these codes
     59  * do not matter.
     60  *
     61  * Note that this implementation never modifies the dirProps
     62  * after the initial setup.
     63  *
     64  *
     65  * In this implementation, the resolution of weak types (Wn),
     66  * neutrals (Nn), and the assignment of the resolved level (In)
     67  * are all done in one single loop, in resolveImplicitLevels().
     68  * Changes of dirProp values are done on the fly, without writing
     69  * them back to the dirProps array.
     70  *
     71  *
     72  * This implementation contains code that allows to bypass steps of the
     73  * algorithm that are not needed on the specific paragraph
     74  * in order to speed up the most common cases considerably,
     75  * like text that is entirely LTR, or RTL text without numbers.
     76  *
     77  * Most of this is done by setting a bit for each directional property
     78  * in a flags variable and later checking for whether there are
     79  * any LTR characters or any RTL characters, or both, whether
     80  * there are any explicit embedding codes, etc.
     81  *
     82  * If the (Xn) steps are performed, then the flags are re-evaluated,
     83  * because they will then not contain the embedding codes any more
     84  * and will be adjusted for override codes, so that subsequently
     85  * more bypassing may be possible than what the initial flags suggested.
     86  *
     87  * If the text is not mixed-directional, then the
     88  * algorithm steps for the weak type resolution are not performed,
     89  * and all levels are set to the paragraph level.
     90  *
     91  * If there are no explicit embedding codes, then the (Xn) steps
     92  * are not performed.
     93  *
     94  * If embedding levels are supplied as a parameter, then all
     95  * explicit embedding codes are ignored, and the (Xn) steps
     96  * are not performed.
     97  *
     98  * White Space types could get the level of the run they belong to,
     99  * and are checked with a test of (flags&MASK_EMBEDDING) to
    100  * consider if the paragraph direction should be considered in
    101  * the flags variable.
    102  *
    103  * If there are no White Space types in the paragraph, then
    104  * (L1) is not necessary in adjustWSLevels().
    105  */
    106 
    107 /* to avoid some conditional statements, use tiny constant arrays */
    108 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
    109 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
    110 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
    111 
    112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
    113 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
    114 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
    115 
    116 /* UBiDi object management -------------------------------------------------- */
    117 
    118 U_CAPI UBiDi * U_EXPORT2
    119 ubidi_open(void)
    120 {
    121     UErrorCode errorCode=U_ZERO_ERROR;
    122     return ubidi_openSized(0, 0, &errorCode);
    123 }
    124 
    125 U_CAPI UBiDi * U_EXPORT2
    126 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
    127     UBiDi *pBiDi;
    128 
    129     /* check the argument values */
    130     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    131         return NULL;
    132     } else if(maxLength<0 || maxRunCount<0) {
    133         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    134         return NULL;    /* invalid arguments */
    135     }
    136 
    137     /* allocate memory for the object */
    138     pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
    139     if(pBiDi==NULL) {
    140         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    141         return NULL;
    142     }
    143 
    144     /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
    145     uprv_memset(pBiDi, 0, sizeof(UBiDi));
    146 
    147     /* get BiDi properties */
    148     pBiDi->bdp=ubidi_getSingleton();
    149 
    150     /* allocate memory for arrays as requested */
    151     if(maxLength>0) {
    152         if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
    153             !getInitialLevelsMemory(pBiDi, maxLength)
    154         ) {
    155             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    156         }
    157     } else {
    158         pBiDi->mayAllocateText=TRUE;
    159     }
    160 
    161     if(maxRunCount>0) {
    162         if(maxRunCount==1) {
    163             /* use simpleRuns[] */
    164             pBiDi->runsSize=sizeof(Run);
    165         } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
    166             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    167         }
    168     } else {
    169         pBiDi->mayAllocateRuns=TRUE;
    170     }
    171 
    172     if(U_SUCCESS(*pErrorCode)) {
    173         return pBiDi;
    174     } else {
    175         ubidi_close(pBiDi);
    176         return NULL;
    177     }
    178 }
    179 
    180 /*
    181  * We are allowed to allocate memory if memory==NULL or
    182  * mayAllocate==TRUE for each array that we need.
    183  * We also try to grow memory as needed if we
    184  * allocate it.
    185  *
    186  * Assume sizeNeeded>0.
    187  * If *pMemory!=NULL, then assume *pSize>0.
    188  *
    189  * ### this realloc() may unnecessarily copy the old data,
    190  * which we know we don't need any more;
    191  * is this the best way to do this??
    192  */
    193 U_CFUNC UBool
    194 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
    195     void **pMemory = (void **)bidiMem;
    196     /* check for existing memory */
    197     if(*pMemory==NULL) {
    198         /* we need to allocate memory */
    199         if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
    200             *pSize=sizeNeeded;
    201             return TRUE;
    202         } else {
    203             return FALSE;
    204         }
    205     } else {
    206         if(sizeNeeded<=*pSize) {
    207             /* there is already enough memory */
    208             return TRUE;
    209         }
    210         else if(!mayAllocate) {
    211             /* not enough memory, and we must not allocate */
    212             return FALSE;
    213         } else {
    214             /* we try to grow */
    215             void *memory;
    216             /* in most cases, we do not need the copy-old-data part of
    217              * realloc, but it is needed when adding runs using getRunsMemory()
    218              * in setParaRunsOnly()
    219              */
    220             if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
    221                 *pMemory=memory;
    222                 *pSize=sizeNeeded;
    223                 return TRUE;
    224             } else {
    225                 /* we failed to grow */
    226                 return FALSE;
    227             }
    228         }
    229     }
    230 }
    231 
    232 U_CAPI void U_EXPORT2
    233 ubidi_close(UBiDi *pBiDi) {
    234     if(pBiDi!=NULL) {
    235         pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
    236         if(pBiDi->dirPropsMemory!=NULL) {
    237             uprv_free(pBiDi->dirPropsMemory);
    238         }
    239         if(pBiDi->levelsMemory!=NULL) {
    240             uprv_free(pBiDi->levelsMemory);
    241         }
    242         if(pBiDi->runsMemory!=NULL) {
    243             uprv_free(pBiDi->runsMemory);
    244         }
    245         if(pBiDi->parasMemory!=NULL) {
    246             uprv_free(pBiDi->parasMemory);
    247         }
    248         if(pBiDi->insertPoints.points!=NULL) {
    249             uprv_free(pBiDi->insertPoints.points);
    250         }
    251 
    252         uprv_free(pBiDi);
    253     }
    254 }
    255 
    256 /* set to approximate "inverse BiDi" ---------------------------------------- */
    257 
    258 U_CAPI void U_EXPORT2
    259 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
    260     if(pBiDi!=NULL) {
    261         pBiDi->isInverse=isInverse;
    262         pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
    263                                           : UBIDI_REORDER_DEFAULT;
    264     }
    265 }
    266 
    267 U_CAPI UBool U_EXPORT2
    268 ubidi_isInverse(UBiDi *pBiDi) {
    269     if(pBiDi!=NULL) {
    270         return pBiDi->isInverse;
    271     } else {
    272         return FALSE;
    273     }
    274 }
    275 
    276 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
    277  * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
    278  * concept of RUNS_ONLY which is a double operation.
    279  * It could be advantageous to divide this into 3 concepts:
    280  * a) Operation: direct / inverse / RUNS_ONLY
    281  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
    282  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
    283  * This would allow combinations not possible today like RUNS_ONLY with
    284  * NUMBERS_SPECIAL.
    285  * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
    286  * REMOVE_CONTROLS for the inverse step.
    287  * Not all combinations would be supported, and probably not all do make sense.
    288  * This would need to document which ones are supported and what are the
    289  * fallbacks for unsupported combinations.
    290  */
    291 U_CAPI void U_EXPORT2
    292 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
    293     if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
    294                         && (reorderingMode < UBIDI_REORDER_COUNT)) {
    295         pBiDi->reorderingMode = reorderingMode;
    296         pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
    297     }
    298 }
    299 
    300 U_CAPI UBiDiReorderingMode U_EXPORT2
    301 ubidi_getReorderingMode(UBiDi *pBiDi) {
    302     if (pBiDi!=NULL) {
    303         return pBiDi->reorderingMode;
    304     } else {
    305         return UBIDI_REORDER_DEFAULT;
    306     }
    307 }
    308 
    309 U_CAPI void U_EXPORT2
    310 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
    311     if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
    312         reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
    313     }
    314     if (pBiDi!=NULL) {
    315         pBiDi->reorderingOptions=reorderingOptions;
    316     }
    317 }
    318 
    319 U_CAPI uint32_t U_EXPORT2
    320 ubidi_getReorderingOptions(UBiDi *pBiDi) {
    321     if (pBiDi!=NULL) {
    322         return pBiDi->reorderingOptions;
    323     } else {
    324         return 0;
    325     }
    326 }
    327 
    328 U_CAPI UBiDiDirection U_EXPORT2
    329 ubidi_getBaseDirection(const UChar *text,
    330 int32_t length){
    331 
    332     int32_t i;
    333     UChar32 uchar;
    334     UCharDirection dir;
    335 
    336     if( text==NULL || length<-1 ){
    337         return UBIDI_NEUTRAL;
    338     }
    339 
    340     if(length==-1) {
    341         length=u_strlen(text);
    342     }
    343 
    344     for( i = 0 ; i < length; ) {
    345         /* i is incremented by U16_NEXT */
    346         U16_NEXT(text, i, length, uchar);
    347         dir = u_charDirection(uchar);
    348         if( dir == U_LEFT_TO_RIGHT )
    349                 return UBIDI_LTR;
    350         if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
    351                 return UBIDI_RTL;
    352     }
    353     return UBIDI_NEUTRAL;
    354 }
    355 
    356 /* perform (P2)..(P3) ------------------------------------------------------- */
    357 
    358 static DirProp
    359 firstL_R_AL(UBiDi *pBiDi) {
    360     /* return first strong char after the last B in prologue if any */
    361     const UChar *text=pBiDi->prologue;
    362     int32_t length=pBiDi->proLength;
    363     int32_t i;
    364     UChar32 uchar;
    365     DirProp dirProp, result=ON;
    366     for(i=0; i<length; ) {
    367         /* i is incremented by U16_NEXT */
    368         U16_NEXT(text, i, length, uchar);
    369         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
    370         if(result==ON) {
    371             if(dirProp==L || dirProp==R || dirProp==AL) {
    372                 result=dirProp;
    373             }
    374         } else {
    375             if(dirProp==B) {
    376                 result=ON;
    377             }
    378         }
    379     }
    380     return result;
    381 }
    382 
    383 /*
    384  * Get the directional properties for the text,
    385  * calculate the flags bit-set, and
    386  * determine the paragraph level if necessary.
    387  */
    388 static void
    389 getDirProps(UBiDi *pBiDi) {
    390     const UChar *text=pBiDi->text;
    391     DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
    392 
    393     int32_t i=0, i1, length=pBiDi->originalLength;
    394     Flags flags=0;      /* collect all directionalities in the text */
    395     UChar32 uchar;
    396     DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */
    397     UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
    398     /* for inverse BiDi, the default para level is set to RTL if there is a
    399        strong R or AL character at either end of the text                           */
    400     UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
    401             (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
    402              pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
    403     int32_t lastArabicPos=-1;
    404     int32_t controlCount=0;
    405     UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
    406                                        UBIDI_OPTION_REMOVE_CONTROLS);
    407 
    408     typedef enum {
    409          NOT_CONTEXTUAL,                /* 0: not contextual paraLevel */
    410          LOOKING_FOR_STRONG,            /* 1: looking for first strong char */
    411          FOUND_STRONG_CHAR              /* 2: found first strong char       */
    412     } State;
    413     State state;
    414     int32_t paraStart=0;                /* index of first char in paragraph */
    415     DirProp paraDir;                    /* == CONTEXT_RTL within paragraphs
    416                                            starting with strong R char      */
    417     DirProp lastStrongDir=0;            /* for default level & inverse BiDi */
    418     int32_t lastStrongLTR=0;            /* for STREAMING option             */
    419 
    420     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
    421         pBiDi->length=0;
    422         lastStrongLTR=0;
    423     }
    424     if(isDefaultLevel) {
    425         DirProp lastStrong;
    426         paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0;
    427         if(pBiDi->proLength>0 &&
    428            (lastStrong=firstL_R_AL(pBiDi))!=ON) {
    429             paraDir=(lastStrong==L) ? 0 : CONTEXT_RTL;
    430             state=FOUND_STRONG_CHAR;
    431         } else {
    432             paraDir=paraDirDefault;
    433             state=LOOKING_FOR_STRONG;
    434         }
    435         lastStrongDir=paraDir;
    436     } else {
    437         state=NOT_CONTEXTUAL;
    438         paraDir=0;
    439     }
    440     /* count paragraphs and determine the paragraph level (P2..P3) */
    441     /*
    442      * see comment in ubidi.h:
    443      * the DEFAULT_XXX values are designed so that
    444      * their bit 0 alone yields the intended default
    445      */
    446     for( /* i=0 above */ ; i<length; ) {
    447         /* i is incremented by U16_NEXT */
    448         U16_NEXT(text, i, length, uchar);
    449         flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
    450         dirProps[i-1]=dirProp|paraDir;
    451         if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
    452             flags|=DIRPROP_FLAG(BN);
    453             dirProps[i-2]=(DirProp)(BN|paraDir);
    454         }
    455         if(state==LOOKING_FOR_STRONG) {
    456             if(dirProp==L) {
    457                 state=FOUND_STRONG_CHAR;
    458                 if(paraDir) {
    459                     paraDir=0;
    460                     for(i1=paraStart; i1<i; i1++) {
    461                         dirProps[i1]&=~CONTEXT_RTL;
    462                     }
    463                 }
    464                 continue;
    465             }
    466             if(dirProp==R || dirProp==AL) {
    467                 state=FOUND_STRONG_CHAR;
    468                 if(paraDir==0) {
    469                     paraDir=CONTEXT_RTL;
    470                     for(i1=paraStart; i1<i; i1++) {
    471                         dirProps[i1]|=CONTEXT_RTL;
    472                     }
    473                 }
    474                 continue;
    475             }
    476         }
    477         if(dirProp==L) {
    478             lastStrongDir=0;
    479             lastStrongLTR=i;            /* i is index to next character */
    480         }
    481         else if(dirProp==R) {
    482             lastStrongDir=CONTEXT_RTL;
    483         }
    484         else if(dirProp==AL) {
    485             lastStrongDir=CONTEXT_RTL;
    486             lastArabicPos=i-1;
    487         }
    488         else if(dirProp==B) {
    489             if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
    490                 pBiDi->length=i;        /* i is index to next character */
    491             }
    492             if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
    493                 for( ; paraStart<i; paraStart++) {
    494                     dirProps[paraStart]|=CONTEXT_RTL;
    495                 }
    496             }
    497             if(i<length) {              /* B not last char in text */
    498                 if(!((uchar==CR) && (text[i]==LF))) {
    499                     pBiDi->paraCount++;
    500                 }
    501                 if(isDefaultLevel) {
    502                     state=LOOKING_FOR_STRONG;
    503                     paraStart=i;        /* i is index to next character */
    504                     paraDir=paraDirDefault;
    505                     lastStrongDir=paraDirDefault;
    506                 }
    507             }
    508         }
    509         if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) {
    510             controlCount++;
    511         }
    512     }
    513     if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
    514         for(i1=paraStart; i1<length; i1++) {
    515             dirProps[i1]|=CONTEXT_RTL;
    516         }
    517     }
    518     if(isDefaultLevel) {
    519         pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0);
    520     }
    521     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
    522         if((lastStrongLTR>pBiDi->length) &&
    523            (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) {
    524             pBiDi->length = lastStrongLTR;
    525         }
    526         if(pBiDi->length<pBiDi->originalLength) {
    527             pBiDi->paraCount--;
    528         }
    529     }
    530     /* The following line does nothing new for contextual paraLevel, but is
    531        needed for absolute paraLevel.                               */
    532     flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    533 
    534     if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
    535         flags|=DIRPROP_FLAG(L);
    536     }
    537 
    538     pBiDi->controlCount = controlCount;
    539     pBiDi->flags=flags;
    540     pBiDi->lastArabicPos=lastArabicPos;
    541 }
    542 
    543 /* perform (X1)..(X9) ------------------------------------------------------- */
    544 
    545 /* determine if the text is mixed-directional or single-directional */
    546 static UBiDiDirection
    547 directionFromFlags(UBiDi *pBiDi) {
    548     Flags flags=pBiDi->flags;
    549     /* if the text contains AN and neutrals, then some neutrals may become RTL */
    550     if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
    551         return UBIDI_LTR;
    552     } else if(!(flags&MASK_LTR)) {
    553         return UBIDI_RTL;
    554     } else {
    555         return UBIDI_MIXED;
    556     }
    557 }
    558 
    559 /*
    560  * Resolve the explicit levels as specified by explicit embedding codes.
    561  * Recalculate the flags to have them reflect the real properties
    562  * after taking the explicit embeddings into account.
    563  *
    564  * The BiDi algorithm is designed to result in the same behavior whether embedding
    565  * levels are externally specified (from "styled text", supposedly the preferred
    566  * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
    567  * That is why (X9) instructs to remove all explicit codes (and BN).
    568  * However, in a real implementation, this removal of these codes and their index
    569  * positions in the plain text is undesirable since it would result in
    570  * reallocated, reindexed text.
    571  * Instead, this implementation leaves the codes in there and just ignores them
    572  * in the subsequent processing.
    573  * In order to get the same reordering behavior, positions with a BN or an
    574  * explicit embedding code just get the same level assigned as the last "real"
    575  * character.
    576  *
    577  * Some implementations, not this one, then overwrite some of these
    578  * directionality properties at "real" same-level-run boundaries by
    579  * L or R codes so that the resolution of weak types can be performed on the
    580  * entire paragraph at once instead of having to parse it once more and
    581  * perform that resolution on same-level-runs.
    582  * This limits the scope of the implicit rules in effectively
    583  * the same way as the run limits.
    584  *
    585  * Instead, this implementation does not modify these codes.
    586  * On one hand, the paragraph has to be scanned for same-level-runs, but
    587  * on the other hand, this saves another loop to reset these codes,
    588  * or saves making and modifying a copy of dirProps[].
    589  *
    590  *
    591  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
    592  *
    593  *
    594  * Handling the stack of explicit levels (Xn):
    595  *
    596  * With the BiDi stack of explicit levels,
    597  * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
    598  * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
    599  *
    600  * In order to have a correct push-pop semantics even in the case of overflows,
    601  * there are two overflow counters:
    602  * - countOver60 is incremented with each LRx at level 60
    603  * - from level 60, one RLx increases the level to 61
    604  * - countOver61 is incremented with each LRx and RLx at level 61
    605  *
    606  * Popping levels with PDF must work in the opposite order so that level 61
    607  * is correct at the correct point. Underflows (too many PDFs) must be checked.
    608  *
    609  * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
    610  */
    611 static UBiDiDirection
    612 resolveExplicitLevels(UBiDi *pBiDi) {
    613     const DirProp *dirProps=pBiDi->dirProps;
    614     UBiDiLevel *levels=pBiDi->levels;
    615     const UChar *text=pBiDi->text;
    616 
    617     int32_t i=0, length=pBiDi->length;
    618     Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
    619     DirProp dirProp;
    620     UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
    621 
    622     UBiDiDirection direction;
    623     int32_t paraIndex=0;
    624 
    625     /* determine if the text is mixed-directional or single-directional */
    626     direction=directionFromFlags(pBiDi);
    627 
    628     /* we may not need to resolve any explicit levels, but for multiple
    629        paragraphs we want to loop on all chars to set the para boundaries */
    630     if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) {
    631         /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
    632     } else if((pBiDi->paraCount==1) &&
    633               (!(flags&MASK_EXPLICIT) ||
    634                (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) {
    635         /* mixed, but all characters are at the same embedding level */
    636         /* or we are in "inverse BiDi" */
    637         /* and we don't have contextual multiple paragraphs with some B char */
    638         /* set all levels to the paragraph level */
    639         for(i=0; i<length; ++i) {
    640             levels[i]=level;
    641         }
    642     } else {
    643         /* continue to perform (Xn) */
    644 
    645         /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
    646         /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
    647         UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
    648 
    649         UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL];        /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
    650         uint32_t countOver60=0, countOver61=0;  /* count overflows of explicit levels */
    651 
    652         /* recalculate the flags */
    653         flags=0;
    654 
    655         for(i=0; i<length; ++i) {
    656             dirProp=NO_CONTEXT_RTL(dirProps[i]);
    657             switch(dirProp) {
    658             case LRE:
    659             case LRO:
    660                 /* (X3, X5) */
    661                 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
    662                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
    663                     stack[stackTop]=embeddingLevel;
    664                     ++stackTop;
    665                     embeddingLevel=newLevel;
    666                     if(dirProp==LRO) {
    667                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
    668                     }
    669                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE
    670                        since this has already been done for newLevel which is
    671                        the source for embeddingLevel.
    672                      */
    673                 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
    674                     ++countOver61;
    675                 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
    676                     ++countOver60;
    677                 }
    678                 flags|=DIRPROP_FLAG(BN);
    679                 break;
    680             case RLE:
    681             case RLO:
    682                 /* (X2, X4) */
    683                 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
    684                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
    685                     stack[stackTop]=embeddingLevel;
    686                     ++stackTop;
    687                     embeddingLevel=newLevel;
    688                     if(dirProp==RLO) {
    689                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
    690                     }
    691                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE
    692                        since this has already been done for newLevel which is
    693                        the source for embeddingLevel.
    694                      */
    695                 } else {
    696                     ++countOver61;
    697                 }
    698                 flags|=DIRPROP_FLAG(BN);
    699                 break;
    700             case PDF:
    701                 /* (X7) */
    702                 /* handle all the overflow cases first */
    703                 if(countOver61>0) {
    704                     --countOver61;
    705                 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
    706                     /* handle LRx overflows from level 60 */
    707                     --countOver60;
    708                 } else if(stackTop>0) {
    709                     /* this is the pop operation; it also pops level 61 while countOver60>0 */
    710                     --stackTop;
    711                     embeddingLevel=stack[stackTop];
    712                 /* } else { (underflow) */
    713                 }
    714                 flags|=DIRPROP_FLAG(BN);
    715                 break;
    716             case B:
    717                 stackTop=0;
    718                 countOver60=countOver61=0;
    719                 level=GET_PARALEVEL(pBiDi, i);
    720                 if((i+1)<length) {
    721                     embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
    722                     if(!((text[i]==CR) && (text[i+1]==LF))) {
    723                         pBiDi->paras[paraIndex++]=i+1;
    724                     }
    725                 }
    726                 flags|=DIRPROP_FLAG(B);
    727                 break;
    728             case BN:
    729                 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
    730                 /* they will get their levels set correctly in adjustWSLevels() */
    731                 flags|=DIRPROP_FLAG(BN);
    732                 break;
    733             default:
    734                 /* all other types get the "real" level */
    735                 if(level!=embeddingLevel) {
    736                     level=embeddingLevel;
    737                     if(level&UBIDI_LEVEL_OVERRIDE) {
    738                         flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
    739                     } else {
    740                         flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
    741                     }
    742                 }
    743                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
    744                     flags|=DIRPROP_FLAG(dirProp);
    745                 }
    746                 break;
    747             }
    748 
    749             /*
    750              * We need to set reasonable levels even on BN codes and
    751              * explicit codes because we will later look at same-level runs (X10).
    752              */
    753             levels[i]=level;
    754         }
    755         if(flags&MASK_EMBEDDING) {
    756             flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    757         }
    758         if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
    759             flags|=DIRPROP_FLAG(L);
    760         }
    761 
    762         /* subsequently, ignore the explicit codes and BN (X9) */
    763 
    764         /* again, determine if the text is mixed-directional or single-directional */
    765         pBiDi->flags=flags;
    766         direction=directionFromFlags(pBiDi);
    767     }
    768 
    769     return direction;
    770 }
    771 
    772 /*
    773  * Use a pre-specified embedding levels array:
    774  *
    775  * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
    776  * ignore all explicit codes (X9),
    777  * and check all the preset levels.
    778  *
    779  * Recalculate the flags to have them reflect the real properties
    780  * after taking the explicit embeddings into account.
    781  */
    782 static UBiDiDirection
    783 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
    784     const DirProp *dirProps=pBiDi->dirProps;
    785     DirProp dirProp;
    786     UBiDiLevel *levels=pBiDi->levels;
    787     const UChar *text=pBiDi->text;
    788 
    789     int32_t i, length=pBiDi->length;
    790     Flags flags=0;  /* collect all directionalities in the text */
    791     UBiDiLevel level;
    792     uint32_t paraIndex=0;
    793 
    794     for(i=0; i<length; ++i) {
    795         level=levels[i];
    796         dirProp=NO_CONTEXT_RTL(dirProps[i]);
    797         if(level&UBIDI_LEVEL_OVERRIDE) {
    798             /* keep the override flag in levels[i] but adjust the flags */
    799             level&=~UBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
    800             flags|=DIRPROP_FLAG_O(level);
    801         } else {
    802             /* set the flags */
    803             flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
    804         }
    805         if((level<GET_PARALEVEL(pBiDi, i) &&
    806             !((0==level)&&(dirProp==B))) ||
    807            (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
    808             /* level out of bounds */
    809             *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    810             return UBIDI_LTR;
    811         }
    812         if((dirProp==B) && ((i+1)<length)) {
    813             if(!((text[i]==CR) && (text[i+1]==LF))) {
    814                 pBiDi->paras[paraIndex++]=i+1;
    815             }
    816         }
    817     }
    818     if(flags&MASK_EMBEDDING) {
    819         flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    820     }
    821 
    822     /* determine if the text is mixed-directional or single-directional */
    823     pBiDi->flags=flags;
    824     return directionFromFlags(pBiDi);
    825 }
    826 
    827 /******************************************************************
    828  The Properties state machine table
    829 *******************************************************************
    830 
    831  All table cells are 8 bits:
    832       bits 0..4:  next state
    833       bits 5..7:  action to perform (if > 0)
    834 
    835  Cells may be of format "n" where n represents the next state
    836  (except for the rightmost column).
    837  Cells may also be of format "s(x,y)" where x represents an action
    838  to perform and y represents the next state.
    839 
    840 *******************************************************************
    841  Definitions and type for properties state table
    842 *******************************************************************
    843 */
    844 #define IMPTABPROPS_COLUMNS 14
    845 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
    846 #define GET_STATEPROPS(cell) ((cell)&0x1f)
    847 #define GET_ACTIONPROPS(cell) ((cell)>>5)
    848 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
    849 
    850 static const uint8_t groupProp[] =          /* dirProp regrouped */
    851 {
    852 /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
    853     0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
    854 };
    855 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
    856 
    857 /******************************************************************
    858 
    859       PROPERTIES  STATE  TABLE
    860 
    861  In table impTabProps,
    862       - the ON column regroups ON and WS
    863       - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
    864       - the Res column is the reduced property assigned to a run
    865 
    866  Action 1: process current run1, init new run1
    867         2: init new run2
    868         3: process run1, process run2, init new run1
    869         4: process run1, set run1=run2, init new run2
    870 
    871  Notes:
    872   1) This table is used in resolveImplicitLevels().
    873   2) This table triggers actions when there is a change in the Bidi
    874      property of incoming characters (action 1).
    875   3) Most such property sequences are processed immediately (in
    876      fact, passed to processPropertySeq().
    877   4) However, numbers are assembled as one sequence. This means
    878      that undefined situations (like CS following digits, until
    879      it is known if the next char will be a digit) are held until
    880      following chars define them.
    881      Example: digits followed by CS, then comes another CS or ON;
    882               the digits will be processed, then the CS assigned
    883               as the start of an ON sequence (action 3).
    884   5) There are cases where more than one sequence must be
    885      processed, for instance digits followed by CS followed by L:
    886      the digits must be processed as one sequence, and the CS
    887      must be processed as an ON sequence, all this before starting
    888      assembling chars for the opening L sequence.
    889 
    890 
    891 */
    892 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
    893 {
    894 /*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,  Res */
    895 /* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,  DirProp_ON },
    896 /* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),   DirProp_L },
    897 /* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),   DirProp_R },
    898 /* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,   DirProp_R },
    899 /* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),  DirProp_EN },
    900 /* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),  DirProp_AN },
    901 /* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),  DirProp_AN },
    902 /* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),  DirProp_ON },
    903 /* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),  DirProp_ON },
    904 /* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),  DirProp_ON },
    905 /*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),  DirProp_EN },
    906 /*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),  DirProp_EN },
    907 /*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),  DirProp_AN },
    908 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),  DirProp_AN },
    909 /*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),  DirProp_ON },
    910 /*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),   DirProp_S },
    911 /*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),   DirProp_S },
    912 /*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),   DirProp_B }
    913 };
    914 
    915 /*  we must undef macro s because the levels table have a different
    916  *  structure (4 bits for action and 4 bits for next state.
    917  */
    918 #undef s
    919 
    920 /******************************************************************
    921  The levels state machine tables
    922 *******************************************************************
    923 
    924  All table cells are 8 bits:
    925       bits 0..3:  next state
    926       bits 4..7:  action to perform (if > 0)
    927 
    928  Cells may be of format "n" where n represents the next state
    929  (except for the rightmost column).
    930  Cells may also be of format "s(x,y)" where x represents an action
    931  to perform and y represents the next state.
    932 
    933  This format limits each table to 16 states each and to 15 actions.
    934 
    935 *******************************************************************
    936  Definitions and type for levels state tables
    937 *******************************************************************
    938 */
    939 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
    940 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
    941 #define GET_STATE(cell) ((cell)&0x0f)
    942 #define GET_ACTION(cell) ((cell)>>4)
    943 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
    944 
    945 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
    946 typedef uint8_t ImpAct[];
    947 
    948 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
    949  * instead of having a pair of ImpTab and a pair of ImpAct.
    950  */
    951 typedef struct ImpTabPair {
    952     const void * pImpTab[2];
    953     const void * pImpAct[2];
    954 } ImpTabPair;
    955 
    956 /******************************************************************
    957 
    958       LEVELS  STATE  TABLES
    959 
    960  In all levels state tables,
    961       - state 0 is the initial state
    962       - the Res column is the increment to add to the text level
    963         for this property sequence.
    964 
    965  The impAct arrays for each table of a pair map the local action
    966  numbers of the table to the total list of actions. For instance,
    967  action 2 in a given table corresponds to the action number which
    968  appears in entry [2] of the impAct array for that table.
    969  The first entry of all impAct arrays must be 0.
    970 
    971  Action 1: init conditional sequence
    972         2: prepend conditional sequence to current sequence
    973         3: set ON sequence to new level - 1
    974         4: init EN/AN/ON sequence
    975         5: fix EN/AN/ON sequence followed by R
    976         6: set previous level sequence to level 2
    977 
    978  Notes:
    979   1) These tables are used in processPropertySeq(). The input
    980      is property sequences as determined by resolveImplicitLevels.
    981   2) Most such property sequences are processed immediately
    982      (levels are assigned).
    983   3) However, some sequences cannot be assigned a final level till
    984      one or more following sequences are received. For instance,
    985      ON following an R sequence within an even-level paragraph.
    986      If the following sequence is R, the ON sequence will be
    987      assigned basic run level+1, and so will the R sequence.
    988   4) S is generally handled like ON, since its level will be fixed
    989      to paragraph level in adjustWSLevels().
    990 
    991 */
    992 
    993 static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
    994 /*  In this table, conditional sequences receive the higher possible level
    995     until proven otherwise.
    996 */
    997 {
    998 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    999 /* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
   1000 /* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
   1001 /* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
   1002 /* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
   1003 /* 4 : R+ON       */ { s(2,0),     1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
   1004 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
   1005 };
   1006 static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
   1007 /*  In this table, conditional sequences receive the lower possible level
   1008     until proven otherwise.
   1009 */
   1010 {
   1011 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1012 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
   1013 /* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
   1014 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
   1015 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
   1016 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
   1017 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
   1018 };
   1019 static const ImpAct impAct0 = {0,1,2,3,4,5,6};
   1020 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
   1021                                            &impTabR_DEFAULT},
   1022                                           {&impAct0, &impAct0}};
   1023 
   1024 static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
   1025 /*  In this table, conditional sequences receive the higher possible level
   1026     until proven otherwise.
   1027 */
   1028 {
   1029 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1030 /* 0 : init       */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  0 },
   1031 /* 1 : L+EN/AN    */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  2 },
   1032 /* 2 : R          */ {     0 ,     2 ,    4 ,      4 , s(1,3),     0 ,     0 ,  1 },
   1033 /* 3 : R+ON       */ { s(2,0),     2 ,    4 ,      4 ,     3 ,     3 , s(2,0),  1 },
   1034 /* 4 : R+EN/AN    */ {     0 ,     2 ,    4 ,      4 , s(1,3), s(1,3),     0 ,  2 }
   1035   };
   1036 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
   1037                                                    &impTabR_DEFAULT},
   1038                                                   {&impAct0, &impAct0}};
   1039 
   1040 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
   1041 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
   1042     until proven that there is L or sor/eor on both sides. AN is handled like EN.
   1043 */
   1044 {
   1045 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1046 /* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
   1047 /* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
   1048 /* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
   1049 /* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
   1050 /* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
   1051 /* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
   1052 };
   1053 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
   1054 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
   1055     until proven that there is L on both sides. AN is handled like EN.
   1056 */
   1057 {
   1058 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1059 /* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1060 /* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
   1061 /* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
   1062 /* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
   1063 /* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
   1064 };
   1065 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
   1066                         {&impTabL_GROUP_NUMBERS_WITH_R,
   1067                          &impTabR_GROUP_NUMBERS_WITH_R},
   1068                         {&impAct0, &impAct0}};
   1069 
   1070 
   1071 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
   1072 /*  This table is identical to the Default LTR table except that EN and AN are
   1073     handled like L.
   1074 */
   1075 {
   1076 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1077 /* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
   1078 /* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
   1079 /* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
   1080 /* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
   1081 /* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
   1082 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
   1083 };
   1084 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
   1085 /*  This table is identical to the Default RTL table except that EN and AN are
   1086     handled like L.
   1087 */
   1088 {
   1089 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1090 /* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1091 /* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
   1092 /* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
   1093 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
   1094 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
   1095 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
   1096 };
   1097 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
   1098                         {&impTabL_INVERSE_NUMBERS_AS_L,
   1099                          &impTabR_INVERSE_NUMBERS_AS_L},
   1100                         {&impAct0, &impAct0}};
   1101 
   1102 static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
   1103 /*  In this table, conditional sequences receive the lower possible level
   1104     until proven otherwise.
   1105 */
   1106 {
   1107 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1108 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
   1109 /* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
   1110 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
   1111 /* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
   1112 /* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
   1113 /* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
   1114 /* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
   1115 };
   1116 static const ImpAct impAct1 = {0,1,11,12};
   1117 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
   1118  */
   1119 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
   1120                         {&impTabL_DEFAULT,
   1121                          &impTabR_INVERSE_LIKE_DIRECT},
   1122                         {&impAct0, &impAct1}};
   1123 
   1124 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
   1125 /*  The case handled in this table is (visually):  R EN L
   1126 */
   1127 {
   1128 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1129 /* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1130 /* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
   1131 /* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
   1132 /* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
   1133 /* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
   1134 /* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
   1135 /* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
   1136 };
   1137 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
   1138 /*  The cases handled in this table are (visually):  R EN L
   1139                                                      R L AN L
   1140 */
   1141 {
   1142 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1143 /* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1144 /* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
   1145 /* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
   1146 /* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
   1147 /* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
   1148 /* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
   1149 /* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
   1150 };
   1151 static const ImpAct impAct2 = {0,1,7,8,9,10};
   1152 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
   1153                         {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
   1154                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
   1155                         {&impAct0, &impAct2}};
   1156 
   1157 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
   1158                         {&impTabL_NUMBERS_SPECIAL,
   1159                          &impTabR_INVERSE_LIKE_DIRECT},
   1160                         {&impAct0, &impAct1}};
   1161 
   1162 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
   1163 /*  The case handled in this table is (visually):  R EN L
   1164 */
   1165 {
   1166 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
   1167 /* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
   1168 /* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
   1169 /* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
   1170 /* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
   1171 /* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
   1172 };
   1173 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
   1174                         {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
   1175                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
   1176                         {&impAct0, &impAct2}};
   1177 
   1178 #undef s
   1179 
   1180 typedef struct {
   1181     const ImpTab * pImpTab;             /* level table pointer          */
   1182     const ImpAct * pImpAct;             /* action map array             */
   1183     int32_t startON;                    /* start of ON sequence         */
   1184     int32_t startL2EN;                  /* start of level 2 sequence    */
   1185     int32_t lastStrongRTL;              /* index of last found R or AL  */
   1186     int32_t state;                      /* current state                */
   1187     UBiDiLevel runLevel;                /* run level before implicit solving */
   1188 } LevState;
   1189 
   1190 /*------------------------------------------------------------------------*/
   1191 
   1192 static void
   1193 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
   1194   /* param pos:     position where to insert
   1195      param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
   1196   */
   1197 {
   1198 #define FIRSTALLOC  10
   1199     Point point;
   1200     InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
   1201 
   1202     if (pInsertPoints->capacity == 0)
   1203     {
   1204         pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
   1205         if (pInsertPoints->points == NULL)
   1206         {
   1207             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
   1208             return;
   1209         }
   1210         pInsertPoints->capacity=FIRSTALLOC;
   1211     }
   1212     if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
   1213     {
   1214         void * savePoints=pInsertPoints->points;
   1215         pInsertPoints->points=uprv_realloc(pInsertPoints->points,
   1216                                            pInsertPoints->capacity*2*sizeof(Point));
   1217         if (pInsertPoints->points == NULL)
   1218         {
   1219             pInsertPoints->points=savePoints;
   1220             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
   1221             return;
   1222         }
   1223         else  pInsertPoints->capacity*=2;
   1224     }
   1225     point.pos=pos;
   1226     point.flag=flag;
   1227     pInsertPoints->points[pInsertPoints->size]=point;
   1228     pInsertPoints->size++;
   1229 #undef FIRSTALLOC
   1230 }
   1231 
   1232 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
   1233 
   1234 /*
   1235  * This implementation of the (Wn) rules applies all rules in one pass.
   1236  * In order to do so, it needs a look-ahead of typically 1 character
   1237  * (except for W5: sequences of ET) and keeps track of changes
   1238  * in a rule Wp that affect a later Wq (p<q).
   1239  *
   1240  * The (Nn) and (In) rules are also performed in that same single loop,
   1241  * but effectively one iteration behind for white space.
   1242  *
   1243  * Since all implicit rules are performed in one step, it is not necessary
   1244  * to actually store the intermediate directional properties in dirProps[].
   1245  */
   1246 
   1247 static void
   1248 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
   1249                    int32_t start, int32_t limit) {
   1250     uint8_t cell, oldStateSeq, actionSeq;
   1251     const ImpTab * pImpTab=pLevState->pImpTab;
   1252     const ImpAct * pImpAct=pLevState->pImpAct;
   1253     UBiDiLevel * levels=pBiDi->levels;
   1254     UBiDiLevel level, addLevel;
   1255     InsertPoints * pInsertPoints;
   1256     int32_t start0, k;
   1257 
   1258     start0=start;                           /* save original start position */
   1259     oldStateSeq=(uint8_t)pLevState->state;
   1260     cell=(*pImpTab)[oldStateSeq][_prop];
   1261     pLevState->state=GET_STATE(cell);       /* isolate the new state */
   1262     actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
   1263     addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
   1264 
   1265     if(actionSeq) {
   1266         switch(actionSeq) {
   1267         case 1:                         /* init ON seq */
   1268             pLevState->startON=start0;
   1269             break;
   1270 
   1271         case 2:                         /* prepend ON seq to current seq */
   1272             start=pLevState->startON;
   1273             break;
   1274 
   1275         case 3:                         /* L or S after possible relevant EN/AN */
   1276             /* check if we had EN after R/AL */
   1277             if (pLevState->startL2EN >= 0) {
   1278                 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
   1279             }
   1280             pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
   1281             /* check if we had any relevant EN/AN after R/AL */
   1282             pInsertPoints=&(pBiDi->insertPoints);
   1283             if ((pInsertPoints->capacity == 0) ||
   1284                 (pInsertPoints->size <= pInsertPoints->confirmed))
   1285             {
   1286                 /* nothing, just clean up */
   1287                 pLevState->lastStrongRTL=-1;
   1288                 /* check if we have a pending conditional segment */
   1289                 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
   1290                 if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
   1291                     start=pLevState->startON;   /* reset to basic run level */
   1292                 }
   1293                 if (_prop == DirProp_S)                /* add LRM before S */
   1294                 {
   1295                     addPoint(pBiDi, start0, LRM_BEFORE);
   1296                     pInsertPoints->confirmed=pInsertPoints->size;
   1297                 }
   1298                 break;
   1299             }
   1300             /* reset previous RTL cont to level for LTR text */
   1301             for (k=pLevState->lastStrongRTL+1; k<start0; k++)
   1302             {
   1303                 /* reset odd level, leave runLevel+2 as is */
   1304                 levels[k]=(levels[k] - 2) & ~1;
   1305             }
   1306             /* mark insert points as confirmed */
   1307             pInsertPoints->confirmed=pInsertPoints->size;
   1308             pLevState->lastStrongRTL=-1;
   1309             if (_prop == DirProp_S)            /* add LRM before S */
   1310             {
   1311                 addPoint(pBiDi, start0, LRM_BEFORE);
   1312                 pInsertPoints->confirmed=pInsertPoints->size;
   1313             }
   1314             break;
   1315 
   1316         case 4:                         /* R/AL after possible relevant EN/AN */
   1317             /* just clean up */
   1318             pInsertPoints=&(pBiDi->insertPoints);
   1319             if (pInsertPoints->capacity > 0)
   1320                 /* remove all non confirmed insert points */
   1321                 pInsertPoints->size=pInsertPoints->confirmed;
   1322             pLevState->startON=-1;
   1323             pLevState->startL2EN=-1;
   1324             pLevState->lastStrongRTL=limit - 1;
   1325             break;
   1326 
   1327         case 5:                         /* EN/AN after R/AL + possible cont */
   1328             /* check for real AN */
   1329             if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
   1330                 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
   1331             {
   1332                 /* real AN */
   1333                 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
   1334                 {
   1335                     /* just note the righmost digit as a strong RTL */
   1336                     pLevState->lastStrongRTL=limit - 1;
   1337                     break;
   1338                 }
   1339                 if (pLevState->startL2EN >= 0)  /* after EN, no AN */
   1340                 {
   1341                     addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
   1342                     pLevState->startL2EN=-2;
   1343                 }
   1344                 /* note AN */
   1345                 addPoint(pBiDi, start0, LRM_BEFORE);
   1346                 break;
   1347             }
   1348             /* if first EN/AN after R/AL */
   1349             if (pLevState->startL2EN == -1) {
   1350                 pLevState->startL2EN=start0;
   1351             }
   1352             break;
   1353 
   1354         case 6:                         /* note location of latest R/AL */
   1355             pLevState->lastStrongRTL=limit - 1;
   1356             pLevState->startON=-1;
   1357             break;
   1358 
   1359         case 7:                         /* L after R+ON/EN/AN */
   1360             /* include possible adjacent number on the left */
   1361             for (k=start0-1; k>=0 && !(levels[k]&1); k--);
   1362             if(k>=0) {
   1363                 addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
   1364                 pInsertPoints=&(pBiDi->insertPoints);
   1365                 pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
   1366             }
   1367             pLevState->startON=start0;
   1368             break;
   1369 
   1370         case 8:                         /* AN after L */
   1371             /* AN numbers between L text on both sides may be trouble. */
   1372             /* tentatively bracket with LRMs; will be confirmed if followed by L */
   1373             addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
   1374             addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
   1375             break;
   1376 
   1377         case 9:                         /* R after L+ON/EN/AN */
   1378             /* false alert, infirm LRMs around previous AN */
   1379             pInsertPoints=&(pBiDi->insertPoints);
   1380             pInsertPoints->size=pInsertPoints->confirmed;
   1381             if (_prop == DirProp_S)            /* add RLM before S */
   1382             {
   1383                 addPoint(pBiDi, start0, RLM_BEFORE);
   1384                 pInsertPoints->confirmed=pInsertPoints->size;
   1385             }
   1386             break;
   1387 
   1388         case 10:                        /* L after L+ON/AN */
   1389             level=pLevState->runLevel + addLevel;
   1390             for(k=pLevState->startON; k<start0; k++) {
   1391                 if (levels[k]<level)
   1392                     levels[k]=level;
   1393             }
   1394             pInsertPoints=&(pBiDi->insertPoints);
   1395             pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
   1396             pLevState->startON=start0;
   1397             break;
   1398 
   1399         case 11:                        /* L after L+ON+EN/AN/ON */
   1400             level=pLevState->runLevel;
   1401             for(k=start0-1; k>=pLevState->startON; k--) {
   1402                 if(levels[k]==level+3) {
   1403                     while(levels[k]==level+3) {
   1404                         levels[k--]-=2;
   1405                     }
   1406                     while(levels[k]==level) {
   1407                         k--;
   1408                     }
   1409                 }
   1410                 if(levels[k]==level+2) {
   1411                     levels[k]=level;
   1412                     continue;
   1413                 }
   1414                 levels[k]=level+1;
   1415             }
   1416             break;
   1417 
   1418         case 12:                        /* R after L+ON+EN/AN/ON */
   1419             level=pLevState->runLevel+1;
   1420             for(k=start0-1; k>=pLevState->startON; k--) {
   1421                 if(levels[k]>level) {
   1422                     levels[k]-=2;
   1423                 }
   1424             }
   1425             break;
   1426 
   1427         default:                        /* we should never get here */
   1428             U_ASSERT(FALSE);
   1429             break;
   1430         }
   1431     }
   1432     if((addLevel) || (start < start0)) {
   1433         level=pLevState->runLevel + addLevel;
   1434         for(k=start; k<limit; k++) {
   1435             levels[k]=level;
   1436         }
   1437     }
   1438 }
   1439 
   1440 static DirProp
   1441 lastL_R_AL(UBiDi *pBiDi) {
   1442     /* return last strong char at the end of the prologue */
   1443     const UChar *text=pBiDi->prologue;
   1444     int32_t length=pBiDi->proLength;
   1445     int32_t i;
   1446     UChar32 uchar;
   1447     DirProp dirProp;
   1448     for(i=length; i>0; ) {
   1449         /* i is decremented by U16_PREV */
   1450         U16_PREV(text, 0, i, uchar);
   1451         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
   1452         if(dirProp==L) {
   1453             return DirProp_L;
   1454         }
   1455         if(dirProp==R || dirProp==AL) {
   1456             return DirProp_R;
   1457         }
   1458         if(dirProp==B) {
   1459             return DirProp_ON;
   1460         }
   1461     }
   1462     return DirProp_ON;
   1463 }
   1464 
   1465 static DirProp
   1466 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
   1467     /* return first strong char or digit in epilogue */
   1468     const UChar *text=pBiDi->epilogue;
   1469     int32_t length=pBiDi->epiLength;
   1470     int32_t i;
   1471     UChar32 uchar;
   1472     DirProp dirProp;
   1473     for(i=0; i<length; ) {
   1474         /* i is incremented by U16_NEXT */
   1475         U16_NEXT(text, i, length, uchar);
   1476         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
   1477         if(dirProp==L) {
   1478             return DirProp_L;
   1479         }
   1480         if(dirProp==R || dirProp==AL) {
   1481             return DirProp_R;
   1482         }
   1483         if(dirProp==EN) {
   1484             return DirProp_EN;
   1485         }
   1486         if(dirProp==AN) {
   1487             return DirProp_AN;
   1488         }
   1489     }
   1490     return DirProp_ON;
   1491 }
   1492 
   1493 static void
   1494 resolveImplicitLevels(UBiDi *pBiDi,
   1495                       int32_t start, int32_t limit,
   1496                       DirProp sor, DirProp eor) {
   1497     const DirProp *dirProps=pBiDi->dirProps;
   1498 
   1499     LevState levState;
   1500     int32_t i, start1, start2;
   1501     uint8_t oldStateImp, stateImp, actionImp;
   1502     uint8_t gprop, resProp, cell;
   1503     UBool inverseRTL;
   1504     DirProp nextStrongProp=R;
   1505     int32_t nextStrongPos=-1;
   1506 
   1507     levState.startON = -1;  /* silence gcc flow analysis */
   1508 
   1509     /* check for RTL inverse BiDi mode */
   1510     /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
   1511      * loop on the text characters from end to start.
   1512      * This would need a different properties state table (at least different
   1513      * actions) and different levels state tables (maybe very similar to the
   1514      * LTR corresponding ones.
   1515      */
   1516     inverseRTL=(UBool)
   1517         ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
   1518          (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
   1519           pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
   1520     /* initialize for levels state table */
   1521     levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
   1522     levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
   1523     levState.state=0;
   1524     levState.runLevel=pBiDi->levels[start];
   1525     levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
   1526     levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
   1527     if(start==0 && pBiDi->proLength>0) {
   1528         DirProp lastStrong=lastL_R_AL(pBiDi);
   1529         if(lastStrong!=DirProp_ON) {
   1530             sor=lastStrong;
   1531         }
   1532     }
   1533     processPropertySeq(pBiDi, &levState, sor, start, start);
   1534     /* initialize for property state table */
   1535     if(NO_CONTEXT_RTL(dirProps[start])==NSM) {
   1536         stateImp = 1 + sor;
   1537     } else {
   1538         stateImp=0;
   1539     }
   1540     start1=start;
   1541     start2=start;
   1542 
   1543     for(i=start; i<=limit; i++) {
   1544         if(i>=limit) {
   1545             gprop=eor;
   1546         } else {
   1547             DirProp prop, prop1;
   1548             prop=NO_CONTEXT_RTL(dirProps[i]);
   1549             if(inverseRTL) {
   1550                 if(prop==AL) {
   1551                     /* AL before EN does not make it AN */
   1552                     prop=R;
   1553                 } else if(prop==EN) {
   1554                     if(nextStrongPos<=i) {
   1555                         /* look for next strong char (L/R/AL) */
   1556                         int32_t j;
   1557                         nextStrongProp=R;   /* set default */
   1558                         nextStrongPos=limit;
   1559                         for(j=i+1; j<limit; j++) {
   1560                             prop1=NO_CONTEXT_RTL(dirProps[j]);
   1561                             if(prop1==L || prop1==R || prop1==AL) {
   1562                                 nextStrongProp=prop1;
   1563                                 nextStrongPos=j;
   1564                                 break;
   1565                             }
   1566                         }
   1567                     }
   1568                     if(nextStrongProp==AL) {
   1569                         prop=AN;
   1570                     }
   1571                 }
   1572             }
   1573             gprop=groupProp[prop];
   1574         }
   1575         oldStateImp=stateImp;
   1576         cell=impTabProps[oldStateImp][gprop];
   1577         stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
   1578         actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
   1579         if((i==limit) && (actionImp==0)) {
   1580             /* there is an unprocessed sequence if its property == eor   */
   1581             actionImp=1;                    /* process the last sequence */
   1582         }
   1583         if(actionImp) {
   1584             resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
   1585             switch(actionImp) {
   1586             case 1:             /* process current seq1, init new seq1 */
   1587                 processPropertySeq(pBiDi, &levState, resProp, start1, i);
   1588                 start1=i;
   1589                 break;
   1590             case 2:             /* init new seq2 */
   1591                 start2=i;
   1592                 break;
   1593             case 3:             /* process seq1, process seq2, init new seq1 */
   1594                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
   1595                 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
   1596                 start1=i;
   1597                 break;
   1598             case 4:             /* process seq1, set seq1=seq2, init new seq2 */
   1599                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
   1600                 start1=start2;
   1601                 start2=i;
   1602                 break;
   1603             default:            /* we should never get here */
   1604                 U_ASSERT(FALSE);
   1605                 break;
   1606             }
   1607         }
   1608     }
   1609     /* flush possible pending sequence, e.g. ON */
   1610     if(limit==pBiDi->length && pBiDi->epiLength>0) {
   1611         DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
   1612         if(firstStrong!=DirProp_ON) {
   1613             eor=firstStrong;
   1614         }
   1615     }
   1616     processPropertySeq(pBiDi, &levState, eor, limit, limit);
   1617 }
   1618 
   1619 /* perform (L1) and (X9) ---------------------------------------------------- */
   1620 
   1621 /*
   1622  * Reset the embedding levels for some non-graphic characters (L1).
   1623  * This function also sets appropriate levels for BN, and
   1624  * explicit embedding types that are supposed to have been removed
   1625  * from the paragraph in (X9).
   1626  */
   1627 static void
   1628 adjustWSLevels(UBiDi *pBiDi) {
   1629     const DirProp *dirProps=pBiDi->dirProps;
   1630     UBiDiLevel *levels=pBiDi->levels;
   1631     int32_t i;
   1632 
   1633     if(pBiDi->flags&MASK_WS) {
   1634         UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
   1635         Flags flag;
   1636 
   1637         i=pBiDi->trailingWSStart;
   1638         while(i>0) {
   1639             /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
   1640             while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) {
   1641                 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
   1642                     levels[i]=0;
   1643                 } else {
   1644                     levels[i]=GET_PARALEVEL(pBiDi, i);
   1645                 }
   1646             }
   1647 
   1648             /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
   1649             /* here, i+1 is guaranteed to be <length */
   1650             while(i>0) {
   1651                 flag=DIRPROP_FLAG_NC(dirProps[--i]);
   1652                 if(flag&MASK_BN_EXPLICIT) {
   1653                     levels[i]=levels[i+1];
   1654                 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
   1655                     levels[i]=0;
   1656                     break;
   1657                 } else if(flag&MASK_B_S) {
   1658                     levels[i]=GET_PARALEVEL(pBiDi, i);
   1659                     break;
   1660                 }
   1661             }
   1662         }
   1663     }
   1664 }
   1665 
   1666 U_DRAFT void U_EXPORT2
   1667 ubidi_setContext(UBiDi *pBiDi,
   1668                  const UChar *prologue, int32_t proLength,
   1669                  const UChar *epilogue, int32_t epiLength,
   1670                  UErrorCode *pErrorCode) {
   1671     /* check the argument values */
   1672     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   1673     if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
   1674        (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
   1675         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1676         return;
   1677     }
   1678 
   1679     if(proLength==-1) {
   1680         pBiDi->proLength=u_strlen(prologue);
   1681     } else {
   1682         pBiDi->proLength=proLength;
   1683     }
   1684     if(epiLength==-1) {
   1685         pBiDi->epiLength=u_strlen(epilogue);
   1686     } else {
   1687         pBiDi->epiLength=epiLength;
   1688     }
   1689     pBiDi->prologue=prologue;
   1690     pBiDi->epilogue=epilogue;
   1691 }
   1692 
   1693 static void
   1694 setParaSuccess(UBiDi *pBiDi) {
   1695     pBiDi->proLength=0;                 /* forget the last context */
   1696     pBiDi->epiLength=0;
   1697     pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
   1698 }
   1699 
   1700 #define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
   1701 #define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
   1702 static void
   1703 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
   1704                 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
   1705     void *runsOnlyMemory;
   1706     int32_t *visualMap;
   1707     UChar *visualText;
   1708     int32_t saveLength, saveTrailingWSStart;
   1709     const UBiDiLevel *levels;
   1710     UBiDiLevel *saveLevels;
   1711     UBiDiDirection saveDirection;
   1712     UBool saveMayAllocateText;
   1713     Run *runs;
   1714     int32_t visualLength, i, j, visualStart, logicalStart,
   1715             runCount, runLength, addedRuns, insertRemove,
   1716             start, limit, step, indexOddBit, logicalPos,
   1717             index0, index1;
   1718     uint32_t saveOptions;
   1719 
   1720     pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
   1721     if(length==0) {
   1722         ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
   1723         goto cleanup3;
   1724     }
   1725     /* obtain memory for mapping table and visual text */
   1726     runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
   1727     if(runsOnlyMemory==NULL) {
   1728         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1729         goto cleanup3;
   1730     }
   1731     visualMap=runsOnlyMemory;
   1732     visualText=(UChar *)&visualMap[length];
   1733     saveLevels=(UBiDiLevel *)&visualText[length];
   1734     saveOptions=pBiDi->reorderingOptions;
   1735     if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
   1736         pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
   1737         pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
   1738     }
   1739     paraLevel&=1;                       /* accept only 0 or 1 */
   1740     ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
   1741     if(U_FAILURE(*pErrorCode)) {
   1742         goto cleanup3;
   1743     }
   1744     /* we cannot access directly pBiDi->levels since it is not yet set if
   1745      * direction is not MIXED
   1746      */
   1747     levels=ubidi_getLevels(pBiDi, pErrorCode);
   1748     uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
   1749     saveTrailingWSStart=pBiDi->trailingWSStart;
   1750     saveLength=pBiDi->length;
   1751     saveDirection=pBiDi->direction;
   1752 
   1753     /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
   1754      * the visual map and the dirProps array to drive the second call
   1755      * to ubidi_setPara (but must make provision for possible removal of
   1756      * BiDi controls.  Alternatively, only use the dirProps array via
   1757      * customized classifier callback.
   1758      */
   1759     visualLength=ubidi_writeReordered(pBiDi, visualText, length,
   1760                                       UBIDI_DO_MIRRORING, pErrorCode);
   1761     ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
   1762     if(U_FAILURE(*pErrorCode)) {
   1763         goto cleanup2;
   1764     }
   1765     pBiDi->reorderingOptions=saveOptions;
   1766 
   1767     pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
   1768     paraLevel^=1;
   1769     /* Because what we did with reorderingOptions, visualText may be shorter
   1770      * than the original text. But we don't want the levels memory to be
   1771      * reallocated shorter than the original length, since we need to restore
   1772      * the levels as after the first call to ubidi_setpara() before returning.
   1773      * We will force mayAllocateText to FALSE before the second call to
   1774      * ubidi_setpara(), and will restore it afterwards.
   1775      */
   1776     saveMayAllocateText=pBiDi->mayAllocateText;
   1777     pBiDi->mayAllocateText=FALSE;
   1778     ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
   1779     pBiDi->mayAllocateText=saveMayAllocateText;
   1780     ubidi_getRuns(pBiDi, pErrorCode);
   1781     if(U_FAILURE(*pErrorCode)) {
   1782         goto cleanup1;
   1783     }
   1784     /* check if some runs must be split, count how many splits */
   1785     addedRuns=0;
   1786     runCount=pBiDi->runCount;
   1787     runs=pBiDi->runs;
   1788     visualStart=0;
   1789     for(i=0; i<runCount; i++, visualStart+=runLength) {
   1790         runLength=runs[i].visualLimit-visualStart;
   1791         if(runLength<2) {
   1792             continue;
   1793         }
   1794         logicalStart=GET_INDEX(runs[i].logicalStart);
   1795         for(j=logicalStart+1; j<logicalStart+runLength; j++) {
   1796             index0=visualMap[j];
   1797             index1=visualMap[j-1];
   1798             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
   1799                 addedRuns++;
   1800             }
   1801         }
   1802     }
   1803     if(addedRuns) {
   1804         if(getRunsMemory(pBiDi, runCount+addedRuns)) {
   1805             if(runCount==1) {
   1806                 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
   1807                 pBiDi->runsMemory[0]=runs[0];
   1808             }
   1809             runs=pBiDi->runs=pBiDi->runsMemory;
   1810             pBiDi->runCount+=addedRuns;
   1811         } else {
   1812             goto cleanup1;
   1813         }
   1814     }
   1815     /* split runs which are not consecutive in source text */
   1816     for(i=runCount-1; i>=0; i--) {
   1817         runLength= i==0 ? runs[0].visualLimit :
   1818                           runs[i].visualLimit-runs[i-1].visualLimit;
   1819         logicalStart=runs[i].logicalStart;
   1820         indexOddBit=GET_ODD_BIT(logicalStart);
   1821         logicalStart=GET_INDEX(logicalStart);
   1822         if(runLength<2) {
   1823             if(addedRuns) {
   1824                 runs[i+addedRuns]=runs[i];
   1825             }
   1826             logicalPos=visualMap[logicalStart];
   1827             runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   1828                                             saveLevels[logicalPos]^indexOddBit);
   1829             continue;
   1830         }
   1831         if(indexOddBit) {
   1832             start=logicalStart;
   1833             limit=logicalStart+runLength-1;
   1834             step=1;
   1835         } else {
   1836             start=logicalStart+runLength-1;
   1837             limit=logicalStart;
   1838             step=-1;
   1839         }
   1840         for(j=start; j!=limit; j+=step) {
   1841             index0=visualMap[j];
   1842             index1=visualMap[j+step];
   1843             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
   1844                 logicalPos=BIDI_MIN(visualMap[start], index0);
   1845                 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   1846                                             saveLevels[logicalPos]^indexOddBit);
   1847                 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
   1848                 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
   1849                 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
   1850                 runs[i+addedRuns].insertRemove=insertRemove;
   1851                 runs[i].insertRemove&=~insertRemove;
   1852                 start=j+step;
   1853                 addedRuns--;
   1854             }
   1855         }
   1856         if(addedRuns) {
   1857             runs[i+addedRuns]=runs[i];
   1858         }
   1859         logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
   1860         runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   1861                                             saveLevels[logicalPos]^indexOddBit);
   1862     }
   1863 
   1864   cleanup1:
   1865     /* restore initial paraLevel */
   1866     pBiDi->paraLevel^=1;
   1867   cleanup2:
   1868     /* restore real text */
   1869     pBiDi->text=text;
   1870     pBiDi->length=saveLength;
   1871     pBiDi->originalLength=length;
   1872     pBiDi->direction=saveDirection;
   1873     /* the saved levels should never excess levelsSize, but we check anyway */
   1874     if(saveLength>pBiDi->levelsSize) {
   1875         saveLength=pBiDi->levelsSize;
   1876     }
   1877     uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
   1878     pBiDi->trailingWSStart=saveTrailingWSStart;
   1879     /* free memory for mapping table and visual text */
   1880     uprv_free(runsOnlyMemory);
   1881     if(pBiDi->runCount>1) {
   1882         pBiDi->direction=UBIDI_MIXED;
   1883     }
   1884   cleanup3:
   1885     pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
   1886 }
   1887 
   1888 /* ubidi_setPara ------------------------------------------------------------ */
   1889 
   1890 U_CAPI void U_EXPORT2
   1891 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
   1892               UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
   1893               UErrorCode *pErrorCode) {
   1894     UBiDiDirection direction;
   1895 
   1896     /* check the argument values */
   1897     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   1898     if(pBiDi==NULL || text==NULL || length<-1 ||
   1899        (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
   1900         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1901         return;
   1902     }
   1903 
   1904     if(length==-1) {
   1905         length=u_strlen(text);
   1906     }
   1907 
   1908     /* special treatment for RUNS_ONLY mode */
   1909     if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
   1910         setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
   1911         return;
   1912     }
   1913 
   1914     /* initialize the UBiDi structure */
   1915     pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
   1916     pBiDi->text=text;
   1917     pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
   1918     pBiDi->paraLevel=paraLevel;
   1919     pBiDi->direction=UBIDI_LTR;
   1920     pBiDi->paraCount=1;
   1921 
   1922     pBiDi->dirProps=NULL;
   1923     pBiDi->levels=NULL;
   1924     pBiDi->runs=NULL;
   1925     pBiDi->insertPoints.size=0;         /* clean up from last call */
   1926     pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
   1927 
   1928     /*
   1929      * Save the original paraLevel if contextual; otherwise, set to 0.
   1930      */
   1931     if(IS_DEFAULT_LEVEL(paraLevel)) {
   1932         pBiDi->defaultParaLevel=paraLevel;
   1933     } else {
   1934         pBiDi->defaultParaLevel=0;
   1935     }
   1936 
   1937     if(length==0) {
   1938         /*
   1939          * For an empty paragraph, create a UBiDi object with the paraLevel and
   1940          * the flags and the direction set but without allocating zero-length arrays.
   1941          * There is nothing more to do.
   1942          */
   1943         if(IS_DEFAULT_LEVEL(paraLevel)) {
   1944             pBiDi->paraLevel&=1;
   1945             pBiDi->defaultParaLevel=0;
   1946         }
   1947         if(paraLevel&1) {
   1948             pBiDi->flags=DIRPROP_FLAG(R);
   1949             pBiDi->direction=UBIDI_RTL;
   1950         } else {
   1951             pBiDi->flags=DIRPROP_FLAG(L);
   1952             pBiDi->direction=UBIDI_LTR;
   1953         }
   1954 
   1955         pBiDi->runCount=0;
   1956         pBiDi->paraCount=0;
   1957         setParaSuccess(pBiDi);          /* mark successful setPara */
   1958         return;
   1959     }
   1960 
   1961     pBiDi->runCount=-1;
   1962 
   1963     /*
   1964      * Get the directional properties,
   1965      * the flags bit-set, and
   1966      * determine the paragraph level if necessary.
   1967      */
   1968     if(getDirPropsMemory(pBiDi, length)) {
   1969         pBiDi->dirProps=pBiDi->dirPropsMemory;
   1970         getDirProps(pBiDi);
   1971     } else {
   1972         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1973         return;
   1974     }
   1975     /* the processed length may have changed if UBIDI_OPTION_STREAMING */
   1976     length= pBiDi->length;
   1977     pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
   1978     /* allocate paras memory */
   1979     if(pBiDi->paraCount>1) {
   1980         if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) {
   1981             pBiDi->paras=pBiDi->parasMemory;
   1982             pBiDi->paras[pBiDi->paraCount-1]=length;
   1983         } else {
   1984             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1985             return;
   1986         }
   1987     } else {
   1988         /* initialize paras for single paragraph */
   1989         pBiDi->paras=pBiDi->simpleParas;
   1990         pBiDi->simpleParas[0]=length;
   1991     }
   1992 
   1993     /* are explicit levels specified? */
   1994     if(embeddingLevels==NULL) {
   1995         /* no: determine explicit levels according to the (Xn) rules */\
   1996         if(getLevelsMemory(pBiDi, length)) {
   1997             pBiDi->levels=pBiDi->levelsMemory;
   1998             direction=resolveExplicitLevels(pBiDi);
   1999         } else {
   2000             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2001             return;
   2002         }
   2003     } else {
   2004         /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
   2005         pBiDi->levels=embeddingLevels;
   2006         direction=checkExplicitLevels(pBiDi, pErrorCode);
   2007         if(U_FAILURE(*pErrorCode)) {
   2008             return;
   2009         }
   2010     }
   2011 
   2012     /*
   2013      * The steps after (X9) in the UBiDi algorithm are performed only if
   2014      * the paragraph text has mixed directionality!
   2015      */
   2016     pBiDi->direction=direction;
   2017     switch(direction) {
   2018     case UBIDI_LTR:
   2019         /* make sure paraLevel is even */
   2020         pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
   2021 
   2022         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
   2023         pBiDi->trailingWSStart=0;
   2024         break;
   2025     case UBIDI_RTL:
   2026         /* make sure paraLevel is odd */
   2027         pBiDi->paraLevel|=1;
   2028 
   2029         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
   2030         pBiDi->trailingWSStart=0;
   2031         break;
   2032     default:
   2033         /*
   2034          *  Choose the right implicit state table
   2035          */
   2036         switch(pBiDi->reorderingMode) {
   2037         case UBIDI_REORDER_DEFAULT:
   2038             pBiDi->pImpTabPair=&impTab_DEFAULT;
   2039             break;
   2040         case UBIDI_REORDER_NUMBERS_SPECIAL:
   2041             pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
   2042             break;
   2043         case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
   2044             pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
   2045             break;
   2046         case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
   2047             pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
   2048             break;
   2049         case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
   2050             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
   2051                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
   2052             } else {
   2053                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
   2054             }
   2055             break;
   2056         case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
   2057             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
   2058                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
   2059             } else {
   2060                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
   2061             }
   2062             break;
   2063         default:
   2064             /* we should never get here */
   2065             U_ASSERT(FALSE);
   2066             break;
   2067         }
   2068         /*
   2069          * If there are no external levels specified and there
   2070          * are no significant explicit level codes in the text,
   2071          * then we can treat the entire paragraph as one run.
   2072          * Otherwise, we need to perform the following rules on runs of
   2073          * the text with the same embedding levels. (X10)
   2074          * "Significant" explicit level codes are ones that actually
   2075          * affect non-BN characters.
   2076          * Examples for "insignificant" ones are empty embeddings
   2077          * LRE-PDF, LRE-RLE-PDF-PDF, etc.
   2078          */
   2079         if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
   2080                                    !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
   2081             resolveImplicitLevels(pBiDi, 0, length,
   2082                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
   2083                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
   2084         } else {
   2085             /* sor, eor: start and end types of same-level-run */
   2086             UBiDiLevel *levels=pBiDi->levels;
   2087             int32_t start, limit=0;
   2088             UBiDiLevel level, nextLevel;
   2089             DirProp sor, eor;
   2090 
   2091             /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
   2092             level=GET_PARALEVEL(pBiDi, 0);
   2093             nextLevel=levels[0];
   2094             if(level<nextLevel) {
   2095                 eor=GET_LR_FROM_LEVEL(nextLevel);
   2096             } else {
   2097                 eor=GET_LR_FROM_LEVEL(level);
   2098             }
   2099 
   2100             do {
   2101                 /* determine start and limit of the run (end points just behind the run) */
   2102 
   2103                 /* the values for this run's start are the same as for the previous run's end */
   2104                 start=limit;
   2105                 level=nextLevel;
   2106                 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) {
   2107                     /* except if this is a new paragraph, then set sor = para level */
   2108                     sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
   2109                 } else {
   2110                     sor=eor;
   2111                 }
   2112 
   2113                 /* search for the limit of this run */
   2114                 while(++limit<length && levels[limit]==level) {}
   2115 
   2116                 /* get the correct level of the next run */
   2117                 if(limit<length) {
   2118                     nextLevel=levels[limit];
   2119                 } else {
   2120                     nextLevel=GET_PARALEVEL(pBiDi, length-1);
   2121                 }
   2122 
   2123                 /* determine eor from max(level, nextLevel); sor is last run's eor */
   2124                 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
   2125                     eor=GET_LR_FROM_LEVEL(nextLevel);
   2126                 } else {
   2127                     eor=GET_LR_FROM_LEVEL(level);
   2128                 }
   2129 
   2130                 /* if the run consists of overridden directional types, then there
   2131                    are no implicit types to be resolved */
   2132                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
   2133                     resolveImplicitLevels(pBiDi, start, limit, sor, eor);
   2134                 } else {
   2135                     /* remove the UBIDI_LEVEL_OVERRIDE flags */
   2136                     do {
   2137                         levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
   2138                     } while(start<limit);
   2139                 }
   2140             } while(limit<length);
   2141         }
   2142         /* check if we got any memory shortage while adding insert points */
   2143         if (U_FAILURE(pBiDi->insertPoints.errorCode))
   2144         {
   2145             *pErrorCode=pBiDi->insertPoints.errorCode;
   2146             return;
   2147         }
   2148         /* reset the embedding levels for some non-graphic characters (L1), (X9) */
   2149         adjustWSLevels(pBiDi);
   2150         break;
   2151     }
   2152     /* add RLM for inverse Bidi with contextual orientation resolving
   2153      * to RTL which would not round-trip otherwise
   2154      */
   2155     if((pBiDi->defaultParaLevel>0) &&
   2156        (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
   2157        ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
   2158         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
   2159         int32_t i, j, start, last;
   2160         DirProp dirProp;
   2161         for(i=0; i<pBiDi->paraCount; i++) {
   2162             last=pBiDi->paras[i]-1;
   2163             if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) {
   2164                 continue;           /* LTR paragraph */
   2165             }
   2166             start= i==0 ? 0 : pBiDi->paras[i - 1];
   2167             for(j=last; j>=start; j--) {
   2168                 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]);
   2169                 if(dirProp==L) {
   2170                     if(j<last) {
   2171                         while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) {
   2172                             last--;
   2173                         }
   2174                     }
   2175                     addPoint(pBiDi, last, RLM_BEFORE);
   2176                     break;
   2177                 }
   2178                 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
   2179                     break;
   2180                 }
   2181             }
   2182         }
   2183     }
   2184 
   2185     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
   2186         pBiDi->resultLength -= pBiDi->controlCount;
   2187     } else {
   2188         pBiDi->resultLength += pBiDi->insertPoints.size;
   2189     }
   2190     setParaSuccess(pBiDi);              /* mark successful setPara */
   2191 }
   2192 
   2193 U_CAPI void U_EXPORT2
   2194 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
   2195     if(pBiDi!=NULL) {
   2196         pBiDi->orderParagraphsLTR=orderParagraphsLTR;
   2197     }
   2198 }
   2199 
   2200 U_CAPI UBool U_EXPORT2
   2201 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
   2202     if(pBiDi!=NULL) {
   2203         return pBiDi->orderParagraphsLTR;
   2204     } else {
   2205         return FALSE;
   2206     }
   2207 }
   2208 
   2209 U_CAPI UBiDiDirection U_EXPORT2
   2210 ubidi_getDirection(const UBiDi *pBiDi) {
   2211     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2212         return pBiDi->direction;
   2213     } else {
   2214         return UBIDI_LTR;
   2215     }
   2216 }
   2217 
   2218 U_CAPI const UChar * U_EXPORT2
   2219 ubidi_getText(const UBiDi *pBiDi) {
   2220     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2221         return pBiDi->text;
   2222     } else {
   2223         return NULL;
   2224     }
   2225 }
   2226 
   2227 U_CAPI int32_t U_EXPORT2
   2228 ubidi_getLength(const UBiDi *pBiDi) {
   2229     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2230         return pBiDi->originalLength;
   2231     } else {
   2232         return 0;
   2233     }
   2234 }
   2235 
   2236 U_CAPI int32_t U_EXPORT2
   2237 ubidi_getProcessedLength(const UBiDi *pBiDi) {
   2238     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2239         return pBiDi->length;
   2240     } else {
   2241         return 0;
   2242     }
   2243 }
   2244 
   2245 U_CAPI int32_t U_EXPORT2
   2246 ubidi_getResultLength(const UBiDi *pBiDi) {
   2247     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2248         return pBiDi->resultLength;
   2249     } else {
   2250         return 0;
   2251     }
   2252 }
   2253 
   2254 /* paragraphs API functions ------------------------------------------------- */
   2255 
   2256 U_CAPI UBiDiLevel U_EXPORT2
   2257 ubidi_getParaLevel(const UBiDi *pBiDi) {
   2258     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2259         return pBiDi->paraLevel;
   2260     } else {
   2261         return 0;
   2262     }
   2263 }
   2264 
   2265 U_CAPI int32_t U_EXPORT2
   2266 ubidi_countParagraphs(UBiDi *pBiDi) {
   2267     if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
   2268         return 0;
   2269     } else {
   2270         return pBiDi->paraCount;
   2271     }
   2272 }
   2273 
   2274 U_CAPI void U_EXPORT2
   2275 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
   2276                           int32_t *pParaStart, int32_t *pParaLimit,
   2277                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
   2278     int32_t paraStart;
   2279 
   2280     /* check the argument values */
   2281     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2282     RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
   2283     RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
   2284 
   2285     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
   2286     if(paraIndex) {
   2287         paraStart=pBiDi->paras[paraIndex-1];
   2288     } else {
   2289         paraStart=0;
   2290     }
   2291     if(pParaStart!=NULL) {
   2292         *pParaStart=paraStart;
   2293     }
   2294     if(pParaLimit!=NULL) {
   2295         *pParaLimit=pBiDi->paras[paraIndex];
   2296     }
   2297     if(pParaLevel!=NULL) {
   2298         *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
   2299     }
   2300 }
   2301 
   2302 U_CAPI int32_t U_EXPORT2
   2303 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
   2304                           int32_t *pParaStart, int32_t *pParaLimit,
   2305                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
   2306     uint32_t paraIndex;
   2307 
   2308     /* check the argument values */
   2309     /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
   2310     RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
   2311     RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
   2312     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
   2313     RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
   2314 
   2315     for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++);
   2316     ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
   2317     return paraIndex;
   2318 }
   2319 
   2320 U_CAPI void U_EXPORT2
   2321 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
   2322                        const void *newContext, UBiDiClassCallback **oldFn,
   2323                        const void **oldContext, UErrorCode *pErrorCode)
   2324 {
   2325     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2326     if(pBiDi==NULL) {
   2327         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2328         return;
   2329     }
   2330     if( oldFn )
   2331     {
   2332         *oldFn = pBiDi->fnClassCallback;
   2333     }
   2334     if( oldContext )
   2335     {
   2336         *oldContext = pBiDi->coClassCallback;
   2337     }
   2338     pBiDi->fnClassCallback = newFn;
   2339     pBiDi->coClassCallback = newContext;
   2340 }
   2341 
   2342 U_CAPI void U_EXPORT2
   2343 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
   2344 {
   2345     if(pBiDi==NULL) {
   2346         return;
   2347     }
   2348     if( fn )
   2349     {
   2350         *fn = pBiDi->fnClassCallback;
   2351     }
   2352     if( context )
   2353     {
   2354         *context = pBiDi->coClassCallback;
   2355     }
   2356 }
   2357 
   2358 U_CAPI UCharDirection U_EXPORT2
   2359 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
   2360 {
   2361     UCharDirection dir;
   2362 
   2363     if( pBiDi->fnClassCallback == NULL ||
   2364         (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
   2365     {
   2366         return ubidi_getClass(pBiDi->bdp, c);
   2367     } else {
   2368         return dir;
   2369     }
   2370 }
   2371 
   2372