1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1999-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: ubidi.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 1999jul27 14 * created by: Markus W. Scherer, updated by Matitiahu Allouche 15 */ 16 17 #include "cmemory.h" 18 #include "unicode/utypes.h" 19 #include "unicode/ustring.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ubidi.h" 22 #include "ubidi_props.h" 23 #include "ubidiimp.h" 24 #include "uassert.h" 25 26 /* 27 * General implementation notes: 28 * 29 * Throughout the implementation, there are comments like (W2) that refer to 30 * rules of the BiDi algorithm in its version 5, in this example to the second 31 * rule of the resolution of weak types. 32 * 33 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) 34 * character according to UTF-16, the second UChar gets the directional property of 35 * the entire character assigned, while the first one gets a BN, a boundary 36 * neutral, type, which is ignored by most of the algorithm according to 37 * rule (X9) and the implementation suggestions of the BiDi algorithm. 38 * 39 * Later, adjustWSLevels() will set the level for each BN to that of the 40 * following character (UChar), which results in surrogate pairs getting the 41 * same level on each of their surrogates. 42 * 43 * In a UTF-8 implementation, the same thing could be done: the last byte of 44 * a multi-byte sequence would get the "real" property, while all previous 45 * bytes of that sequence would get BN. 46 * 47 * It is not possible to assign all those parts of a character the same real 48 * property because this would fail in the resolution of weak types with rules 49 * that look at immediately surrounding types. 50 * 51 * As a related topic, this implementation does not remove Boundary Neutral 52 * types from the input, but ignores them wherever this is relevant. 53 * For example, the loop for the resolution of the weak types reads 54 * types until it finds a non-BN. 55 * Also, explicit embedding codes are neither changed into BN nor removed. 56 * They are only treated the same way real BNs are. 57 * As stated before, adjustWSLevels() takes care of them at the end. 58 * For the purpose of conformance, the levels of all these codes 59 * do not matter. 60 * 61 * Note that this implementation never modifies the dirProps 62 * after the initial setup. 63 * 64 * 65 * In this implementation, the resolution of weak types (Wn), 66 * neutrals (Nn), and the assignment of the resolved level (In) 67 * are all done in one single loop, in resolveImplicitLevels(). 68 * Changes of dirProp values are done on the fly, without writing 69 * them back to the dirProps array. 70 * 71 * 72 * This implementation contains code that allows to bypass steps of the 73 * algorithm that are not needed on the specific paragraph 74 * in order to speed up the most common cases considerably, 75 * like text that is entirely LTR, or RTL text without numbers. 76 * 77 * Most of this is done by setting a bit for each directional property 78 * in a flags variable and later checking for whether there are 79 * any LTR characters or any RTL characters, or both, whether 80 * there are any explicit embedding codes, etc. 81 * 82 * If the (Xn) steps are performed, then the flags are re-evaluated, 83 * because they will then not contain the embedding codes any more 84 * and will be adjusted for override codes, so that subsequently 85 * more bypassing may be possible than what the initial flags suggested. 86 * 87 * If the text is not mixed-directional, then the 88 * algorithm steps for the weak type resolution are not performed, 89 * and all levels are set to the paragraph level. 90 * 91 * If there are no explicit embedding codes, then the (Xn) steps 92 * are not performed. 93 * 94 * If embedding levels are supplied as a parameter, then all 95 * explicit embedding codes are ignored, and the (Xn) steps 96 * are not performed. 97 * 98 * White Space types could get the level of the run they belong to, 99 * and are checked with a test of (flags&MASK_EMBEDDING) to 100 * consider if the paragraph direction should be considered in 101 * the flags variable. 102 * 103 * If there are no White Space types in the paragraph, then 104 * (L1) is not necessary in adjustWSLevels(). 105 */ 106 107 /* to avoid some conditional statements, use tiny constant arrays */ 108 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; 109 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; 110 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; 111 112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] 113 #define DIRPROP_FLAG_E(level) flagE[(level)&1] 114 #define DIRPROP_FLAG_O(level) flagO[(level)&1] 115 116 /* UBiDi object management -------------------------------------------------- */ 117 118 U_CAPI UBiDi * U_EXPORT2 119 ubidi_open(void) 120 { 121 UErrorCode errorCode=U_ZERO_ERROR; 122 return ubidi_openSized(0, 0, &errorCode); 123 } 124 125 U_CAPI UBiDi * U_EXPORT2 126 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { 127 UBiDi *pBiDi; 128 129 /* check the argument values */ 130 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 131 return NULL; 132 } else if(maxLength<0 || maxRunCount<0) { 133 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 134 return NULL; /* invalid arguments */ 135 } 136 137 /* allocate memory for the object */ 138 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); 139 if(pBiDi==NULL) { 140 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 141 return NULL; 142 } 143 144 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ 145 uprv_memset(pBiDi, 0, sizeof(UBiDi)); 146 147 /* get BiDi properties */ 148 pBiDi->bdp=ubidi_getSingleton(); 149 150 /* allocate memory for arrays as requested */ 151 if(maxLength>0) { 152 if( !getInitialDirPropsMemory(pBiDi, maxLength) || 153 !getInitialLevelsMemory(pBiDi, maxLength) 154 ) { 155 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 156 } 157 } else { 158 pBiDi->mayAllocateText=TRUE; 159 } 160 161 if(maxRunCount>0) { 162 if(maxRunCount==1) { 163 /* use simpleRuns[] */ 164 pBiDi->runsSize=sizeof(Run); 165 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { 166 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 167 } 168 } else { 169 pBiDi->mayAllocateRuns=TRUE; 170 } 171 172 if(U_SUCCESS(*pErrorCode)) { 173 return pBiDi; 174 } else { 175 ubidi_close(pBiDi); 176 return NULL; 177 } 178 } 179 180 /* 181 * We are allowed to allocate memory if memory==NULL or 182 * mayAllocate==TRUE for each array that we need. 183 * We also try to grow memory as needed if we 184 * allocate it. 185 * 186 * Assume sizeNeeded>0. 187 * If *pMemory!=NULL, then assume *pSize>0. 188 * 189 * ### this realloc() may unnecessarily copy the old data, 190 * which we know we don't need any more; 191 * is this the best way to do this?? 192 */ 193 U_CFUNC UBool 194 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { 195 void **pMemory = (void **)bidiMem; 196 /* check for existing memory */ 197 if(*pMemory==NULL) { 198 /* we need to allocate memory */ 199 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { 200 *pSize=sizeNeeded; 201 return TRUE; 202 } else { 203 return FALSE; 204 } 205 } else { 206 if(sizeNeeded<=*pSize) { 207 /* there is already enough memory */ 208 return TRUE; 209 } 210 else if(!mayAllocate) { 211 /* not enough memory, and we must not allocate */ 212 return FALSE; 213 } else { 214 /* we try to grow */ 215 void *memory; 216 /* in most cases, we do not need the copy-old-data part of 217 * realloc, but it is needed when adding runs using getRunsMemory() 218 * in setParaRunsOnly() 219 */ 220 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { 221 *pMemory=memory; 222 *pSize=sizeNeeded; 223 return TRUE; 224 } else { 225 /* we failed to grow */ 226 return FALSE; 227 } 228 } 229 } 230 } 231 232 U_CAPI void U_EXPORT2 233 ubidi_close(UBiDi *pBiDi) { 234 if(pBiDi!=NULL) { 235 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ 236 if(pBiDi->dirPropsMemory!=NULL) { 237 uprv_free(pBiDi->dirPropsMemory); 238 } 239 if(pBiDi->levelsMemory!=NULL) { 240 uprv_free(pBiDi->levelsMemory); 241 } 242 if(pBiDi->runsMemory!=NULL) { 243 uprv_free(pBiDi->runsMemory); 244 } 245 if(pBiDi->parasMemory!=NULL) { 246 uprv_free(pBiDi->parasMemory); 247 } 248 if(pBiDi->insertPoints.points!=NULL) { 249 uprv_free(pBiDi->insertPoints.points); 250 } 251 252 uprv_free(pBiDi); 253 } 254 } 255 256 /* set to approximate "inverse BiDi" ---------------------------------------- */ 257 258 U_CAPI void U_EXPORT2 259 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { 260 if(pBiDi!=NULL) { 261 pBiDi->isInverse=isInverse; 262 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L 263 : UBIDI_REORDER_DEFAULT; 264 } 265 } 266 267 U_CAPI UBool U_EXPORT2 268 ubidi_isInverse(UBiDi *pBiDi) { 269 if(pBiDi!=NULL) { 270 return pBiDi->isInverse; 271 } else { 272 return FALSE; 273 } 274 } 275 276 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of 277 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre 278 * concept of RUNS_ONLY which is a double operation. 279 * It could be advantageous to divide this into 3 concepts: 280 * a) Operation: direct / inverse / RUNS_ONLY 281 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R 282 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL 283 * This would allow combinations not possible today like RUNS_ONLY with 284 * NUMBERS_SPECIAL. 285 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and 286 * REMOVE_CONTROLS for the inverse step. 287 * Not all combinations would be supported, and probably not all do make sense. 288 * This would need to document which ones are supported and what are the 289 * fallbacks for unsupported combinations. 290 */ 291 U_CAPI void U_EXPORT2 292 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { 293 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) 294 && (reorderingMode < UBIDI_REORDER_COUNT)) { 295 pBiDi->reorderingMode = reorderingMode; 296 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); 297 } 298 } 299 300 U_CAPI UBiDiReorderingMode U_EXPORT2 301 ubidi_getReorderingMode(UBiDi *pBiDi) { 302 if (pBiDi!=NULL) { 303 return pBiDi->reorderingMode; 304 } else { 305 return UBIDI_REORDER_DEFAULT; 306 } 307 } 308 309 U_CAPI void U_EXPORT2 310 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { 311 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { 312 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; 313 } 314 if (pBiDi!=NULL) { 315 pBiDi->reorderingOptions=reorderingOptions; 316 } 317 } 318 319 U_CAPI uint32_t U_EXPORT2 320 ubidi_getReorderingOptions(UBiDi *pBiDi) { 321 if (pBiDi!=NULL) { 322 return pBiDi->reorderingOptions; 323 } else { 324 return 0; 325 } 326 } 327 328 U_CAPI UBiDiDirection U_EXPORT2 329 ubidi_getBaseDirection(const UChar *text, 330 int32_t length){ 331 332 int32_t i; 333 UChar32 uchar; 334 UCharDirection dir; 335 336 if( text==NULL || length<-1 ){ 337 return UBIDI_NEUTRAL; 338 } 339 340 if(length==-1) { 341 length=u_strlen(text); 342 } 343 344 for( i = 0 ; i < length; ) { 345 /* i is incremented by U16_NEXT */ 346 U16_NEXT(text, i, length, uchar); 347 dir = u_charDirection(uchar); 348 if( dir == U_LEFT_TO_RIGHT ) 349 return UBIDI_LTR; 350 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) 351 return UBIDI_RTL; 352 } 353 return UBIDI_NEUTRAL; 354 } 355 356 /* perform (P2)..(P3) ------------------------------------------------------- */ 357 358 static DirProp 359 firstL_R_AL(UBiDi *pBiDi) { 360 /* return first strong char after the last B in prologue if any */ 361 const UChar *text=pBiDi->prologue; 362 int32_t length=pBiDi->proLength; 363 int32_t i; 364 UChar32 uchar; 365 DirProp dirProp, result=ON; 366 for(i=0; i<length; ) { 367 /* i is incremented by U16_NEXT */ 368 U16_NEXT(text, i, length, uchar); 369 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); 370 if(result==ON) { 371 if(dirProp==L || dirProp==R || dirProp==AL) { 372 result=dirProp; 373 } 374 } else { 375 if(dirProp==B) { 376 result=ON; 377 } 378 } 379 } 380 return result; 381 } 382 383 /* 384 * Get the directional properties for the text, 385 * calculate the flags bit-set, and 386 * determine the paragraph level if necessary. 387 */ 388 static void 389 getDirProps(UBiDi *pBiDi) { 390 const UChar *text=pBiDi->text; 391 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ 392 393 int32_t i=0, i1, length=pBiDi->originalLength; 394 Flags flags=0; /* collect all directionalities in the text */ 395 UChar32 uchar; 396 DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */ 397 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); 398 /* for inverse BiDi, the default para level is set to RTL if there is a 399 strong R or AL character at either end of the text */ 400 UBool isDefaultLevelInverse=isDefaultLevel && (UBool) 401 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || 402 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); 403 int32_t lastArabicPos=-1; 404 int32_t controlCount=0; 405 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & 406 UBIDI_OPTION_REMOVE_CONTROLS); 407 408 typedef enum { 409 NOT_CONTEXTUAL, /* 0: not contextual paraLevel */ 410 LOOKING_FOR_STRONG, /* 1: looking for first strong char */ 411 FOUND_STRONG_CHAR /* 2: found first strong char */ 412 } State; 413 State state; 414 int32_t paraStart=0; /* index of first char in paragraph */ 415 DirProp paraDir; /* == CONTEXT_RTL within paragraphs 416 starting with strong R char */ 417 DirProp lastStrongDir=0; /* for default level & inverse BiDi */ 418 int32_t lastStrongLTR=0; /* for STREAMING option */ 419 420 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 421 pBiDi->length=0; 422 lastStrongLTR=0; 423 } 424 if(isDefaultLevel) { 425 DirProp lastStrong; 426 paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0; 427 if(pBiDi->proLength>0 && 428 (lastStrong=firstL_R_AL(pBiDi))!=ON) { 429 paraDir=(lastStrong==L) ? 0 : CONTEXT_RTL; 430 state=FOUND_STRONG_CHAR; 431 } else { 432 paraDir=paraDirDefault; 433 state=LOOKING_FOR_STRONG; 434 } 435 lastStrongDir=paraDir; 436 } else { 437 state=NOT_CONTEXTUAL; 438 paraDir=0; 439 } 440 /* count paragraphs and determine the paragraph level (P2..P3) */ 441 /* 442 * see comment in ubidi.h: 443 * the DEFAULT_XXX values are designed so that 444 * their bit 0 alone yields the intended default 445 */ 446 for( /* i=0 above */ ; i<length; ) { 447 /* i is incremented by U16_NEXT */ 448 U16_NEXT(text, i, length, uchar); 449 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar)); 450 dirProps[i-1]=dirProp|paraDir; 451 if(uchar>0xffff) { /* set the lead surrogate's property to BN */ 452 flags|=DIRPROP_FLAG(BN); 453 dirProps[i-2]=(DirProp)(BN|paraDir); 454 } 455 if(state==LOOKING_FOR_STRONG) { 456 if(dirProp==L) { 457 state=FOUND_STRONG_CHAR; 458 if(paraDir) { 459 paraDir=0; 460 for(i1=paraStart; i1<i; i1++) { 461 dirProps[i1]&=~CONTEXT_RTL; 462 } 463 } 464 continue; 465 } 466 if(dirProp==R || dirProp==AL) { 467 state=FOUND_STRONG_CHAR; 468 if(paraDir==0) { 469 paraDir=CONTEXT_RTL; 470 for(i1=paraStart; i1<i; i1++) { 471 dirProps[i1]|=CONTEXT_RTL; 472 } 473 } 474 continue; 475 } 476 } 477 if(dirProp==L) { 478 lastStrongDir=0; 479 lastStrongLTR=i; /* i is index to next character */ 480 } 481 else if(dirProp==R) { 482 lastStrongDir=CONTEXT_RTL; 483 } 484 else if(dirProp==AL) { 485 lastStrongDir=CONTEXT_RTL; 486 lastArabicPos=i-1; 487 } 488 else if(dirProp==B) { 489 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 490 pBiDi->length=i; /* i is index to next character */ 491 } 492 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) { 493 for( ; paraStart<i; paraStart++) { 494 dirProps[paraStart]|=CONTEXT_RTL; 495 } 496 } 497 if(i<length) { /* B not last char in text */ 498 if(!((uchar==CR) && (text[i]==LF))) { 499 pBiDi->paraCount++; 500 } 501 if(isDefaultLevel) { 502 state=LOOKING_FOR_STRONG; 503 paraStart=i; /* i is index to next character */ 504 paraDir=paraDirDefault; 505 lastStrongDir=paraDirDefault; 506 } 507 } 508 } 509 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) { 510 controlCount++; 511 } 512 } 513 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) { 514 for(i1=paraStart; i1<length; i1++) { 515 dirProps[i1]|=CONTEXT_RTL; 516 } 517 } 518 if(isDefaultLevel) { 519 pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0); 520 } 521 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 522 if((lastStrongLTR>pBiDi->length) && 523 (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) { 524 pBiDi->length = lastStrongLTR; 525 } 526 if(pBiDi->length<pBiDi->originalLength) { 527 pBiDi->paraCount--; 528 } 529 } 530 /* The following line does nothing new for contextual paraLevel, but is 531 needed for absolute paraLevel. */ 532 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 533 534 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { 535 flags|=DIRPROP_FLAG(L); 536 } 537 538 pBiDi->controlCount = controlCount; 539 pBiDi->flags=flags; 540 pBiDi->lastArabicPos=lastArabicPos; 541 } 542 543 /* perform (X1)..(X9) ------------------------------------------------------- */ 544 545 /* determine if the text is mixed-directional or single-directional */ 546 static UBiDiDirection 547 directionFromFlags(UBiDi *pBiDi) { 548 Flags flags=pBiDi->flags; 549 /* if the text contains AN and neutrals, then some neutrals may become RTL */ 550 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { 551 return UBIDI_LTR; 552 } else if(!(flags&MASK_LTR)) { 553 return UBIDI_RTL; 554 } else { 555 return UBIDI_MIXED; 556 } 557 } 558 559 /* 560 * Resolve the explicit levels as specified by explicit embedding codes. 561 * Recalculate the flags to have them reflect the real properties 562 * after taking the explicit embeddings into account. 563 * 564 * The BiDi algorithm is designed to result in the same behavior whether embedding 565 * levels are externally specified (from "styled text", supposedly the preferred 566 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. 567 * That is why (X9) instructs to remove all explicit codes (and BN). 568 * However, in a real implementation, this removal of these codes and their index 569 * positions in the plain text is undesirable since it would result in 570 * reallocated, reindexed text. 571 * Instead, this implementation leaves the codes in there and just ignores them 572 * in the subsequent processing. 573 * In order to get the same reordering behavior, positions with a BN or an 574 * explicit embedding code just get the same level assigned as the last "real" 575 * character. 576 * 577 * Some implementations, not this one, then overwrite some of these 578 * directionality properties at "real" same-level-run boundaries by 579 * L or R codes so that the resolution of weak types can be performed on the 580 * entire paragraph at once instead of having to parse it once more and 581 * perform that resolution on same-level-runs. 582 * This limits the scope of the implicit rules in effectively 583 * the same way as the run limits. 584 * 585 * Instead, this implementation does not modify these codes. 586 * On one hand, the paragraph has to be scanned for same-level-runs, but 587 * on the other hand, this saves another loop to reset these codes, 588 * or saves making and modifying a copy of dirProps[]. 589 * 590 * 591 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. 592 * 593 * 594 * Handling the stack of explicit levels (Xn): 595 * 596 * With the BiDi stack of explicit levels, 597 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, 598 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61. 599 * 600 * In order to have a correct push-pop semantics even in the case of overflows, 601 * there are two overflow counters: 602 * - countOver60 is incremented with each LRx at level 60 603 * - from level 60, one RLx increases the level to 61 604 * - countOver61 is incremented with each LRx and RLx at level 61 605 * 606 * Popping levels with PDF must work in the opposite order so that level 61 607 * is correct at the correct point. Underflows (too many PDFs) must be checked. 608 * 609 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. 610 */ 611 static UBiDiDirection 612 resolveExplicitLevels(UBiDi *pBiDi) { 613 const DirProp *dirProps=pBiDi->dirProps; 614 UBiDiLevel *levels=pBiDi->levels; 615 const UChar *text=pBiDi->text; 616 617 int32_t i=0, length=pBiDi->length; 618 Flags flags=pBiDi->flags; /* collect all directionalities in the text */ 619 DirProp dirProp; 620 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); 621 622 UBiDiDirection direction; 623 int32_t paraIndex=0; 624 625 /* determine if the text is mixed-directional or single-directional */ 626 direction=directionFromFlags(pBiDi); 627 628 /* we may not need to resolve any explicit levels, but for multiple 629 paragraphs we want to loop on all chars to set the para boundaries */ 630 if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) { 631 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ 632 } else if((pBiDi->paraCount==1) && 633 (!(flags&MASK_EXPLICIT) || 634 (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) { 635 /* mixed, but all characters are at the same embedding level */ 636 /* or we are in "inverse BiDi" */ 637 /* and we don't have contextual multiple paragraphs with some B char */ 638 /* set all levels to the paragraph level */ 639 for(i=0; i<length; ++i) { 640 levels[i]=level; 641 } 642 } else { 643 /* continue to perform (Xn) */ 644 645 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ 646 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */ 647 UBiDiLevel embeddingLevel=level, newLevel, stackTop=0; 648 649 UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */ 650 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */ 651 652 /* recalculate the flags */ 653 flags=0; 654 655 for(i=0; i<length; ++i) { 656 dirProp=NO_CONTEXT_RTL(dirProps[i]); 657 switch(dirProp) { 658 case LRE: 659 case LRO: 660 /* (X3, X5) */ 661 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */ 662 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) { 663 stack[stackTop]=embeddingLevel; 664 ++stackTop; 665 embeddingLevel=newLevel; 666 if(dirProp==LRO) { 667 embeddingLevel|=UBIDI_LEVEL_OVERRIDE; 668 } 669 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE 670 since this has already been done for newLevel which is 671 the source for embeddingLevel. 672 */ 673 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) { 674 ++countOver61; 675 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ { 676 ++countOver60; 677 } 678 flags|=DIRPROP_FLAG(BN); 679 break; 680 case RLE: 681 case RLO: 682 /* (X2, X4) */ 683 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */ 684 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) { 685 stack[stackTop]=embeddingLevel; 686 ++stackTop; 687 embeddingLevel=newLevel; 688 if(dirProp==RLO) { 689 embeddingLevel|=UBIDI_LEVEL_OVERRIDE; 690 } 691 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE 692 since this has already been done for newLevel which is 693 the source for embeddingLevel. 694 */ 695 } else { 696 ++countOver61; 697 } 698 flags|=DIRPROP_FLAG(BN); 699 break; 700 case PDF: 701 /* (X7) */ 702 /* handle all the overflow cases first */ 703 if(countOver61>0) { 704 --countOver61; 705 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) { 706 /* handle LRx overflows from level 60 */ 707 --countOver60; 708 } else if(stackTop>0) { 709 /* this is the pop operation; it also pops level 61 while countOver60>0 */ 710 --stackTop; 711 embeddingLevel=stack[stackTop]; 712 /* } else { (underflow) */ 713 } 714 flags|=DIRPROP_FLAG(BN); 715 break; 716 case B: 717 stackTop=0; 718 countOver60=countOver61=0; 719 level=GET_PARALEVEL(pBiDi, i); 720 if((i+1)<length) { 721 embeddingLevel=GET_PARALEVEL(pBiDi, i+1); 722 if(!((text[i]==CR) && (text[i+1]==LF))) { 723 pBiDi->paras[paraIndex++]=i+1; 724 } 725 } 726 flags|=DIRPROP_FLAG(B); 727 break; 728 case BN: 729 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ 730 /* they will get their levels set correctly in adjustWSLevels() */ 731 flags|=DIRPROP_FLAG(BN); 732 break; 733 default: 734 /* all other types get the "real" level */ 735 if(level!=embeddingLevel) { 736 level=embeddingLevel; 737 if(level&UBIDI_LEVEL_OVERRIDE) { 738 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; 739 } else { 740 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; 741 } 742 } 743 if(!(level&UBIDI_LEVEL_OVERRIDE)) { 744 flags|=DIRPROP_FLAG(dirProp); 745 } 746 break; 747 } 748 749 /* 750 * We need to set reasonable levels even on BN codes and 751 * explicit codes because we will later look at same-level runs (X10). 752 */ 753 levels[i]=level; 754 } 755 if(flags&MASK_EMBEDDING) { 756 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 757 } 758 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { 759 flags|=DIRPROP_FLAG(L); 760 } 761 762 /* subsequently, ignore the explicit codes and BN (X9) */ 763 764 /* again, determine if the text is mixed-directional or single-directional */ 765 pBiDi->flags=flags; 766 direction=directionFromFlags(pBiDi); 767 } 768 769 return direction; 770 } 771 772 /* 773 * Use a pre-specified embedding levels array: 774 * 775 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), 776 * ignore all explicit codes (X9), 777 * and check all the preset levels. 778 * 779 * Recalculate the flags to have them reflect the real properties 780 * after taking the explicit embeddings into account. 781 */ 782 static UBiDiDirection 783 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { 784 const DirProp *dirProps=pBiDi->dirProps; 785 DirProp dirProp; 786 UBiDiLevel *levels=pBiDi->levels; 787 const UChar *text=pBiDi->text; 788 789 int32_t i, length=pBiDi->length; 790 Flags flags=0; /* collect all directionalities in the text */ 791 UBiDiLevel level; 792 uint32_t paraIndex=0; 793 794 for(i=0; i<length; ++i) { 795 level=levels[i]; 796 dirProp=NO_CONTEXT_RTL(dirProps[i]); 797 if(level&UBIDI_LEVEL_OVERRIDE) { 798 /* keep the override flag in levels[i] but adjust the flags */ 799 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ 800 flags|=DIRPROP_FLAG_O(level); 801 } else { 802 /* set the flags */ 803 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); 804 } 805 if((level<GET_PARALEVEL(pBiDi, i) && 806 !((0==level)&&(dirProp==B))) || 807 (UBIDI_MAX_EXPLICIT_LEVEL<level)) { 808 /* level out of bounds */ 809 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 810 return UBIDI_LTR; 811 } 812 if((dirProp==B) && ((i+1)<length)) { 813 if(!((text[i]==CR) && (text[i+1]==LF))) { 814 pBiDi->paras[paraIndex++]=i+1; 815 } 816 } 817 } 818 if(flags&MASK_EMBEDDING) { 819 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 820 } 821 822 /* determine if the text is mixed-directional or single-directional */ 823 pBiDi->flags=flags; 824 return directionFromFlags(pBiDi); 825 } 826 827 /****************************************************************** 828 The Properties state machine table 829 ******************************************************************* 830 831 All table cells are 8 bits: 832 bits 0..4: next state 833 bits 5..7: action to perform (if > 0) 834 835 Cells may be of format "n" where n represents the next state 836 (except for the rightmost column). 837 Cells may also be of format "s(x,y)" where x represents an action 838 to perform and y represents the next state. 839 840 ******************************************************************* 841 Definitions and type for properties state table 842 ******************************************************************* 843 */ 844 #define IMPTABPROPS_COLUMNS 14 845 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) 846 #define GET_STATEPROPS(cell) ((cell)&0x1f) 847 #define GET_ACTIONPROPS(cell) ((cell)>>5) 848 #define s(action, newState) ((uint8_t)(newState+(action<<5))) 849 850 static const uint8_t groupProp[] = /* dirProp regrouped */ 851 { 852 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */ 853 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10 854 }; 855 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ 856 857 /****************************************************************** 858 859 PROPERTIES STATE TABLE 860 861 In table impTabProps, 862 - the ON column regroups ON and WS 863 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF 864 - the Res column is the reduced property assigned to a run 865 866 Action 1: process current run1, init new run1 867 2: init new run2 868 3: process run1, process run2, init new run1 869 4: process run1, set run1=run2, init new run2 870 871 Notes: 872 1) This table is used in resolveImplicitLevels(). 873 2) This table triggers actions when there is a change in the Bidi 874 property of incoming characters (action 1). 875 3) Most such property sequences are processed immediately (in 876 fact, passed to processPropertySeq(). 877 4) However, numbers are assembled as one sequence. This means 878 that undefined situations (like CS following digits, until 879 it is known if the next char will be a digit) are held until 880 following chars define them. 881 Example: digits followed by CS, then comes another CS or ON; 882 the digits will be processed, then the CS assigned 883 as the start of an ON sequence (action 3). 884 5) There are cases where more than one sequence must be 885 processed, for instance digits followed by CS followed by L: 886 the digits must be processed as one sequence, and the CS 887 must be processed as an ON sequence, all this before starting 888 assembling chars for the opening L sequence. 889 890 891 */ 892 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = 893 { 894 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , Res */ 895 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , DirProp_ON }, 896 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), DirProp_L }, 897 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), DirProp_R }, 898 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 , DirProp_R }, 899 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), DirProp_EN }, 900 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), DirProp_AN }, 901 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), DirProp_AN }, 902 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3), DirProp_ON }, 903 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3), DirProp_ON }, 904 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), DirProp_ON }, 905 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), DirProp_EN }, 906 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), DirProp_EN }, 907 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), DirProp_AN }, 908 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), DirProp_AN }, 909 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3), DirProp_ON }, 910 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), DirProp_S }, 911 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), DirProp_S }, 912 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), DirProp_B } 913 }; 914 915 /* we must undef macro s because the levels table have a different 916 * structure (4 bits for action and 4 bits for next state. 917 */ 918 #undef s 919 920 /****************************************************************** 921 The levels state machine tables 922 ******************************************************************* 923 924 All table cells are 8 bits: 925 bits 0..3: next state 926 bits 4..7: action to perform (if > 0) 927 928 Cells may be of format "n" where n represents the next state 929 (except for the rightmost column). 930 Cells may also be of format "s(x,y)" where x represents an action 931 to perform and y represents the next state. 932 933 This format limits each table to 16 states each and to 15 actions. 934 935 ******************************************************************* 936 Definitions and type for levels state tables 937 ******************************************************************* 938 */ 939 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2) 940 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) 941 #define GET_STATE(cell) ((cell)&0x0f) 942 #define GET_ACTION(cell) ((cell)>>4) 943 #define s(action, newState) ((uint8_t)(newState+(action<<4))) 944 945 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; 946 typedef uint8_t ImpAct[]; 947 948 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct, 949 * instead of having a pair of ImpTab and a pair of ImpAct. 950 */ 951 typedef struct ImpTabPair { 952 const void * pImpTab[2]; 953 const void * pImpAct[2]; 954 } ImpTabPair; 955 956 /****************************************************************** 957 958 LEVELS STATE TABLES 959 960 In all levels state tables, 961 - state 0 is the initial state 962 - the Res column is the increment to add to the text level 963 for this property sequence. 964 965 The impAct arrays for each table of a pair map the local action 966 numbers of the table to the total list of actions. For instance, 967 action 2 in a given table corresponds to the action number which 968 appears in entry [2] of the impAct array for that table. 969 The first entry of all impAct arrays must be 0. 970 971 Action 1: init conditional sequence 972 2: prepend conditional sequence to current sequence 973 3: set ON sequence to new level - 1 974 4: init EN/AN/ON sequence 975 5: fix EN/AN/ON sequence followed by R 976 6: set previous level sequence to level 2 977 978 Notes: 979 1) These tables are used in processPropertySeq(). The input 980 is property sequences as determined by resolveImplicitLevels. 981 2) Most such property sequences are processed immediately 982 (levels are assigned). 983 3) However, some sequences cannot be assigned a final level till 984 one or more following sequences are received. For instance, 985 ON following an R sequence within an even-level paragraph. 986 If the following sequence is R, the ON sequence will be 987 assigned basic run level+1, and so will the R sequence. 988 4) S is generally handled like ON, since its level will be fixed 989 to paragraph level in adjustWSLevels(). 990 991 */ 992 993 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ 994 /* In this table, conditional sequences receive the higher possible level 995 until proven otherwise. 996 */ 997 { 998 /* L , R , EN , AN , ON , S , B , Res */ 999 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 }, 1000 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 }, 1001 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 }, 1002 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 }, 1003 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 }, 1004 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 } 1005 }; 1006 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ 1007 /* In this table, conditional sequences receive the lower possible level 1008 until proven otherwise. 1009 */ 1010 { 1011 /* L , R , EN , AN , ON , S , B , Res */ 1012 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, 1013 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 }, 1014 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, 1015 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 }, 1016 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 }, 1017 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 } 1018 }; 1019 static const ImpAct impAct0 = {0,1,2,3,4,5,6}; 1020 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, 1021 &impTabR_DEFAULT}, 1022 {&impAct0, &impAct0}}; 1023 1024 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ 1025 /* In this table, conditional sequences receive the higher possible level 1026 until proven otherwise. 1027 */ 1028 { 1029 /* L , R , EN , AN , ON , S , B , Res */ 1030 /* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 }, 1031 /* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 }, 1032 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 }, 1033 /* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 }, 1034 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } 1035 }; 1036 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, 1037 &impTabR_DEFAULT}, 1038 {&impAct0, &impAct0}}; 1039 1040 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R = 1041 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1042 until proven that there is L or sor/eor on both sides. AN is handled like EN. 1043 */ 1044 { 1045 /* L , R , EN , AN , ON , S , B , Res */ 1046 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, 1047 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 }, 1048 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 }, 1049 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 }, 1050 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 }, 1051 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 } 1052 }; 1053 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R = 1054 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1055 until proven that there is L on both sides. AN is handled like EN. 1056 */ 1057 { 1058 /* L , R , EN , AN , ON , S , B , Res */ 1059 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1060 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, 1061 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 }, 1062 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 }, 1063 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 } 1064 }; 1065 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = { 1066 {&impTabL_GROUP_NUMBERS_WITH_R, 1067 &impTabR_GROUP_NUMBERS_WITH_R}, 1068 {&impAct0, &impAct0}}; 1069 1070 1071 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L = 1072 /* This table is identical to the Default LTR table except that EN and AN are 1073 handled like L. 1074 */ 1075 { 1076 /* L , R , EN , AN , ON , S , B , Res */ 1077 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 }, 1078 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 }, 1079 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 }, 1080 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 }, 1081 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 }, 1082 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 } 1083 }; 1084 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L = 1085 /* This table is identical to the Default RTL table except that EN and AN are 1086 handled like L. 1087 */ 1088 { 1089 /* L , R , EN , AN , ON , S , B , Res */ 1090 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1091 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 }, 1092 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, 1093 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 }, 1094 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 }, 1095 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 } 1096 }; 1097 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = { 1098 {&impTabL_INVERSE_NUMBERS_AS_L, 1099 &impTabR_INVERSE_NUMBERS_AS_L}, 1100 {&impAct0, &impAct0}}; 1101 1102 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ 1103 /* In this table, conditional sequences receive the lower possible level 1104 until proven otherwise. 1105 */ 1106 { 1107 /* L , R , EN , AN , ON , S , B , Res */ 1108 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, 1109 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 }, 1110 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, 1111 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 }, 1112 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 }, 1113 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 }, 1114 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 } 1115 }; 1116 static const ImpAct impAct1 = {0,1,11,12}; 1117 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" 1118 */ 1119 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { 1120 {&impTabL_DEFAULT, 1121 &impTabR_INVERSE_LIKE_DIRECT}, 1122 {&impAct0, &impAct1}}; 1123 1124 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = 1125 /* The case handled in this table is (visually): R EN L 1126 */ 1127 { 1128 /* L , R , EN , AN , ON , S , B , Res */ 1129 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 }, 1130 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 }, 1131 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 }, 1132 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 }, 1133 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 }, 1134 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 }, 1135 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 } 1136 }; 1137 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = 1138 /* The cases handled in this table are (visually): R EN L 1139 R L AN L 1140 */ 1141 { 1142 /* L , R , EN , AN , ON , S , B , Res */ 1143 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1144 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 }, 1145 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 }, 1146 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 }, 1147 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 }, 1148 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, 1149 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 } 1150 }; 1151 static const ImpAct impAct2 = {0,1,7,8,9,10}; 1152 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { 1153 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, 1154 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, 1155 {&impAct0, &impAct2}}; 1156 1157 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { 1158 {&impTabL_NUMBERS_SPECIAL, 1159 &impTabR_INVERSE_LIKE_DIRECT}, 1160 {&impAct0, &impAct1}}; 1161 1162 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = 1163 /* The case handled in this table is (visually): R EN L 1164 */ 1165 { 1166 /* L , R , EN , AN , ON , S , B , Res */ 1167 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 }, 1168 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 }, 1169 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 }, 1170 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 }, 1171 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 } 1172 }; 1173 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { 1174 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, 1175 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, 1176 {&impAct0, &impAct2}}; 1177 1178 #undef s 1179 1180 typedef struct { 1181 const ImpTab * pImpTab; /* level table pointer */ 1182 const ImpAct * pImpAct; /* action map array */ 1183 int32_t startON; /* start of ON sequence */ 1184 int32_t startL2EN; /* start of level 2 sequence */ 1185 int32_t lastStrongRTL; /* index of last found R or AL */ 1186 int32_t state; /* current state */ 1187 UBiDiLevel runLevel; /* run level before implicit solving */ 1188 } LevState; 1189 1190 /*------------------------------------------------------------------------*/ 1191 1192 static void 1193 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) 1194 /* param pos: position where to insert 1195 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER 1196 */ 1197 { 1198 #define FIRSTALLOC 10 1199 Point point; 1200 InsertPoints * pInsertPoints=&(pBiDi->insertPoints); 1201 1202 if (pInsertPoints->capacity == 0) 1203 { 1204 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC); 1205 if (pInsertPoints->points == NULL) 1206 { 1207 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; 1208 return; 1209 } 1210 pInsertPoints->capacity=FIRSTALLOC; 1211 } 1212 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ 1213 { 1214 void * savePoints=pInsertPoints->points; 1215 pInsertPoints->points=uprv_realloc(pInsertPoints->points, 1216 pInsertPoints->capacity*2*sizeof(Point)); 1217 if (pInsertPoints->points == NULL) 1218 { 1219 pInsertPoints->points=savePoints; 1220 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; 1221 return; 1222 } 1223 else pInsertPoints->capacity*=2; 1224 } 1225 point.pos=pos; 1226 point.flag=flag; 1227 pInsertPoints->points[pInsertPoints->size]=point; 1228 pInsertPoints->size++; 1229 #undef FIRSTALLOC 1230 } 1231 1232 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ 1233 1234 /* 1235 * This implementation of the (Wn) rules applies all rules in one pass. 1236 * In order to do so, it needs a look-ahead of typically 1 character 1237 * (except for W5: sequences of ET) and keeps track of changes 1238 * in a rule Wp that affect a later Wq (p<q). 1239 * 1240 * The (Nn) and (In) rules are also performed in that same single loop, 1241 * but effectively one iteration behind for white space. 1242 * 1243 * Since all implicit rules are performed in one step, it is not necessary 1244 * to actually store the intermediate directional properties in dirProps[]. 1245 */ 1246 1247 static void 1248 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, 1249 int32_t start, int32_t limit) { 1250 uint8_t cell, oldStateSeq, actionSeq; 1251 const ImpTab * pImpTab=pLevState->pImpTab; 1252 const ImpAct * pImpAct=pLevState->pImpAct; 1253 UBiDiLevel * levels=pBiDi->levels; 1254 UBiDiLevel level, addLevel; 1255 InsertPoints * pInsertPoints; 1256 int32_t start0, k; 1257 1258 start0=start; /* save original start position */ 1259 oldStateSeq=(uint8_t)pLevState->state; 1260 cell=(*pImpTab)[oldStateSeq][_prop]; 1261 pLevState->state=GET_STATE(cell); /* isolate the new state */ 1262 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ 1263 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; 1264 1265 if(actionSeq) { 1266 switch(actionSeq) { 1267 case 1: /* init ON seq */ 1268 pLevState->startON=start0; 1269 break; 1270 1271 case 2: /* prepend ON seq to current seq */ 1272 start=pLevState->startON; 1273 break; 1274 1275 case 3: /* L or S after possible relevant EN/AN */ 1276 /* check if we had EN after R/AL */ 1277 if (pLevState->startL2EN >= 0) { 1278 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); 1279 } 1280 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */ 1281 /* check if we had any relevant EN/AN after R/AL */ 1282 pInsertPoints=&(pBiDi->insertPoints); 1283 if ((pInsertPoints->capacity == 0) || 1284 (pInsertPoints->size <= pInsertPoints->confirmed)) 1285 { 1286 /* nothing, just clean up */ 1287 pLevState->lastStrongRTL=-1; 1288 /* check if we have a pending conditional segment */ 1289 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; 1290 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ 1291 start=pLevState->startON; /* reset to basic run level */ 1292 } 1293 if (_prop == DirProp_S) /* add LRM before S */ 1294 { 1295 addPoint(pBiDi, start0, LRM_BEFORE); 1296 pInsertPoints->confirmed=pInsertPoints->size; 1297 } 1298 break; 1299 } 1300 /* reset previous RTL cont to level for LTR text */ 1301 for (k=pLevState->lastStrongRTL+1; k<start0; k++) 1302 { 1303 /* reset odd level, leave runLevel+2 as is */ 1304 levels[k]=(levels[k] - 2) & ~1; 1305 } 1306 /* mark insert points as confirmed */ 1307 pInsertPoints->confirmed=pInsertPoints->size; 1308 pLevState->lastStrongRTL=-1; 1309 if (_prop == DirProp_S) /* add LRM before S */ 1310 { 1311 addPoint(pBiDi, start0, LRM_BEFORE); 1312 pInsertPoints->confirmed=pInsertPoints->size; 1313 } 1314 break; 1315 1316 case 4: /* R/AL after possible relevant EN/AN */ 1317 /* just clean up */ 1318 pInsertPoints=&(pBiDi->insertPoints); 1319 if (pInsertPoints->capacity > 0) 1320 /* remove all non confirmed insert points */ 1321 pInsertPoints->size=pInsertPoints->confirmed; 1322 pLevState->startON=-1; 1323 pLevState->startL2EN=-1; 1324 pLevState->lastStrongRTL=limit - 1; 1325 break; 1326 1327 case 5: /* EN/AN after R/AL + possible cont */ 1328 /* check for real AN */ 1329 if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) && 1330 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) 1331 { 1332 /* real AN */ 1333 if (pLevState->startL2EN == -1) /* if no relevant EN already found */ 1334 { 1335 /* just note the righmost digit as a strong RTL */ 1336 pLevState->lastStrongRTL=limit - 1; 1337 break; 1338 } 1339 if (pLevState->startL2EN >= 0) /* after EN, no AN */ 1340 { 1341 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); 1342 pLevState->startL2EN=-2; 1343 } 1344 /* note AN */ 1345 addPoint(pBiDi, start0, LRM_BEFORE); 1346 break; 1347 } 1348 /* if first EN/AN after R/AL */ 1349 if (pLevState->startL2EN == -1) { 1350 pLevState->startL2EN=start0; 1351 } 1352 break; 1353 1354 case 6: /* note location of latest R/AL */ 1355 pLevState->lastStrongRTL=limit - 1; 1356 pLevState->startON=-1; 1357 break; 1358 1359 case 7: /* L after R+ON/EN/AN */ 1360 /* include possible adjacent number on the left */ 1361 for (k=start0-1; k>=0 && !(levels[k]&1); k--); 1362 if(k>=0) { 1363 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ 1364 pInsertPoints=&(pBiDi->insertPoints); 1365 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ 1366 } 1367 pLevState->startON=start0; 1368 break; 1369 1370 case 8: /* AN after L */ 1371 /* AN numbers between L text on both sides may be trouble. */ 1372 /* tentatively bracket with LRMs; will be confirmed if followed by L */ 1373 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ 1374 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ 1375 break; 1376 1377 case 9: /* R after L+ON/EN/AN */ 1378 /* false alert, infirm LRMs around previous AN */ 1379 pInsertPoints=&(pBiDi->insertPoints); 1380 pInsertPoints->size=pInsertPoints->confirmed; 1381 if (_prop == DirProp_S) /* add RLM before S */ 1382 { 1383 addPoint(pBiDi, start0, RLM_BEFORE); 1384 pInsertPoints->confirmed=pInsertPoints->size; 1385 } 1386 break; 1387 1388 case 10: /* L after L+ON/AN */ 1389 level=pLevState->runLevel + addLevel; 1390 for(k=pLevState->startON; k<start0; k++) { 1391 if (levels[k]<level) 1392 levels[k]=level; 1393 } 1394 pInsertPoints=&(pBiDi->insertPoints); 1395 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */ 1396 pLevState->startON=start0; 1397 break; 1398 1399 case 11: /* L after L+ON+EN/AN/ON */ 1400 level=pLevState->runLevel; 1401 for(k=start0-1; k>=pLevState->startON; k--) { 1402 if(levels[k]==level+3) { 1403 while(levels[k]==level+3) { 1404 levels[k--]-=2; 1405 } 1406 while(levels[k]==level) { 1407 k--; 1408 } 1409 } 1410 if(levels[k]==level+2) { 1411 levels[k]=level; 1412 continue; 1413 } 1414 levels[k]=level+1; 1415 } 1416 break; 1417 1418 case 12: /* R after L+ON+EN/AN/ON */ 1419 level=pLevState->runLevel+1; 1420 for(k=start0-1; k>=pLevState->startON; k--) { 1421 if(levels[k]>level) { 1422 levels[k]-=2; 1423 } 1424 } 1425 break; 1426 1427 default: /* we should never get here */ 1428 U_ASSERT(FALSE); 1429 break; 1430 } 1431 } 1432 if((addLevel) || (start < start0)) { 1433 level=pLevState->runLevel + addLevel; 1434 for(k=start; k<limit; k++) { 1435 levels[k]=level; 1436 } 1437 } 1438 } 1439 1440 static DirProp 1441 lastL_R_AL(UBiDi *pBiDi) { 1442 /* return last strong char at the end of the prologue */ 1443 const UChar *text=pBiDi->prologue; 1444 int32_t length=pBiDi->proLength; 1445 int32_t i; 1446 UChar32 uchar; 1447 DirProp dirProp; 1448 for(i=length; i>0; ) { 1449 /* i is decremented by U16_PREV */ 1450 U16_PREV(text, 0, i, uchar); 1451 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); 1452 if(dirProp==L) { 1453 return DirProp_L; 1454 } 1455 if(dirProp==R || dirProp==AL) { 1456 return DirProp_R; 1457 } 1458 if(dirProp==B) { 1459 return DirProp_ON; 1460 } 1461 } 1462 return DirProp_ON; 1463 } 1464 1465 static DirProp 1466 firstL_R_AL_EN_AN(UBiDi *pBiDi) { 1467 /* return first strong char or digit in epilogue */ 1468 const UChar *text=pBiDi->epilogue; 1469 int32_t length=pBiDi->epiLength; 1470 int32_t i; 1471 UChar32 uchar; 1472 DirProp dirProp; 1473 for(i=0; i<length; ) { 1474 /* i is incremented by U16_NEXT */ 1475 U16_NEXT(text, i, length, uchar); 1476 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); 1477 if(dirProp==L) { 1478 return DirProp_L; 1479 } 1480 if(dirProp==R || dirProp==AL) { 1481 return DirProp_R; 1482 } 1483 if(dirProp==EN) { 1484 return DirProp_EN; 1485 } 1486 if(dirProp==AN) { 1487 return DirProp_AN; 1488 } 1489 } 1490 return DirProp_ON; 1491 } 1492 1493 static void 1494 resolveImplicitLevels(UBiDi *pBiDi, 1495 int32_t start, int32_t limit, 1496 DirProp sor, DirProp eor) { 1497 const DirProp *dirProps=pBiDi->dirProps; 1498 1499 LevState levState; 1500 int32_t i, start1, start2; 1501 uint8_t oldStateImp, stateImp, actionImp; 1502 uint8_t gprop, resProp, cell; 1503 UBool inverseRTL; 1504 DirProp nextStrongProp=R; 1505 int32_t nextStrongPos=-1; 1506 1507 levState.startON = -1; /* silence gcc flow analysis */ 1508 1509 /* check for RTL inverse BiDi mode */ 1510 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to 1511 * loop on the text characters from end to start. 1512 * This would need a different properties state table (at least different 1513 * actions) and different levels state tables (maybe very similar to the 1514 * LTR corresponding ones. 1515 */ 1516 inverseRTL=(UBool) 1517 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && 1518 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || 1519 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); 1520 /* initialize for levels state table */ 1521 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 1522 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 1523 levState.state=0; 1524 levState.runLevel=pBiDi->levels[start]; 1525 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; 1526 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; 1527 if(start==0 && pBiDi->proLength>0) { 1528 DirProp lastStrong=lastL_R_AL(pBiDi); 1529 if(lastStrong!=DirProp_ON) { 1530 sor=lastStrong; 1531 } 1532 } 1533 processPropertySeq(pBiDi, &levState, sor, start, start); 1534 /* initialize for property state table */ 1535 if(NO_CONTEXT_RTL(dirProps[start])==NSM) { 1536 stateImp = 1 + sor; 1537 } else { 1538 stateImp=0; 1539 } 1540 start1=start; 1541 start2=start; 1542 1543 for(i=start; i<=limit; i++) { 1544 if(i>=limit) { 1545 gprop=eor; 1546 } else { 1547 DirProp prop, prop1; 1548 prop=NO_CONTEXT_RTL(dirProps[i]); 1549 if(inverseRTL) { 1550 if(prop==AL) { 1551 /* AL before EN does not make it AN */ 1552 prop=R; 1553 } else if(prop==EN) { 1554 if(nextStrongPos<=i) { 1555 /* look for next strong char (L/R/AL) */ 1556 int32_t j; 1557 nextStrongProp=R; /* set default */ 1558 nextStrongPos=limit; 1559 for(j=i+1; j<limit; j++) { 1560 prop1=NO_CONTEXT_RTL(dirProps[j]); 1561 if(prop1==L || prop1==R || prop1==AL) { 1562 nextStrongProp=prop1; 1563 nextStrongPos=j; 1564 break; 1565 } 1566 } 1567 } 1568 if(nextStrongProp==AL) { 1569 prop=AN; 1570 } 1571 } 1572 } 1573 gprop=groupProp[prop]; 1574 } 1575 oldStateImp=stateImp; 1576 cell=impTabProps[oldStateImp][gprop]; 1577 stateImp=GET_STATEPROPS(cell); /* isolate the new state */ 1578 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */ 1579 if((i==limit) && (actionImp==0)) { 1580 /* there is an unprocessed sequence if its property == eor */ 1581 actionImp=1; /* process the last sequence */ 1582 } 1583 if(actionImp) { 1584 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES]; 1585 switch(actionImp) { 1586 case 1: /* process current seq1, init new seq1 */ 1587 processPropertySeq(pBiDi, &levState, resProp, start1, i); 1588 start1=i; 1589 break; 1590 case 2: /* init new seq2 */ 1591 start2=i; 1592 break; 1593 case 3: /* process seq1, process seq2, init new seq1 */ 1594 processPropertySeq(pBiDi, &levState, resProp, start1, start2); 1595 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i); 1596 start1=i; 1597 break; 1598 case 4: /* process seq1, set seq1=seq2, init new seq2 */ 1599 processPropertySeq(pBiDi, &levState, resProp, start1, start2); 1600 start1=start2; 1601 start2=i; 1602 break; 1603 default: /* we should never get here */ 1604 U_ASSERT(FALSE); 1605 break; 1606 } 1607 } 1608 } 1609 /* flush possible pending sequence, e.g. ON */ 1610 if(limit==pBiDi->length && pBiDi->epiLength>0) { 1611 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi); 1612 if(firstStrong!=DirProp_ON) { 1613 eor=firstStrong; 1614 } 1615 } 1616 processPropertySeq(pBiDi, &levState, eor, limit, limit); 1617 } 1618 1619 /* perform (L1) and (X9) ---------------------------------------------------- */ 1620 1621 /* 1622 * Reset the embedding levels for some non-graphic characters (L1). 1623 * This function also sets appropriate levels for BN, and 1624 * explicit embedding types that are supposed to have been removed 1625 * from the paragraph in (X9). 1626 */ 1627 static void 1628 adjustWSLevels(UBiDi *pBiDi) { 1629 const DirProp *dirProps=pBiDi->dirProps; 1630 UBiDiLevel *levels=pBiDi->levels; 1631 int32_t i; 1632 1633 if(pBiDi->flags&MASK_WS) { 1634 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; 1635 Flags flag; 1636 1637 i=pBiDi->trailingWSStart; 1638 while(i>0) { 1639 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ 1640 while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) { 1641 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { 1642 levels[i]=0; 1643 } else { 1644 levels[i]=GET_PARALEVEL(pBiDi, i); 1645 } 1646 } 1647 1648 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ 1649 /* here, i+1 is guaranteed to be <length */ 1650 while(i>0) { 1651 flag=DIRPROP_FLAG_NC(dirProps[--i]); 1652 if(flag&MASK_BN_EXPLICIT) { 1653 levels[i]=levels[i+1]; 1654 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { 1655 levels[i]=0; 1656 break; 1657 } else if(flag&MASK_B_S) { 1658 levels[i]=GET_PARALEVEL(pBiDi, i); 1659 break; 1660 } 1661 } 1662 } 1663 } 1664 } 1665 1666 U_DRAFT void U_EXPORT2 1667 ubidi_setContext(UBiDi *pBiDi, 1668 const UChar *prologue, int32_t proLength, 1669 const UChar *epilogue, int32_t epiLength, 1670 UErrorCode *pErrorCode) { 1671 /* check the argument values */ 1672 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 1673 if(pBiDi==NULL || proLength<-1 || epiLength<-1 || 1674 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) { 1675 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1676 return; 1677 } 1678 1679 if(proLength==-1) { 1680 pBiDi->proLength=u_strlen(prologue); 1681 } else { 1682 pBiDi->proLength=proLength; 1683 } 1684 if(epiLength==-1) { 1685 pBiDi->epiLength=u_strlen(epilogue); 1686 } else { 1687 pBiDi->epiLength=epiLength; 1688 } 1689 pBiDi->prologue=prologue; 1690 pBiDi->epilogue=epilogue; 1691 } 1692 1693 static void 1694 setParaSuccess(UBiDi *pBiDi) { 1695 pBiDi->proLength=0; /* forget the last context */ 1696 pBiDi->epiLength=0; 1697 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ 1698 } 1699 1700 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) 1701 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x))) 1702 static void 1703 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, 1704 UBiDiLevel paraLevel, UErrorCode *pErrorCode) { 1705 void *runsOnlyMemory; 1706 int32_t *visualMap; 1707 UChar *visualText; 1708 int32_t saveLength, saveTrailingWSStart; 1709 const UBiDiLevel *levels; 1710 UBiDiLevel *saveLevels; 1711 UBiDiDirection saveDirection; 1712 UBool saveMayAllocateText; 1713 Run *runs; 1714 int32_t visualLength, i, j, visualStart, logicalStart, 1715 runCount, runLength, addedRuns, insertRemove, 1716 start, limit, step, indexOddBit, logicalPos, 1717 index0, index1; 1718 uint32_t saveOptions; 1719 1720 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; 1721 if(length==0) { 1722 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); 1723 goto cleanup3; 1724 } 1725 /* obtain memory for mapping table and visual text */ 1726 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))); 1727 if(runsOnlyMemory==NULL) { 1728 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1729 goto cleanup3; 1730 } 1731 visualMap=runsOnlyMemory; 1732 visualText=(UChar *)&visualMap[length]; 1733 saveLevels=(UBiDiLevel *)&visualText[length]; 1734 saveOptions=pBiDi->reorderingOptions; 1735 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { 1736 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; 1737 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; 1738 } 1739 paraLevel&=1; /* accept only 0 or 1 */ 1740 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); 1741 if(U_FAILURE(*pErrorCode)) { 1742 goto cleanup3; 1743 } 1744 /* we cannot access directly pBiDi->levels since it is not yet set if 1745 * direction is not MIXED 1746 */ 1747 levels=ubidi_getLevels(pBiDi, pErrorCode); 1748 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel)); 1749 saveTrailingWSStart=pBiDi->trailingWSStart; 1750 saveLength=pBiDi->length; 1751 saveDirection=pBiDi->direction; 1752 1753 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use 1754 * the visual map and the dirProps array to drive the second call 1755 * to ubidi_setPara (but must make provision for possible removal of 1756 * BiDi controls. Alternatively, only use the dirProps array via 1757 * customized classifier callback. 1758 */ 1759 visualLength=ubidi_writeReordered(pBiDi, visualText, length, 1760 UBIDI_DO_MIRRORING, pErrorCode); 1761 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); 1762 if(U_FAILURE(*pErrorCode)) { 1763 goto cleanup2; 1764 } 1765 pBiDi->reorderingOptions=saveOptions; 1766 1767 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; 1768 paraLevel^=1; 1769 /* Because what we did with reorderingOptions, visualText may be shorter 1770 * than the original text. But we don't want the levels memory to be 1771 * reallocated shorter than the original length, since we need to restore 1772 * the levels as after the first call to ubidi_setpara() before returning. 1773 * We will force mayAllocateText to FALSE before the second call to 1774 * ubidi_setpara(), and will restore it afterwards. 1775 */ 1776 saveMayAllocateText=pBiDi->mayAllocateText; 1777 pBiDi->mayAllocateText=FALSE; 1778 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); 1779 pBiDi->mayAllocateText=saveMayAllocateText; 1780 ubidi_getRuns(pBiDi, pErrorCode); 1781 if(U_FAILURE(*pErrorCode)) { 1782 goto cleanup1; 1783 } 1784 /* check if some runs must be split, count how many splits */ 1785 addedRuns=0; 1786 runCount=pBiDi->runCount; 1787 runs=pBiDi->runs; 1788 visualStart=0; 1789 for(i=0; i<runCount; i++, visualStart+=runLength) { 1790 runLength=runs[i].visualLimit-visualStart; 1791 if(runLength<2) { 1792 continue; 1793 } 1794 logicalStart=GET_INDEX(runs[i].logicalStart); 1795 for(j=logicalStart+1; j<logicalStart+runLength; j++) { 1796 index0=visualMap[j]; 1797 index1=visualMap[j-1]; 1798 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { 1799 addedRuns++; 1800 } 1801 } 1802 } 1803 if(addedRuns) { 1804 if(getRunsMemory(pBiDi, runCount+addedRuns)) { 1805 if(runCount==1) { 1806 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */ 1807 pBiDi->runsMemory[0]=runs[0]; 1808 } 1809 runs=pBiDi->runs=pBiDi->runsMemory; 1810 pBiDi->runCount+=addedRuns; 1811 } else { 1812 goto cleanup1; 1813 } 1814 } 1815 /* split runs which are not consecutive in source text */ 1816 for(i=runCount-1; i>=0; i--) { 1817 runLength= i==0 ? runs[0].visualLimit : 1818 runs[i].visualLimit-runs[i-1].visualLimit; 1819 logicalStart=runs[i].logicalStart; 1820 indexOddBit=GET_ODD_BIT(logicalStart); 1821 logicalStart=GET_INDEX(logicalStart); 1822 if(runLength<2) { 1823 if(addedRuns) { 1824 runs[i+addedRuns]=runs[i]; 1825 } 1826 logicalPos=visualMap[logicalStart]; 1827 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1828 saveLevels[logicalPos]^indexOddBit); 1829 continue; 1830 } 1831 if(indexOddBit) { 1832 start=logicalStart; 1833 limit=logicalStart+runLength-1; 1834 step=1; 1835 } else { 1836 start=logicalStart+runLength-1; 1837 limit=logicalStart; 1838 step=-1; 1839 } 1840 for(j=start; j!=limit; j+=step) { 1841 index0=visualMap[j]; 1842 index1=visualMap[j+step]; 1843 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { 1844 logicalPos=BIDI_MIN(visualMap[start], index0); 1845 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1846 saveLevels[logicalPos]^indexOddBit); 1847 runs[i+addedRuns].visualLimit=runs[i].visualLimit; 1848 runs[i].visualLimit-=BIDI_ABS(j-start)+1; 1849 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); 1850 runs[i+addedRuns].insertRemove=insertRemove; 1851 runs[i].insertRemove&=~insertRemove; 1852 start=j+step; 1853 addedRuns--; 1854 } 1855 } 1856 if(addedRuns) { 1857 runs[i+addedRuns]=runs[i]; 1858 } 1859 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); 1860 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1861 saveLevels[logicalPos]^indexOddBit); 1862 } 1863 1864 cleanup1: 1865 /* restore initial paraLevel */ 1866 pBiDi->paraLevel^=1; 1867 cleanup2: 1868 /* restore real text */ 1869 pBiDi->text=text; 1870 pBiDi->length=saveLength; 1871 pBiDi->originalLength=length; 1872 pBiDi->direction=saveDirection; 1873 /* the saved levels should never excess levelsSize, but we check anyway */ 1874 if(saveLength>pBiDi->levelsSize) { 1875 saveLength=pBiDi->levelsSize; 1876 } 1877 uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel)); 1878 pBiDi->trailingWSStart=saveTrailingWSStart; 1879 /* free memory for mapping table and visual text */ 1880 uprv_free(runsOnlyMemory); 1881 if(pBiDi->runCount>1) { 1882 pBiDi->direction=UBIDI_MIXED; 1883 } 1884 cleanup3: 1885 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; 1886 } 1887 1888 /* ubidi_setPara ------------------------------------------------------------ */ 1889 1890 U_CAPI void U_EXPORT2 1891 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, 1892 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, 1893 UErrorCode *pErrorCode) { 1894 UBiDiDirection direction; 1895 1896 /* check the argument values */ 1897 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 1898 if(pBiDi==NULL || text==NULL || length<-1 || 1899 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) { 1900 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1901 return; 1902 } 1903 1904 if(length==-1) { 1905 length=u_strlen(text); 1906 } 1907 1908 /* special treatment for RUNS_ONLY mode */ 1909 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { 1910 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); 1911 return; 1912 } 1913 1914 /* initialize the UBiDi structure */ 1915 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ 1916 pBiDi->text=text; 1917 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; 1918 pBiDi->paraLevel=paraLevel; 1919 pBiDi->direction=UBIDI_LTR; 1920 pBiDi->paraCount=1; 1921 1922 pBiDi->dirProps=NULL; 1923 pBiDi->levels=NULL; 1924 pBiDi->runs=NULL; 1925 pBiDi->insertPoints.size=0; /* clean up from last call */ 1926 pBiDi->insertPoints.confirmed=0; /* clean up from last call */ 1927 1928 /* 1929 * Save the original paraLevel if contextual; otherwise, set to 0. 1930 */ 1931 if(IS_DEFAULT_LEVEL(paraLevel)) { 1932 pBiDi->defaultParaLevel=paraLevel; 1933 } else { 1934 pBiDi->defaultParaLevel=0; 1935 } 1936 1937 if(length==0) { 1938 /* 1939 * For an empty paragraph, create a UBiDi object with the paraLevel and 1940 * the flags and the direction set but without allocating zero-length arrays. 1941 * There is nothing more to do. 1942 */ 1943 if(IS_DEFAULT_LEVEL(paraLevel)) { 1944 pBiDi->paraLevel&=1; 1945 pBiDi->defaultParaLevel=0; 1946 } 1947 if(paraLevel&1) { 1948 pBiDi->flags=DIRPROP_FLAG(R); 1949 pBiDi->direction=UBIDI_RTL; 1950 } else { 1951 pBiDi->flags=DIRPROP_FLAG(L); 1952 pBiDi->direction=UBIDI_LTR; 1953 } 1954 1955 pBiDi->runCount=0; 1956 pBiDi->paraCount=0; 1957 setParaSuccess(pBiDi); /* mark successful setPara */ 1958 return; 1959 } 1960 1961 pBiDi->runCount=-1; 1962 1963 /* 1964 * Get the directional properties, 1965 * the flags bit-set, and 1966 * determine the paragraph level if necessary. 1967 */ 1968 if(getDirPropsMemory(pBiDi, length)) { 1969 pBiDi->dirProps=pBiDi->dirPropsMemory; 1970 getDirProps(pBiDi); 1971 } else { 1972 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1973 return; 1974 } 1975 /* the processed length may have changed if UBIDI_OPTION_STREAMING */ 1976 length= pBiDi->length; 1977 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ 1978 /* allocate paras memory */ 1979 if(pBiDi->paraCount>1) { 1980 if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) { 1981 pBiDi->paras=pBiDi->parasMemory; 1982 pBiDi->paras[pBiDi->paraCount-1]=length; 1983 } else { 1984 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1985 return; 1986 } 1987 } else { 1988 /* initialize paras for single paragraph */ 1989 pBiDi->paras=pBiDi->simpleParas; 1990 pBiDi->simpleParas[0]=length; 1991 } 1992 1993 /* are explicit levels specified? */ 1994 if(embeddingLevels==NULL) { 1995 /* no: determine explicit levels according to the (Xn) rules */\ 1996 if(getLevelsMemory(pBiDi, length)) { 1997 pBiDi->levels=pBiDi->levelsMemory; 1998 direction=resolveExplicitLevels(pBiDi); 1999 } else { 2000 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 2001 return; 2002 } 2003 } else { 2004 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ 2005 pBiDi->levels=embeddingLevels; 2006 direction=checkExplicitLevels(pBiDi, pErrorCode); 2007 if(U_FAILURE(*pErrorCode)) { 2008 return; 2009 } 2010 } 2011 2012 /* 2013 * The steps after (X9) in the UBiDi algorithm are performed only if 2014 * the paragraph text has mixed directionality! 2015 */ 2016 pBiDi->direction=direction; 2017 switch(direction) { 2018 case UBIDI_LTR: 2019 /* make sure paraLevel is even */ 2020 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1); 2021 2022 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ 2023 pBiDi->trailingWSStart=0; 2024 break; 2025 case UBIDI_RTL: 2026 /* make sure paraLevel is odd */ 2027 pBiDi->paraLevel|=1; 2028 2029 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ 2030 pBiDi->trailingWSStart=0; 2031 break; 2032 default: 2033 /* 2034 * Choose the right implicit state table 2035 */ 2036 switch(pBiDi->reorderingMode) { 2037 case UBIDI_REORDER_DEFAULT: 2038 pBiDi->pImpTabPair=&impTab_DEFAULT; 2039 break; 2040 case UBIDI_REORDER_NUMBERS_SPECIAL: 2041 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; 2042 break; 2043 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: 2044 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; 2045 break; 2046 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: 2047 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; 2048 break; 2049 case UBIDI_REORDER_INVERSE_LIKE_DIRECT: 2050 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { 2051 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; 2052 } else { 2053 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; 2054 } 2055 break; 2056 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: 2057 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { 2058 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; 2059 } else { 2060 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; 2061 } 2062 break; 2063 default: 2064 /* we should never get here */ 2065 U_ASSERT(FALSE); 2066 break; 2067 } 2068 /* 2069 * If there are no external levels specified and there 2070 * are no significant explicit level codes in the text, 2071 * then we can treat the entire paragraph as one run. 2072 * Otherwise, we need to perform the following rules on runs of 2073 * the text with the same embedding levels. (X10) 2074 * "Significant" explicit level codes are ones that actually 2075 * affect non-BN characters. 2076 * Examples for "insignificant" ones are empty embeddings 2077 * LRE-PDF, LRE-RLE-PDF-PDF, etc. 2078 */ 2079 if(embeddingLevels==NULL && pBiDi->paraCount<=1 && 2080 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { 2081 resolveImplicitLevels(pBiDi, 0, length, 2082 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), 2083 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); 2084 } else { 2085 /* sor, eor: start and end types of same-level-run */ 2086 UBiDiLevel *levels=pBiDi->levels; 2087 int32_t start, limit=0; 2088 UBiDiLevel level, nextLevel; 2089 DirProp sor, eor; 2090 2091 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ 2092 level=GET_PARALEVEL(pBiDi, 0); 2093 nextLevel=levels[0]; 2094 if(level<nextLevel) { 2095 eor=GET_LR_FROM_LEVEL(nextLevel); 2096 } else { 2097 eor=GET_LR_FROM_LEVEL(level); 2098 } 2099 2100 do { 2101 /* determine start and limit of the run (end points just behind the run) */ 2102 2103 /* the values for this run's start are the same as for the previous run's end */ 2104 start=limit; 2105 level=nextLevel; 2106 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) { 2107 /* except if this is a new paragraph, then set sor = para level */ 2108 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); 2109 } else { 2110 sor=eor; 2111 } 2112 2113 /* search for the limit of this run */ 2114 while(++limit<length && levels[limit]==level) {} 2115 2116 /* get the correct level of the next run */ 2117 if(limit<length) { 2118 nextLevel=levels[limit]; 2119 } else { 2120 nextLevel=GET_PARALEVEL(pBiDi, length-1); 2121 } 2122 2123 /* determine eor from max(level, nextLevel); sor is last run's eor */ 2124 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) { 2125 eor=GET_LR_FROM_LEVEL(nextLevel); 2126 } else { 2127 eor=GET_LR_FROM_LEVEL(level); 2128 } 2129 2130 /* if the run consists of overridden directional types, then there 2131 are no implicit types to be resolved */ 2132 if(!(level&UBIDI_LEVEL_OVERRIDE)) { 2133 resolveImplicitLevels(pBiDi, start, limit, sor, eor); 2134 } else { 2135 /* remove the UBIDI_LEVEL_OVERRIDE flags */ 2136 do { 2137 levels[start++]&=~UBIDI_LEVEL_OVERRIDE; 2138 } while(start<limit); 2139 } 2140 } while(limit<length); 2141 } 2142 /* check if we got any memory shortage while adding insert points */ 2143 if (U_FAILURE(pBiDi->insertPoints.errorCode)) 2144 { 2145 *pErrorCode=pBiDi->insertPoints.errorCode; 2146 return; 2147 } 2148 /* reset the embedding levels for some non-graphic characters (L1), (X9) */ 2149 adjustWSLevels(pBiDi); 2150 break; 2151 } 2152 /* add RLM for inverse Bidi with contextual orientation resolving 2153 * to RTL which would not round-trip otherwise 2154 */ 2155 if((pBiDi->defaultParaLevel>0) && 2156 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && 2157 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || 2158 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { 2159 int32_t i, j, start, last; 2160 DirProp dirProp; 2161 for(i=0; i<pBiDi->paraCount; i++) { 2162 last=pBiDi->paras[i]-1; 2163 if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) { 2164 continue; /* LTR paragraph */ 2165 } 2166 start= i==0 ? 0 : pBiDi->paras[i - 1]; 2167 for(j=last; j>=start; j--) { 2168 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]); 2169 if(dirProp==L) { 2170 if(j<last) { 2171 while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) { 2172 last--; 2173 } 2174 } 2175 addPoint(pBiDi, last, RLM_BEFORE); 2176 break; 2177 } 2178 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) { 2179 break; 2180 } 2181 } 2182 } 2183 } 2184 2185 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { 2186 pBiDi->resultLength -= pBiDi->controlCount; 2187 } else { 2188 pBiDi->resultLength += pBiDi->insertPoints.size; 2189 } 2190 setParaSuccess(pBiDi); /* mark successful setPara */ 2191 } 2192 2193 U_CAPI void U_EXPORT2 2194 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { 2195 if(pBiDi!=NULL) { 2196 pBiDi->orderParagraphsLTR=orderParagraphsLTR; 2197 } 2198 } 2199 2200 U_CAPI UBool U_EXPORT2 2201 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { 2202 if(pBiDi!=NULL) { 2203 return pBiDi->orderParagraphsLTR; 2204 } else { 2205 return FALSE; 2206 } 2207 } 2208 2209 U_CAPI UBiDiDirection U_EXPORT2 2210 ubidi_getDirection(const UBiDi *pBiDi) { 2211 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2212 return pBiDi->direction; 2213 } else { 2214 return UBIDI_LTR; 2215 } 2216 } 2217 2218 U_CAPI const UChar * U_EXPORT2 2219 ubidi_getText(const UBiDi *pBiDi) { 2220 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2221 return pBiDi->text; 2222 } else { 2223 return NULL; 2224 } 2225 } 2226 2227 U_CAPI int32_t U_EXPORT2 2228 ubidi_getLength(const UBiDi *pBiDi) { 2229 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2230 return pBiDi->originalLength; 2231 } else { 2232 return 0; 2233 } 2234 } 2235 2236 U_CAPI int32_t U_EXPORT2 2237 ubidi_getProcessedLength(const UBiDi *pBiDi) { 2238 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2239 return pBiDi->length; 2240 } else { 2241 return 0; 2242 } 2243 } 2244 2245 U_CAPI int32_t U_EXPORT2 2246 ubidi_getResultLength(const UBiDi *pBiDi) { 2247 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2248 return pBiDi->resultLength; 2249 } else { 2250 return 0; 2251 } 2252 } 2253 2254 /* paragraphs API functions ------------------------------------------------- */ 2255 2256 U_CAPI UBiDiLevel U_EXPORT2 2257 ubidi_getParaLevel(const UBiDi *pBiDi) { 2258 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2259 return pBiDi->paraLevel; 2260 } else { 2261 return 0; 2262 } 2263 } 2264 2265 U_CAPI int32_t U_EXPORT2 2266 ubidi_countParagraphs(UBiDi *pBiDi) { 2267 if(!IS_VALID_PARA_OR_LINE(pBiDi)) { 2268 return 0; 2269 } else { 2270 return pBiDi->paraCount; 2271 } 2272 } 2273 2274 U_CAPI void U_EXPORT2 2275 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, 2276 int32_t *pParaStart, int32_t *pParaLimit, 2277 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { 2278 int32_t paraStart; 2279 2280 /* check the argument values */ 2281 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 2282 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); 2283 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); 2284 2285 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ 2286 if(paraIndex) { 2287 paraStart=pBiDi->paras[paraIndex-1]; 2288 } else { 2289 paraStart=0; 2290 } 2291 if(pParaStart!=NULL) { 2292 *pParaStart=paraStart; 2293 } 2294 if(pParaLimit!=NULL) { 2295 *pParaLimit=pBiDi->paras[paraIndex]; 2296 } 2297 if(pParaLevel!=NULL) { 2298 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); 2299 } 2300 } 2301 2302 U_CAPI int32_t U_EXPORT2 2303 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, 2304 int32_t *pParaStart, int32_t *pParaLimit, 2305 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { 2306 uint32_t paraIndex; 2307 2308 /* check the argument values */ 2309 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ 2310 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); 2311 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); 2312 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ 2313 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); 2314 2315 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++); 2316 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); 2317 return paraIndex; 2318 } 2319 2320 U_CAPI void U_EXPORT2 2321 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, 2322 const void *newContext, UBiDiClassCallback **oldFn, 2323 const void **oldContext, UErrorCode *pErrorCode) 2324 { 2325 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 2326 if(pBiDi==NULL) { 2327 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2328 return; 2329 } 2330 if( oldFn ) 2331 { 2332 *oldFn = pBiDi->fnClassCallback; 2333 } 2334 if( oldContext ) 2335 { 2336 *oldContext = pBiDi->coClassCallback; 2337 } 2338 pBiDi->fnClassCallback = newFn; 2339 pBiDi->coClassCallback = newContext; 2340 } 2341 2342 U_CAPI void U_EXPORT2 2343 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) 2344 { 2345 if(pBiDi==NULL) { 2346 return; 2347 } 2348 if( fn ) 2349 { 2350 *fn = pBiDi->fnClassCallback; 2351 } 2352 if( context ) 2353 { 2354 *context = pBiDi->coClassCallback; 2355 } 2356 } 2357 2358 U_CAPI UCharDirection U_EXPORT2 2359 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) 2360 { 2361 UCharDirection dir; 2362 2363 if( pBiDi->fnClassCallback == NULL || 2364 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) 2365 { 2366 return ubidi_getClass(pBiDi->bdp, c); 2367 } else { 2368 return dir; 2369 } 2370 } 2371 2372