1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1999-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: ubidi.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 1999jul27 14 * created by: Markus W. Scherer, updated by Matitiahu Allouche 15 */ 16 17 #include "cmemory.h" 18 #include "unicode/utypes.h" 19 #include "unicode/ustring.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ubidi.h" 22 #include "ubidi_props.h" 23 #include "ubidiimp.h" 24 #include "uassert.h" 25 26 /* 27 * General implementation notes: 28 * 29 * Throughout the implementation, there are comments like (W2) that refer to 30 * rules of the BiDi algorithm in its version 5, in this example to the second 31 * rule of the resolution of weak types. 32 * 33 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) 34 * character according to UTF-16, the second UChar gets the directional property of 35 * the entire character assigned, while the first one gets a BN, a boundary 36 * neutral, type, which is ignored by most of the algorithm according to 37 * rule (X9) and the implementation suggestions of the BiDi algorithm. 38 * 39 * Later, adjustWSLevels() will set the level for each BN to that of the 40 * following character (UChar), which results in surrogate pairs getting the 41 * same level on each of their surrogates. 42 * 43 * In a UTF-8 implementation, the same thing could be done: the last byte of 44 * a multi-byte sequence would get the "real" property, while all previous 45 * bytes of that sequence would get BN. 46 * 47 * It is not possible to assign all those parts of a character the same real 48 * property because this would fail in the resolution of weak types with rules 49 * that look at immediately surrounding types. 50 * 51 * As a related topic, this implementation does not remove Boundary Neutral 52 * types from the input, but ignores them wherever this is relevant. 53 * For example, the loop for the resolution of the weak types reads 54 * types until it finds a non-BN. 55 * Also, explicit embedding codes are neither changed into BN nor removed. 56 * They are only treated the same way real BNs are. 57 * As stated before, adjustWSLevels() takes care of them at the end. 58 * For the purpose of conformance, the levels of all these codes 59 * do not matter. 60 * 61 * Note that this implementation never modifies the dirProps 62 * after the initial setup. 63 * 64 * 65 * In this implementation, the resolution of weak types (Wn), 66 * neutrals (Nn), and the assignment of the resolved level (In) 67 * are all done in one single loop, in resolveImplicitLevels(). 68 * Changes of dirProp values are done on the fly, without writing 69 * them back to the dirProps array. 70 * 71 * 72 * This implementation contains code that allows to bypass steps of the 73 * algorithm that are not needed on the specific paragraph 74 * in order to speed up the most common cases considerably, 75 * like text that is entirely LTR, or RTL text without numbers. 76 * 77 * Most of this is done by setting a bit for each directional property 78 * in a flags variable and later checking for whether there are 79 * any LTR characters or any RTL characters, or both, whether 80 * there are any explicit embedding codes, etc. 81 * 82 * If the (Xn) steps are performed, then the flags are re-evaluated, 83 * because they will then not contain the embedding codes any more 84 * and will be adjusted for override codes, so that subsequently 85 * more bypassing may be possible than what the initial flags suggested. 86 * 87 * If the text is not mixed-directional, then the 88 * algorithm steps for the weak type resolution are not performed, 89 * and all levels are set to the paragraph level. 90 * 91 * If there are no explicit embedding codes, then the (Xn) steps 92 * are not performed. 93 * 94 * If embedding levels are supplied as a parameter, then all 95 * explicit embedding codes are ignored, and the (Xn) steps 96 * are not performed. 97 * 98 * White Space types could get the level of the run they belong to, 99 * and are checked with a test of (flags&MASK_EMBEDDING) to 100 * consider if the paragraph direction should be considered in 101 * the flags variable. 102 * 103 * If there are no White Space types in the paragraph, then 104 * (L1) is not necessary in adjustWSLevels(). 105 */ 106 107 /* to avoid some conditional statements, use tiny constant arrays */ 108 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; 109 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; 110 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; 111 112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] 113 #define DIRPROP_FLAG_E(level) flagE[(level)&1] 114 #define DIRPROP_FLAG_O(level) flagO[(level)&1] 115 116 /* UBiDi object management -------------------------------------------------- */ 117 118 U_CAPI UBiDi * U_EXPORT2 119 ubidi_open(void) 120 { 121 UErrorCode errorCode=U_ZERO_ERROR; 122 return ubidi_openSized(0, 0, &errorCode); 123 } 124 125 U_CAPI UBiDi * U_EXPORT2 126 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { 127 UBiDi *pBiDi; 128 129 /* check the argument values */ 130 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 131 return NULL; 132 } else if(maxLength<0 || maxRunCount<0) { 133 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 134 return NULL; /* invalid arguments */ 135 } 136 137 /* allocate memory for the object */ 138 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); 139 if(pBiDi==NULL) { 140 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 141 return NULL; 142 } 143 144 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ 145 uprv_memset(pBiDi, 0, sizeof(UBiDi)); 146 147 /* get BiDi properties */ 148 pBiDi->bdp=ubidi_getSingleton(); 149 150 /* allocate memory for arrays as requested */ 151 if(maxLength>0) { 152 if( !getInitialDirPropsMemory(pBiDi, maxLength) || 153 !getInitialLevelsMemory(pBiDi, maxLength) 154 ) { 155 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 156 } 157 } else { 158 pBiDi->mayAllocateText=TRUE; 159 } 160 161 if(maxRunCount>0) { 162 if(maxRunCount==1) { 163 /* use simpleRuns[] */ 164 pBiDi->runsSize=sizeof(Run); 165 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { 166 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 167 } 168 } else { 169 pBiDi->mayAllocateRuns=TRUE; 170 } 171 172 if(U_SUCCESS(*pErrorCode)) { 173 return pBiDi; 174 } else { 175 ubidi_close(pBiDi); 176 return NULL; 177 } 178 } 179 180 /* 181 * We are allowed to allocate memory if memory==NULL or 182 * mayAllocate==TRUE for each array that we need. 183 * We also try to grow memory as needed if we 184 * allocate it. 185 * 186 * Assume sizeNeeded>0. 187 * If *pMemory!=NULL, then assume *pSize>0. 188 * 189 * ### this realloc() may unnecessarily copy the old data, 190 * which we know we don't need any more; 191 * is this the best way to do this?? 192 */ 193 U_CFUNC UBool 194 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { 195 void **pMemory = (void **)bidiMem; 196 /* check for existing memory */ 197 if(*pMemory==NULL) { 198 /* we need to allocate memory */ 199 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { 200 *pSize=sizeNeeded; 201 return TRUE; 202 } else { 203 return FALSE; 204 } 205 } else { 206 if(sizeNeeded<=*pSize) { 207 /* there is already enough memory */ 208 return TRUE; 209 } 210 else if(!mayAllocate) { 211 /* not enough memory, and we must not allocate */ 212 return FALSE; 213 } else { 214 /* we try to grow */ 215 void *memory; 216 /* in most cases, we do not need the copy-old-data part of 217 * realloc, but it is needed when adding runs using getRunsMemory() 218 * in setParaRunsOnly() 219 */ 220 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { 221 *pMemory=memory; 222 *pSize=sizeNeeded; 223 return TRUE; 224 } else { 225 /* we failed to grow */ 226 return FALSE; 227 } 228 } 229 } 230 } 231 232 U_CAPI void U_EXPORT2 233 ubidi_close(UBiDi *pBiDi) { 234 if(pBiDi!=NULL) { 235 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ 236 if(pBiDi->dirPropsMemory!=NULL) { 237 uprv_free(pBiDi->dirPropsMemory); 238 } 239 if(pBiDi->levelsMemory!=NULL) { 240 uprv_free(pBiDi->levelsMemory); 241 } 242 if(pBiDi->runsMemory!=NULL) { 243 uprv_free(pBiDi->runsMemory); 244 } 245 if(pBiDi->parasMemory!=NULL) { 246 uprv_free(pBiDi->parasMemory); 247 } 248 if(pBiDi->insertPoints.points!=NULL) { 249 uprv_free(pBiDi->insertPoints.points); 250 } 251 252 uprv_free(pBiDi); 253 } 254 } 255 256 /* set to approximate "inverse BiDi" ---------------------------------------- */ 257 258 U_CAPI void U_EXPORT2 259 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { 260 if(pBiDi!=NULL) { 261 pBiDi->isInverse=isInverse; 262 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L 263 : UBIDI_REORDER_DEFAULT; 264 } 265 } 266 267 U_CAPI UBool U_EXPORT2 268 ubidi_isInverse(UBiDi *pBiDi) { 269 if(pBiDi!=NULL) { 270 return pBiDi->isInverse; 271 } else { 272 return FALSE; 273 } 274 } 275 276 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of 277 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre 278 * concept of RUNS_ONLY which is a double operation. 279 * It could be advantageous to divide this into 3 concepts: 280 * a) Operation: direct / inverse / RUNS_ONLY 281 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R 282 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL 283 * This would allow combinations not possible today like RUNS_ONLY with 284 * NUMBERS_SPECIAL. 285 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and 286 * REMOVE_CONTROLS for the inverse step. 287 * Not all combinations would be supported, and probably not all do make sense. 288 * This would need to document which ones are supported and what are the 289 * fallbacks for unsupported combinations. 290 */ 291 U_CAPI void U_EXPORT2 292 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { 293 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) 294 && (reorderingMode < UBIDI_REORDER_COUNT)) { 295 pBiDi->reorderingMode = reorderingMode; 296 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); 297 } 298 } 299 300 U_CAPI UBiDiReorderingMode U_EXPORT2 301 ubidi_getReorderingMode(UBiDi *pBiDi) { 302 if (pBiDi!=NULL) { 303 return pBiDi->reorderingMode; 304 } else { 305 return UBIDI_REORDER_DEFAULT; 306 } 307 } 308 309 U_CAPI void U_EXPORT2 310 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { 311 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { 312 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; 313 } 314 if (pBiDi!=NULL) { 315 pBiDi->reorderingOptions=reorderingOptions; 316 } 317 } 318 319 U_CAPI uint32_t U_EXPORT2 320 ubidi_getReorderingOptions(UBiDi *pBiDi) { 321 if (pBiDi!=NULL) { 322 return pBiDi->reorderingOptions; 323 } else { 324 return 0; 325 } 326 } 327 328 U_CAPI UBiDiDirection U_EXPORT2 329 ubidi_getBaseDirection(const UChar *text, 330 int32_t length){ 331 332 int32_t i; 333 UChar32 uchar; 334 UCharDirection dir; 335 336 if( text==NULL || length<-1 ){ 337 return UBIDI_NEUTRAL; 338 } 339 340 if(length==-1) { 341 length=u_strlen(text); 342 } 343 344 for( i = 0 ; i < length; ) { 345 /* i is incremented by U16_NEXT */ 346 U16_NEXT(text, i, length, uchar); 347 dir = u_charDirection(uchar); 348 if( dir == U_LEFT_TO_RIGHT ) 349 return UBIDI_LTR; 350 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) 351 return UBIDI_RTL; 352 } 353 return UBIDI_NEUTRAL; 354 } 355 356 /* perform (P2)..(P3) ------------------------------------------------------- */ 357 358 /* 359 * Get the directional properties for the text, 360 * calculate the flags bit-set, and 361 * determine the paragraph level if necessary. 362 */ 363 static void 364 getDirProps(UBiDi *pBiDi) { 365 const UChar *text=pBiDi->text; 366 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ 367 368 int32_t i=0, i1, length=pBiDi->originalLength; 369 Flags flags=0; /* collect all directionalities in the text */ 370 UChar32 uchar; 371 DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */ 372 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); 373 /* for inverse BiDi, the default para level is set to RTL if there is a 374 strong R or AL character at either end of the text */ 375 UBool isDefaultLevelInverse=isDefaultLevel && (UBool) 376 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || 377 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); 378 int32_t lastArabicPos=-1; 379 int32_t controlCount=0; 380 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & 381 UBIDI_OPTION_REMOVE_CONTROLS); 382 383 typedef enum { 384 NOT_CONTEXTUAL, /* 0: not contextual paraLevel */ 385 LOOKING_FOR_STRONG, /* 1: looking for first strong char */ 386 FOUND_STRONG_CHAR /* 2: found first strong char */ 387 } State; 388 State state; 389 int32_t paraStart=0; /* index of first char in paragraph */ 390 DirProp paraDir; /* == CONTEXT_RTL within paragraphs 391 starting with strong R char */ 392 DirProp lastStrongDir=0; /* for default level & inverse BiDi */ 393 int32_t lastStrongLTR=0; /* for STREAMING option */ 394 395 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 396 pBiDi->length=0; 397 lastStrongLTR=0; 398 } 399 if(isDefaultLevel) { 400 paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0; 401 paraDir=paraDirDefault; 402 lastStrongDir=paraDirDefault; 403 state=LOOKING_FOR_STRONG; 404 } else { 405 state=NOT_CONTEXTUAL; 406 paraDir=0; 407 } 408 /* count paragraphs and determine the paragraph level (P2..P3) */ 409 /* 410 * see comment in ubidi.h: 411 * the DEFAULT_XXX values are designed so that 412 * their bit 0 alone yields the intended default 413 */ 414 for( /* i=0 above */ ; i<length; ) { 415 /* i is incremented by U16_NEXT */ 416 U16_NEXT(text, i, length, uchar); 417 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar)); 418 dirProps[i-1]=dirProp|paraDir; 419 if(uchar>0xffff) { /* set the lead surrogate's property to BN */ 420 flags|=DIRPROP_FLAG(BN); 421 dirProps[i-2]=(DirProp)(BN|paraDir); 422 } 423 if(state==LOOKING_FOR_STRONG) { 424 if(dirProp==L) { 425 state=FOUND_STRONG_CHAR; 426 if(paraDir) { 427 paraDir=0; 428 for(i1=paraStart; i1<i; i1++) { 429 dirProps[i1]&=~CONTEXT_RTL; 430 } 431 } 432 continue; 433 } 434 if(dirProp==R || dirProp==AL) { 435 state=FOUND_STRONG_CHAR; 436 if(paraDir==0) { 437 paraDir=CONTEXT_RTL; 438 for(i1=paraStart; i1<i; i1++) { 439 dirProps[i1]|=CONTEXT_RTL; 440 } 441 } 442 continue; 443 } 444 } 445 if(dirProp==L) { 446 lastStrongDir=0; 447 lastStrongLTR=i; /* i is index to next character */ 448 } 449 else if(dirProp==R) { 450 lastStrongDir=CONTEXT_RTL; 451 } 452 else if(dirProp==AL) { 453 lastStrongDir=CONTEXT_RTL; 454 lastArabicPos=i-1; 455 } 456 else if(dirProp==B) { 457 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 458 pBiDi->length=i; /* i is index to next character */ 459 } 460 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) { 461 for( ; paraStart<i; paraStart++) { 462 dirProps[paraStart]|=CONTEXT_RTL; 463 } 464 } 465 if(i<length) { /* B not last char in text */ 466 if(!((uchar==CR) && (text[i]==LF))) { 467 pBiDi->paraCount++; 468 } 469 if(isDefaultLevel) { 470 state=LOOKING_FOR_STRONG; 471 paraStart=i; /* i is index to next character */ 472 paraDir=paraDirDefault; 473 lastStrongDir=paraDirDefault; 474 } 475 } 476 } 477 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) { 478 controlCount++; 479 } 480 } 481 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) { 482 for(i1=paraStart; i1<length; i1++) { 483 dirProps[i1]|=CONTEXT_RTL; 484 } 485 } 486 if(isDefaultLevel) { 487 pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0); 488 } 489 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 490 if((lastStrongLTR>pBiDi->length) && 491 (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) { 492 pBiDi->length = lastStrongLTR; 493 } 494 if(pBiDi->length<pBiDi->originalLength) { 495 pBiDi->paraCount--; 496 } 497 } 498 /* The following line does nothing new for contextual paraLevel, but is 499 needed for absolute paraLevel. */ 500 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 501 502 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { 503 flags|=DIRPROP_FLAG(L); 504 } 505 506 pBiDi->controlCount = controlCount; 507 pBiDi->flags=flags; 508 pBiDi->lastArabicPos=lastArabicPos; 509 } 510 511 /* perform (X1)..(X9) ------------------------------------------------------- */ 512 513 /* determine if the text is mixed-directional or single-directional */ 514 static UBiDiDirection 515 directionFromFlags(UBiDi *pBiDi) { 516 Flags flags=pBiDi->flags; 517 /* if the text contains AN and neutrals, then some neutrals may become RTL */ 518 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { 519 return UBIDI_LTR; 520 } else if(!(flags&MASK_LTR)) { 521 return UBIDI_RTL; 522 } else { 523 return UBIDI_MIXED; 524 } 525 } 526 527 /* 528 * Resolve the explicit levels as specified by explicit embedding codes. 529 * Recalculate the flags to have them reflect the real properties 530 * after taking the explicit embeddings into account. 531 * 532 * The BiDi algorithm is designed to result in the same behavior whether embedding 533 * levels are externally specified (from "styled text", supposedly the preferred 534 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. 535 * That is why (X9) instructs to remove all explicit codes (and BN). 536 * However, in a real implementation, this removal of these codes and their index 537 * positions in the plain text is undesirable since it would result in 538 * reallocated, reindexed text. 539 * Instead, this implementation leaves the codes in there and just ignores them 540 * in the subsequent processing. 541 * In order to get the same reordering behavior, positions with a BN or an 542 * explicit embedding code just get the same level assigned as the last "real" 543 * character. 544 * 545 * Some implementations, not this one, then overwrite some of these 546 * directionality properties at "real" same-level-run boundaries by 547 * L or R codes so that the resolution of weak types can be performed on the 548 * entire paragraph at once instead of having to parse it once more and 549 * perform that resolution on same-level-runs. 550 * This limits the scope of the implicit rules in effectively 551 * the same way as the run limits. 552 * 553 * Instead, this implementation does not modify these codes. 554 * On one hand, the paragraph has to be scanned for same-level-runs, but 555 * on the other hand, this saves another loop to reset these codes, 556 * or saves making and modifying a copy of dirProps[]. 557 * 558 * 559 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. 560 * 561 * 562 * Handling the stack of explicit levels (Xn): 563 * 564 * With the BiDi stack of explicit levels, 565 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, 566 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61. 567 * 568 * In order to have a correct push-pop semantics even in the case of overflows, 569 * there are two overflow counters: 570 * - countOver60 is incremented with each LRx at level 60 571 * - from level 60, one RLx increases the level to 61 572 * - countOver61 is incremented with each LRx and RLx at level 61 573 * 574 * Popping levels with PDF must work in the opposite order so that level 61 575 * is correct at the correct point. Underflows (too many PDFs) must be checked. 576 * 577 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. 578 */ 579 static UBiDiDirection 580 resolveExplicitLevels(UBiDi *pBiDi) { 581 const DirProp *dirProps=pBiDi->dirProps; 582 UBiDiLevel *levels=pBiDi->levels; 583 const UChar *text=pBiDi->text; 584 585 int32_t i=0, length=pBiDi->length; 586 Flags flags=pBiDi->flags; /* collect all directionalities in the text */ 587 DirProp dirProp; 588 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); 589 590 UBiDiDirection direction; 591 int32_t paraIndex=0; 592 593 /* determine if the text is mixed-directional or single-directional */ 594 direction=directionFromFlags(pBiDi); 595 596 /* we may not need to resolve any explicit levels, but for multiple 597 paragraphs we want to loop on all chars to set the para boundaries */ 598 if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) { 599 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ 600 } else if((pBiDi->paraCount==1) && 601 (!(flags&MASK_EXPLICIT) || 602 (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) { 603 /* mixed, but all characters are at the same embedding level */ 604 /* or we are in "inverse BiDi" */ 605 /* and we don't have contextual multiple paragraphs with some B char */ 606 /* set all levels to the paragraph level */ 607 for(i=0; i<length; ++i) { 608 levels[i]=level; 609 } 610 } else { 611 /* continue to perform (Xn) */ 612 613 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ 614 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */ 615 UBiDiLevel embeddingLevel=level, newLevel, stackTop=0; 616 617 UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */ 618 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */ 619 620 /* recalculate the flags */ 621 flags=0; 622 623 for(i=0; i<length; ++i) { 624 dirProp=NO_CONTEXT_RTL(dirProps[i]); 625 switch(dirProp) { 626 case LRE: 627 case LRO: 628 /* (X3, X5) */ 629 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */ 630 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) { 631 stack[stackTop]=embeddingLevel; 632 ++stackTop; 633 embeddingLevel=newLevel; 634 if(dirProp==LRO) { 635 embeddingLevel|=UBIDI_LEVEL_OVERRIDE; 636 } 637 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE 638 since this has already been done for newLevel which is 639 the source for embeddingLevel. 640 */ 641 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) { 642 ++countOver61; 643 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ { 644 ++countOver60; 645 } 646 flags|=DIRPROP_FLAG(BN); 647 break; 648 case RLE: 649 case RLO: 650 /* (X2, X4) */ 651 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */ 652 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) { 653 stack[stackTop]=embeddingLevel; 654 ++stackTop; 655 embeddingLevel=newLevel; 656 if(dirProp==RLO) { 657 embeddingLevel|=UBIDI_LEVEL_OVERRIDE; 658 } 659 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE 660 since this has already been done for newLevel which is 661 the source for embeddingLevel. 662 */ 663 } else { 664 ++countOver61; 665 } 666 flags|=DIRPROP_FLAG(BN); 667 break; 668 case PDF: 669 /* (X7) */ 670 /* handle all the overflow cases first */ 671 if(countOver61>0) { 672 --countOver61; 673 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) { 674 /* handle LRx overflows from level 60 */ 675 --countOver60; 676 } else if(stackTop>0) { 677 /* this is the pop operation; it also pops level 61 while countOver60>0 */ 678 --stackTop; 679 embeddingLevel=stack[stackTop]; 680 /* } else { (underflow) */ 681 } 682 flags|=DIRPROP_FLAG(BN); 683 break; 684 case B: 685 stackTop=0; 686 countOver60=countOver61=0; 687 level=GET_PARALEVEL(pBiDi, i); 688 if((i+1)<length) { 689 embeddingLevel=GET_PARALEVEL(pBiDi, i+1); 690 if(!((text[i]==CR) && (text[i+1]==LF))) { 691 pBiDi->paras[paraIndex++]=i+1; 692 } 693 } 694 flags|=DIRPROP_FLAG(B); 695 break; 696 case BN: 697 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ 698 /* they will get their levels set correctly in adjustWSLevels() */ 699 flags|=DIRPROP_FLAG(BN); 700 break; 701 default: 702 /* all other types get the "real" level */ 703 if(level!=embeddingLevel) { 704 level=embeddingLevel; 705 if(level&UBIDI_LEVEL_OVERRIDE) { 706 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; 707 } else { 708 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; 709 } 710 } 711 if(!(level&UBIDI_LEVEL_OVERRIDE)) { 712 flags|=DIRPROP_FLAG(dirProp); 713 } 714 break; 715 } 716 717 /* 718 * We need to set reasonable levels even on BN codes and 719 * explicit codes because we will later look at same-level runs (X10). 720 */ 721 levels[i]=level; 722 } 723 if(flags&MASK_EMBEDDING) { 724 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 725 } 726 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { 727 flags|=DIRPROP_FLAG(L); 728 } 729 730 /* subsequently, ignore the explicit codes and BN (X9) */ 731 732 /* again, determine if the text is mixed-directional or single-directional */ 733 pBiDi->flags=flags; 734 direction=directionFromFlags(pBiDi); 735 } 736 737 return direction; 738 } 739 740 /* 741 * Use a pre-specified embedding levels array: 742 * 743 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), 744 * ignore all explicit codes (X9), 745 * and check all the preset levels. 746 * 747 * Recalculate the flags to have them reflect the real properties 748 * after taking the explicit embeddings into account. 749 */ 750 static UBiDiDirection 751 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { 752 const DirProp *dirProps=pBiDi->dirProps; 753 DirProp dirProp; 754 UBiDiLevel *levels=pBiDi->levels; 755 const UChar *text=pBiDi->text; 756 757 int32_t i, length=pBiDi->length; 758 Flags flags=0; /* collect all directionalities in the text */ 759 UBiDiLevel level; 760 uint32_t paraIndex=0; 761 762 for(i=0; i<length; ++i) { 763 level=levels[i]; 764 dirProp=NO_CONTEXT_RTL(dirProps[i]); 765 if(level&UBIDI_LEVEL_OVERRIDE) { 766 /* keep the override flag in levels[i] but adjust the flags */ 767 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ 768 flags|=DIRPROP_FLAG_O(level); 769 } else { 770 /* set the flags */ 771 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); 772 } 773 if((level<GET_PARALEVEL(pBiDi, i) && 774 !((0==level)&&(dirProp==B))) || 775 (UBIDI_MAX_EXPLICIT_LEVEL<level)) { 776 /* level out of bounds */ 777 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 778 return UBIDI_LTR; 779 } 780 if((dirProp==B) && ((i+1)<length)) { 781 if(!((text[i]==CR) && (text[i+1]==LF))) { 782 pBiDi->paras[paraIndex++]=i+1; 783 } 784 } 785 } 786 if(flags&MASK_EMBEDDING) { 787 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 788 } 789 790 /* determine if the text is mixed-directional or single-directional */ 791 pBiDi->flags=flags; 792 return directionFromFlags(pBiDi); 793 } 794 795 /****************************************************************** 796 The Properties state machine table 797 ******************************************************************* 798 799 All table cells are 8 bits: 800 bits 0..4: next state 801 bits 5..7: action to perform (if > 0) 802 803 Cells may be of format "n" where n represents the next state 804 (except for the rightmost column). 805 Cells may also be of format "s(x,y)" where x represents an action 806 to perform and y represents the next state. 807 808 ******************************************************************* 809 Definitions and type for properties state table 810 ******************************************************************* 811 */ 812 #define IMPTABPROPS_COLUMNS 14 813 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) 814 #define GET_STATEPROPS(cell) ((cell)&0x1f) 815 #define GET_ACTIONPROPS(cell) ((cell)>>5) 816 #define s(action, newState) ((uint8_t)(newState+(action<<5))) 817 818 static const uint8_t groupProp[] = /* dirProp regrouped */ 819 { 820 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */ 821 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10 822 }; 823 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ 824 825 /****************************************************************** 826 827 PROPERTIES STATE TABLE 828 829 In table impTabProps, 830 - the ON column regroups ON and WS 831 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF 832 - the Res column is the reduced property assigned to a run 833 834 Action 1: process current run1, init new run1 835 2: init new run2 836 3: process run1, process run2, init new run1 837 4: process run1, set run1=run2, init new run2 838 839 Notes: 840 1) This table is used in resolveImplicitLevels(). 841 2) This table triggers actions when there is a change in the Bidi 842 property of incoming characters (action 1). 843 3) Most such property sequences are processed immediately (in 844 fact, passed to processPropertySeq(). 845 4) However, numbers are assembled as one sequence. This means 846 that undefined situations (like CS following digits, until 847 it is known if the next char will be a digit) are held until 848 following chars define them. 849 Example: digits followed by CS, then comes another CS or ON; 850 the digits will be processed, then the CS assigned 851 as the start of an ON sequence (action 3). 852 5) There are cases where more than one sequence must be 853 processed, for instance digits followed by CS followed by L: 854 the digits must be processed as one sequence, and the CS 855 must be processed as an ON sequence, all this before starting 856 assembling chars for the opening L sequence. 857 858 859 */ 860 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = 861 { 862 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , Res */ 863 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , DirProp_ON }, 864 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), DirProp_L }, 865 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), DirProp_R }, 866 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 , DirProp_R }, 867 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), DirProp_EN }, 868 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), DirProp_AN }, 869 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), DirProp_AN }, 870 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3), DirProp_ON }, 871 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3), DirProp_ON }, 872 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), DirProp_ON }, 873 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), DirProp_EN }, 874 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), DirProp_EN }, 875 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), DirProp_AN }, 876 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), DirProp_AN }, 877 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3), DirProp_ON }, 878 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), DirProp_S }, 879 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), DirProp_S }, 880 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), DirProp_B } 881 }; 882 883 /* we must undef macro s because the levels table have a different 884 * structure (4 bits for action and 4 bits for next state. 885 */ 886 #undef s 887 888 /****************************************************************** 889 The levels state machine tables 890 ******************************************************************* 891 892 All table cells are 8 bits: 893 bits 0..3: next state 894 bits 4..7: action to perform (if > 0) 895 896 Cells may be of format "n" where n represents the next state 897 (except for the rightmost column). 898 Cells may also be of format "s(x,y)" where x represents an action 899 to perform and y represents the next state. 900 901 This format limits each table to 16 states each and to 15 actions. 902 903 ******************************************************************* 904 Definitions and type for levels state tables 905 ******************************************************************* 906 */ 907 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2) 908 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) 909 #define GET_STATE(cell) ((cell)&0x0f) 910 #define GET_ACTION(cell) ((cell)>>4) 911 #define s(action, newState) ((uint8_t)(newState+(action<<4))) 912 913 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; 914 typedef uint8_t ImpAct[]; 915 916 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct, 917 * instead of having a pair of ImpTab and a pair of ImpAct. 918 */ 919 typedef struct ImpTabPair { 920 const void * pImpTab[2]; 921 const void * pImpAct[2]; 922 } ImpTabPair; 923 924 /****************************************************************** 925 926 LEVELS STATE TABLES 927 928 In all levels state tables, 929 - state 0 is the initial state 930 - the Res column is the increment to add to the text level 931 for this property sequence. 932 933 The impAct arrays for each table of a pair map the local action 934 numbers of the table to the total list of actions. For instance, 935 action 2 in a given table corresponds to the action number which 936 appears in entry [2] of the impAct array for that table. 937 The first entry of all impAct arrays must be 0. 938 939 Action 1: init conditional sequence 940 2: prepend conditional sequence to current sequence 941 3: set ON sequence to new level - 1 942 4: init EN/AN/ON sequence 943 5: fix EN/AN/ON sequence followed by R 944 6: set previous level sequence to level 2 945 946 Notes: 947 1) These tables are used in processPropertySeq(). The input 948 is property sequences as determined by resolveImplicitLevels. 949 2) Most such property sequences are processed immediately 950 (levels are assigned). 951 3) However, some sequences cannot be assigned a final level till 952 one or more following sequences are received. For instance, 953 ON following an R sequence within an even-level paragraph. 954 If the following sequence is R, the ON sequence will be 955 assigned basic run level+1, and so will the R sequence. 956 4) S is generally handled like ON, since its level will be fixed 957 to paragraph level in adjustWSLevels(). 958 959 */ 960 961 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ 962 /* In this table, conditional sequences receive the higher possible level 963 until proven otherwise. 964 */ 965 { 966 /* L , R , EN , AN , ON , S , B , Res */ 967 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 }, 968 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 }, 969 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 }, 970 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 }, 971 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 }, 972 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 } 973 }; 974 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ 975 /* In this table, conditional sequences receive the lower possible level 976 until proven otherwise. 977 */ 978 { 979 /* L , R , EN , AN , ON , S , B , Res */ 980 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, 981 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 }, 982 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, 983 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 }, 984 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 }, 985 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 } 986 }; 987 static const ImpAct impAct0 = {0,1,2,3,4,5,6}; 988 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, 989 &impTabR_DEFAULT}, 990 {&impAct0, &impAct0}}; 991 992 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ 993 /* In this table, conditional sequences receive the higher possible level 994 until proven otherwise. 995 */ 996 { 997 /* L , R , EN , AN , ON , S , B , Res */ 998 /* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 }, 999 /* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 }, 1000 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 }, 1001 /* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 }, 1002 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } 1003 }; 1004 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, 1005 &impTabR_DEFAULT}, 1006 {&impAct0, &impAct0}}; 1007 1008 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R = 1009 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1010 until proven that there is L or sor/eor on both sides. AN is handled like EN. 1011 */ 1012 { 1013 /* L , R , EN , AN , ON , S , B , Res */ 1014 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, 1015 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 }, 1016 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 }, 1017 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 }, 1018 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 }, 1019 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 } 1020 }; 1021 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R = 1022 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1023 until proven that there is L on both sides. AN is handled like EN. 1024 */ 1025 { 1026 /* L , R , EN , AN , ON , S , B , Res */ 1027 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1028 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, 1029 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 }, 1030 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 }, 1031 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 } 1032 }; 1033 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = { 1034 {&impTabL_GROUP_NUMBERS_WITH_R, 1035 &impTabR_GROUP_NUMBERS_WITH_R}, 1036 {&impAct0, &impAct0}}; 1037 1038 1039 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L = 1040 /* This table is identical to the Default LTR table except that EN and AN are 1041 handled like L. 1042 */ 1043 { 1044 /* L , R , EN , AN , ON , S , B , Res */ 1045 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 }, 1046 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 }, 1047 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 }, 1048 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 }, 1049 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 }, 1050 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 } 1051 }; 1052 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L = 1053 /* This table is identical to the Default RTL table except that EN and AN are 1054 handled like L. 1055 */ 1056 { 1057 /* L , R , EN , AN , ON , S , B , Res */ 1058 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1059 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 }, 1060 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, 1061 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 }, 1062 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 }, 1063 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 } 1064 }; 1065 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = { 1066 {&impTabL_INVERSE_NUMBERS_AS_L, 1067 &impTabR_INVERSE_NUMBERS_AS_L}, 1068 {&impAct0, &impAct0}}; 1069 1070 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ 1071 /* In this table, conditional sequences receive the lower possible level 1072 until proven otherwise. 1073 */ 1074 { 1075 /* L , R , EN , AN , ON , S , B , Res */ 1076 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, 1077 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 }, 1078 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, 1079 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 }, 1080 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 }, 1081 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 }, 1082 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 } 1083 }; 1084 static const ImpAct impAct1 = {0,1,11,12}; 1085 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" 1086 */ 1087 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { 1088 {&impTabL_DEFAULT, 1089 &impTabR_INVERSE_LIKE_DIRECT}, 1090 {&impAct0, &impAct1}}; 1091 1092 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = 1093 /* The case handled in this table is (visually): R EN L 1094 */ 1095 { 1096 /* L , R , EN , AN , ON , S , B , Res */ 1097 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 }, 1098 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 }, 1099 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 }, 1100 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 }, 1101 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 }, 1102 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 }, 1103 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 } 1104 }; 1105 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = 1106 /* The cases handled in this table are (visually): R EN L 1107 R L AN L 1108 */ 1109 { 1110 /* L , R , EN , AN , ON , S , B , Res */ 1111 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1112 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 }, 1113 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 }, 1114 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 }, 1115 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 }, 1116 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, 1117 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 } 1118 }; 1119 static const ImpAct impAct2 = {0,1,7,8,9,10}; 1120 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { 1121 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, 1122 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, 1123 {&impAct0, &impAct2}}; 1124 1125 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { 1126 {&impTabL_NUMBERS_SPECIAL, 1127 &impTabR_INVERSE_LIKE_DIRECT}, 1128 {&impAct0, &impAct1}}; 1129 1130 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = 1131 /* The case handled in this table is (visually): R EN L 1132 */ 1133 { 1134 /* L , R , EN , AN , ON , S , B , Res */ 1135 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 }, 1136 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 }, 1137 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 }, 1138 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 }, 1139 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 } 1140 }; 1141 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { 1142 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, 1143 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, 1144 {&impAct0, &impAct2}}; 1145 1146 #undef s 1147 1148 typedef struct { 1149 const ImpTab * pImpTab; /* level table pointer */ 1150 const ImpAct * pImpAct; /* action map array */ 1151 int32_t startON; /* start of ON sequence */ 1152 int32_t startL2EN; /* start of level 2 sequence */ 1153 int32_t lastStrongRTL; /* index of last found R or AL */ 1154 int32_t state; /* current state */ 1155 UBiDiLevel runLevel; /* run level before implicit solving */ 1156 } LevState; 1157 1158 /*------------------------------------------------------------------------*/ 1159 1160 static void 1161 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) 1162 /* param pos: position where to insert 1163 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER 1164 */ 1165 { 1166 #define FIRSTALLOC 10 1167 Point point; 1168 InsertPoints * pInsertPoints=&(pBiDi->insertPoints); 1169 1170 if (pInsertPoints->capacity == 0) 1171 { 1172 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC); 1173 if (pInsertPoints->points == NULL) 1174 { 1175 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; 1176 return; 1177 } 1178 pInsertPoints->capacity=FIRSTALLOC; 1179 } 1180 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ 1181 { 1182 void * savePoints=pInsertPoints->points; 1183 pInsertPoints->points=uprv_realloc(pInsertPoints->points, 1184 pInsertPoints->capacity*2*sizeof(Point)); 1185 if (pInsertPoints->points == NULL) 1186 { 1187 pInsertPoints->points=savePoints; 1188 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; 1189 return; 1190 } 1191 else pInsertPoints->capacity*=2; 1192 } 1193 point.pos=pos; 1194 point.flag=flag; 1195 pInsertPoints->points[pInsertPoints->size]=point; 1196 pInsertPoints->size++; 1197 #undef FIRSTALLOC 1198 } 1199 1200 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ 1201 1202 /* 1203 * This implementation of the (Wn) rules applies all rules in one pass. 1204 * In order to do so, it needs a look-ahead of typically 1 character 1205 * (except for W5: sequences of ET) and keeps track of changes 1206 * in a rule Wp that affect a later Wq (p<q). 1207 * 1208 * The (Nn) and (In) rules are also performed in that same single loop, 1209 * but effectively one iteration behind for white space. 1210 * 1211 * Since all implicit rules are performed in one step, it is not necessary 1212 * to actually store the intermediate directional properties in dirProps[]. 1213 */ 1214 1215 static void 1216 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, 1217 int32_t start, int32_t limit) { 1218 uint8_t cell, oldStateSeq, actionSeq; 1219 const ImpTab * pImpTab=pLevState->pImpTab; 1220 const ImpAct * pImpAct=pLevState->pImpAct; 1221 UBiDiLevel * levels=pBiDi->levels; 1222 UBiDiLevel level, addLevel; 1223 InsertPoints * pInsertPoints; 1224 int32_t start0, k; 1225 1226 start0=start; /* save original start position */ 1227 oldStateSeq=(uint8_t)pLevState->state; 1228 cell=(*pImpTab)[oldStateSeq][_prop]; 1229 pLevState->state=GET_STATE(cell); /* isolate the new state */ 1230 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ 1231 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; 1232 1233 if(actionSeq) { 1234 switch(actionSeq) { 1235 case 1: /* init ON seq */ 1236 pLevState->startON=start0; 1237 break; 1238 1239 case 2: /* prepend ON seq to current seq */ 1240 start=pLevState->startON; 1241 break; 1242 1243 case 3: /* L or S after possible relevant EN/AN */ 1244 /* check if we had EN after R/AL */ 1245 if (pLevState->startL2EN >= 0) { 1246 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); 1247 } 1248 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */ 1249 /* check if we had any relevant EN/AN after R/AL */ 1250 pInsertPoints=&(pBiDi->insertPoints); 1251 if ((pInsertPoints->capacity == 0) || 1252 (pInsertPoints->size <= pInsertPoints->confirmed)) 1253 { 1254 /* nothing, just clean up */ 1255 pLevState->lastStrongRTL=-1; 1256 /* check if we have a pending conditional segment */ 1257 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; 1258 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ 1259 start=pLevState->startON; /* reset to basic run level */ 1260 } 1261 if (_prop == DirProp_S) /* add LRM before S */ 1262 { 1263 addPoint(pBiDi, start0, LRM_BEFORE); 1264 pInsertPoints->confirmed=pInsertPoints->size; 1265 } 1266 break; 1267 } 1268 /* reset previous RTL cont to level for LTR text */ 1269 for (k=pLevState->lastStrongRTL+1; k<start0; k++) 1270 { 1271 /* reset odd level, leave runLevel+2 as is */ 1272 levels[k]=(levels[k] - 2) & ~1; 1273 } 1274 /* mark insert points as confirmed */ 1275 pInsertPoints->confirmed=pInsertPoints->size; 1276 pLevState->lastStrongRTL=-1; 1277 if (_prop == DirProp_S) /* add LRM before S */ 1278 { 1279 addPoint(pBiDi, start0, LRM_BEFORE); 1280 pInsertPoints->confirmed=pInsertPoints->size; 1281 } 1282 break; 1283 1284 case 4: /* R/AL after possible relevant EN/AN */ 1285 /* just clean up */ 1286 pInsertPoints=&(pBiDi->insertPoints); 1287 if (pInsertPoints->capacity > 0) 1288 /* remove all non confirmed insert points */ 1289 pInsertPoints->size=pInsertPoints->confirmed; 1290 pLevState->startON=-1; 1291 pLevState->startL2EN=-1; 1292 pLevState->lastStrongRTL=limit - 1; 1293 break; 1294 1295 case 5: /* EN/AN after R/AL + possible cont */ 1296 /* check for real AN */ 1297 if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) && 1298 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) 1299 { 1300 /* real AN */ 1301 if (pLevState->startL2EN == -1) /* if no relevant EN already found */ 1302 { 1303 /* just note the righmost digit as a strong RTL */ 1304 pLevState->lastStrongRTL=limit - 1; 1305 break; 1306 } 1307 if (pLevState->startL2EN >= 0) /* after EN, no AN */ 1308 { 1309 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); 1310 pLevState->startL2EN=-2; 1311 } 1312 /* note AN */ 1313 addPoint(pBiDi, start0, LRM_BEFORE); 1314 break; 1315 } 1316 /* if first EN/AN after R/AL */ 1317 if (pLevState->startL2EN == -1) { 1318 pLevState->startL2EN=start0; 1319 } 1320 break; 1321 1322 case 6: /* note location of latest R/AL */ 1323 pLevState->lastStrongRTL=limit - 1; 1324 pLevState->startON=-1; 1325 break; 1326 1327 case 7: /* L after R+ON/EN/AN */ 1328 /* include possible adjacent number on the left */ 1329 for (k=start0-1; k>=0 && !(levels[k]&1); k--); 1330 if(k>=0) { 1331 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ 1332 pInsertPoints=&(pBiDi->insertPoints); 1333 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ 1334 } 1335 pLevState->startON=start0; 1336 break; 1337 1338 case 8: /* AN after L */ 1339 /* AN numbers between L text on both sides may be trouble. */ 1340 /* tentatively bracket with LRMs; will be confirmed if followed by L */ 1341 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ 1342 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ 1343 break; 1344 1345 case 9: /* R after L+ON/EN/AN */ 1346 /* false alert, infirm LRMs around previous AN */ 1347 pInsertPoints=&(pBiDi->insertPoints); 1348 pInsertPoints->size=pInsertPoints->confirmed; 1349 if (_prop == DirProp_S) /* add RLM before S */ 1350 { 1351 addPoint(pBiDi, start0, RLM_BEFORE); 1352 pInsertPoints->confirmed=pInsertPoints->size; 1353 } 1354 break; 1355 1356 case 10: /* L after L+ON/AN */ 1357 level=pLevState->runLevel + addLevel; 1358 for(k=pLevState->startON; k<start0; k++) { 1359 if (levels[k]<level) 1360 levels[k]=level; 1361 } 1362 pInsertPoints=&(pBiDi->insertPoints); 1363 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */ 1364 pLevState->startON=start0; 1365 break; 1366 1367 case 11: /* L after L+ON+EN/AN/ON */ 1368 level=pLevState->runLevel; 1369 for(k=start0-1; k>=pLevState->startON; k--) { 1370 if(levels[k]==level+3) { 1371 while(levels[k]==level+3) { 1372 levels[k--]-=2; 1373 } 1374 while(levels[k]==level) { 1375 k--; 1376 } 1377 } 1378 if(levels[k]==level+2) { 1379 levels[k]=level; 1380 continue; 1381 } 1382 levels[k]=level+1; 1383 } 1384 break; 1385 1386 case 12: /* R after L+ON+EN/AN/ON */ 1387 level=pLevState->runLevel+1; 1388 for(k=start0-1; k>=pLevState->startON; k--) { 1389 if(levels[k]>level) { 1390 levels[k]-=2; 1391 } 1392 } 1393 break; 1394 1395 default: /* we should never get here */ 1396 U_ASSERT(FALSE); 1397 break; 1398 } 1399 } 1400 if((addLevel) || (start < start0)) { 1401 level=pLevState->runLevel + addLevel; 1402 for(k=start; k<limit; k++) { 1403 levels[k]=level; 1404 } 1405 } 1406 } 1407 1408 static void 1409 resolveImplicitLevels(UBiDi *pBiDi, 1410 int32_t start, int32_t limit, 1411 DirProp sor, DirProp eor) { 1412 const DirProp *dirProps=pBiDi->dirProps; 1413 1414 LevState levState; 1415 int32_t i, start1, start2; 1416 uint8_t oldStateImp, stateImp, actionImp; 1417 uint8_t gprop, resProp, cell; 1418 UBool inverseRTL; 1419 DirProp nextStrongProp=R; 1420 int32_t nextStrongPos=-1; 1421 1422 levState.startON = -1; /* silence gcc flow analysis */ 1423 1424 /* check for RTL inverse BiDi mode */ 1425 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to 1426 * loop on the text characters from end to start. 1427 * This would need a different properties state table (at least different 1428 * actions) and different levels state tables (maybe very similar to the 1429 * LTR corresponding ones. 1430 */ 1431 inverseRTL=(UBool) 1432 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && 1433 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || 1434 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); 1435 /* initialize for levels state table */ 1436 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 1437 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 1438 levState.state=0; 1439 levState.runLevel=pBiDi->levels[start]; 1440 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; 1441 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; 1442 processPropertySeq(pBiDi, &levState, sor, start, start); 1443 /* initialize for property state table */ 1444 if(NO_CONTEXT_RTL(dirProps[start])==NSM) { 1445 stateImp = 1 + sor; 1446 } else { 1447 stateImp=0; 1448 } 1449 start1=start; 1450 start2=start; 1451 1452 for(i=start; i<=limit; i++) { 1453 if(i>=limit) { 1454 gprop=eor; 1455 } else { 1456 DirProp prop, prop1; 1457 prop=NO_CONTEXT_RTL(dirProps[i]); 1458 if(inverseRTL) { 1459 if(prop==AL) { 1460 /* AL before EN does not make it AN */ 1461 prop=R; 1462 } else if(prop==EN) { 1463 if(nextStrongPos<=i) { 1464 /* look for next strong char (L/R/AL) */ 1465 int32_t j; 1466 nextStrongProp=R; /* set default */ 1467 nextStrongPos=limit; 1468 for(j=i+1; j<limit; j++) { 1469 prop1=NO_CONTEXT_RTL(dirProps[j]); 1470 if(prop1==L || prop1==R || prop1==AL) { 1471 nextStrongProp=prop1; 1472 nextStrongPos=j; 1473 break; 1474 } 1475 } 1476 } 1477 if(nextStrongProp==AL) { 1478 prop=AN; 1479 } 1480 } 1481 } 1482 gprop=groupProp[prop]; 1483 } 1484 oldStateImp=stateImp; 1485 cell=impTabProps[oldStateImp][gprop]; 1486 stateImp=GET_STATEPROPS(cell); /* isolate the new state */ 1487 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */ 1488 if((i==limit) && (actionImp==0)) { 1489 /* there is an unprocessed sequence if its property == eor */ 1490 actionImp=1; /* process the last sequence */ 1491 } 1492 if(actionImp) { 1493 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES]; 1494 switch(actionImp) { 1495 case 1: /* process current seq1, init new seq1 */ 1496 processPropertySeq(pBiDi, &levState, resProp, start1, i); 1497 start1=i; 1498 break; 1499 case 2: /* init new seq2 */ 1500 start2=i; 1501 break; 1502 case 3: /* process seq1, process seq2, init new seq1 */ 1503 processPropertySeq(pBiDi, &levState, resProp, start1, start2); 1504 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i); 1505 start1=i; 1506 break; 1507 case 4: /* process seq1, set seq1=seq2, init new seq2 */ 1508 processPropertySeq(pBiDi, &levState, resProp, start1, start2); 1509 start1=start2; 1510 start2=i; 1511 break; 1512 default: /* we should never get here */ 1513 U_ASSERT(FALSE); 1514 break; 1515 } 1516 } 1517 } 1518 /* flush possible pending sequence, e.g. ON */ 1519 processPropertySeq(pBiDi, &levState, eor, limit, limit); 1520 } 1521 1522 /* perform (L1) and (X9) ---------------------------------------------------- */ 1523 1524 /* 1525 * Reset the embedding levels for some non-graphic characters (L1). 1526 * This function also sets appropriate levels for BN, and 1527 * explicit embedding types that are supposed to have been removed 1528 * from the paragraph in (X9). 1529 */ 1530 static void 1531 adjustWSLevels(UBiDi *pBiDi) { 1532 const DirProp *dirProps=pBiDi->dirProps; 1533 UBiDiLevel *levels=pBiDi->levels; 1534 int32_t i; 1535 1536 if(pBiDi->flags&MASK_WS) { 1537 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; 1538 Flags flag; 1539 1540 i=pBiDi->trailingWSStart; 1541 while(i>0) { 1542 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ 1543 while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) { 1544 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { 1545 levels[i]=0; 1546 } else { 1547 levels[i]=GET_PARALEVEL(pBiDi, i); 1548 } 1549 } 1550 1551 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ 1552 /* here, i+1 is guaranteed to be <length */ 1553 while(i>0) { 1554 flag=DIRPROP_FLAG_NC(dirProps[--i]); 1555 if(flag&MASK_BN_EXPLICIT) { 1556 levels[i]=levels[i+1]; 1557 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { 1558 levels[i]=0; 1559 break; 1560 } else if(flag&MASK_B_S) { 1561 levels[i]=GET_PARALEVEL(pBiDi, i); 1562 break; 1563 } 1564 } 1565 } 1566 } 1567 } 1568 1569 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) 1570 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x))) 1571 static void 1572 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, 1573 UBiDiLevel paraLevel, UErrorCode *pErrorCode) { 1574 void *runsOnlyMemory; 1575 int32_t *visualMap; 1576 UChar *visualText; 1577 int32_t saveLength, saveTrailingWSStart; 1578 const UBiDiLevel *levels; 1579 UBiDiLevel *saveLevels; 1580 UBiDiDirection saveDirection; 1581 UBool saveMayAllocateText; 1582 Run *runs; 1583 int32_t visualLength, i, j, visualStart, logicalStart, 1584 runCount, runLength, addedRuns, insertRemove, 1585 start, limit, step, indexOddBit, logicalPos, 1586 index0, index1; 1587 uint32_t saveOptions; 1588 1589 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; 1590 if(length==0) { 1591 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); 1592 goto cleanup3; 1593 } 1594 /* obtain memory for mapping table and visual text */ 1595 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))); 1596 if(runsOnlyMemory==NULL) { 1597 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1598 goto cleanup3; 1599 } 1600 visualMap=runsOnlyMemory; 1601 visualText=(UChar *)&visualMap[length]; 1602 saveLevels=(UBiDiLevel *)&visualText[length]; 1603 saveOptions=pBiDi->reorderingOptions; 1604 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { 1605 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; 1606 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; 1607 } 1608 paraLevel&=1; /* accept only 0 or 1 */ 1609 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); 1610 if(U_FAILURE(*pErrorCode)) { 1611 goto cleanup3; 1612 } 1613 /* we cannot access directly pBiDi->levels since it is not yet set if 1614 * direction is not MIXED 1615 */ 1616 levels=ubidi_getLevels(pBiDi, pErrorCode); 1617 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel)); 1618 saveTrailingWSStart=pBiDi->trailingWSStart; 1619 saveLength=pBiDi->length; 1620 saveDirection=pBiDi->direction; 1621 1622 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use 1623 * the visual map and the dirProps array to drive the second call 1624 * to ubidi_setPara (but must make provision for possible removal of 1625 * BiDi controls. Alternatively, only use the dirProps array via 1626 * customized classifier callback. 1627 */ 1628 visualLength=ubidi_writeReordered(pBiDi, visualText, length, 1629 UBIDI_DO_MIRRORING, pErrorCode); 1630 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); 1631 if(U_FAILURE(*pErrorCode)) { 1632 goto cleanup2; 1633 } 1634 pBiDi->reorderingOptions=saveOptions; 1635 1636 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; 1637 paraLevel^=1; 1638 /* Because what we did with reorderingOptions, visualText may be shorter 1639 * than the original text. But we don't want the levels memory to be 1640 * reallocated shorter than the original length, since we need to restore 1641 * the levels as after the first call to ubidi_setpara() before returning. 1642 * We will force mayAllocateText to FALSE before the second call to 1643 * ubidi_setpara(), and will restore it afterwards. 1644 */ 1645 saveMayAllocateText=pBiDi->mayAllocateText; 1646 pBiDi->mayAllocateText=FALSE; 1647 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); 1648 pBiDi->mayAllocateText=saveMayAllocateText; 1649 ubidi_getRuns(pBiDi, pErrorCode); 1650 if(U_FAILURE(*pErrorCode)) { 1651 goto cleanup1; 1652 } 1653 /* check if some runs must be split, count how many splits */ 1654 addedRuns=0; 1655 runCount=pBiDi->runCount; 1656 runs=pBiDi->runs; 1657 visualStart=0; 1658 for(i=0; i<runCount; i++, visualStart+=runLength) { 1659 runLength=runs[i].visualLimit-visualStart; 1660 if(runLength<2) { 1661 continue; 1662 } 1663 logicalStart=GET_INDEX(runs[i].logicalStart); 1664 for(j=logicalStart+1; j<logicalStart+runLength; j++) { 1665 index0=visualMap[j]; 1666 index1=visualMap[j-1]; 1667 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { 1668 addedRuns++; 1669 } 1670 } 1671 } 1672 if(addedRuns) { 1673 if(getRunsMemory(pBiDi, runCount+addedRuns)) { 1674 if(runCount==1) { 1675 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */ 1676 pBiDi->runsMemory[0]=runs[0]; 1677 } 1678 runs=pBiDi->runs=pBiDi->runsMemory; 1679 pBiDi->runCount+=addedRuns; 1680 } else { 1681 goto cleanup1; 1682 } 1683 } 1684 /* split runs which are not consecutive in source text */ 1685 for(i=runCount-1; i>=0; i--) { 1686 runLength= i==0 ? runs[0].visualLimit : 1687 runs[i].visualLimit-runs[i-1].visualLimit; 1688 logicalStart=runs[i].logicalStart; 1689 indexOddBit=GET_ODD_BIT(logicalStart); 1690 logicalStart=GET_INDEX(logicalStart); 1691 if(runLength<2) { 1692 if(addedRuns) { 1693 runs[i+addedRuns]=runs[i]; 1694 } 1695 logicalPos=visualMap[logicalStart]; 1696 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1697 saveLevels[logicalPos]^indexOddBit); 1698 continue; 1699 } 1700 if(indexOddBit) { 1701 start=logicalStart; 1702 limit=logicalStart+runLength-1; 1703 step=1; 1704 } else { 1705 start=logicalStart+runLength-1; 1706 limit=logicalStart; 1707 step=-1; 1708 } 1709 for(j=start; j!=limit; j+=step) { 1710 index0=visualMap[j]; 1711 index1=visualMap[j+step]; 1712 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { 1713 logicalPos=BIDI_MIN(visualMap[start], index0); 1714 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1715 saveLevels[logicalPos]^indexOddBit); 1716 runs[i+addedRuns].visualLimit=runs[i].visualLimit; 1717 runs[i].visualLimit-=BIDI_ABS(j-start)+1; 1718 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); 1719 runs[i+addedRuns].insertRemove=insertRemove; 1720 runs[i].insertRemove&=~insertRemove; 1721 start=j+step; 1722 addedRuns--; 1723 } 1724 } 1725 if(addedRuns) { 1726 runs[i+addedRuns]=runs[i]; 1727 } 1728 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); 1729 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1730 saveLevels[logicalPos]^indexOddBit); 1731 } 1732 1733 cleanup1: 1734 /* restore initial paraLevel */ 1735 pBiDi->paraLevel^=1; 1736 cleanup2: 1737 /* restore real text */ 1738 pBiDi->text=text; 1739 pBiDi->length=saveLength; 1740 pBiDi->originalLength=length; 1741 pBiDi->direction=saveDirection; 1742 /* the saved levels should never excess levelsSize, but we check anyway */ 1743 if(saveLength>pBiDi->levelsSize) { 1744 saveLength=pBiDi->levelsSize; 1745 } 1746 uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel)); 1747 pBiDi->trailingWSStart=saveTrailingWSStart; 1748 /* free memory for mapping table and visual text */ 1749 uprv_free(runsOnlyMemory); 1750 if(pBiDi->runCount>1) { 1751 pBiDi->direction=UBIDI_MIXED; 1752 } 1753 cleanup3: 1754 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; 1755 } 1756 1757 /* ubidi_setPara ------------------------------------------------------------ */ 1758 1759 U_CAPI void U_EXPORT2 1760 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, 1761 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, 1762 UErrorCode *pErrorCode) { 1763 UBiDiDirection direction; 1764 1765 /* check the argument values */ 1766 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 1767 if(pBiDi==NULL || text==NULL || length<-1 || 1768 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) { 1769 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1770 return; 1771 } 1772 1773 if(length==-1) { 1774 length=u_strlen(text); 1775 } 1776 1777 /* special treatment for RUNS_ONLY mode */ 1778 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { 1779 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); 1780 return; 1781 } 1782 1783 /* initialize the UBiDi structure */ 1784 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ 1785 pBiDi->text=text; 1786 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; 1787 pBiDi->paraLevel=paraLevel; 1788 pBiDi->direction=UBIDI_LTR; 1789 pBiDi->paraCount=1; 1790 1791 pBiDi->dirProps=NULL; 1792 pBiDi->levels=NULL; 1793 pBiDi->runs=NULL; 1794 pBiDi->insertPoints.size=0; /* clean up from last call */ 1795 pBiDi->insertPoints.confirmed=0; /* clean up from last call */ 1796 1797 /* 1798 * Save the original paraLevel if contextual; otherwise, set to 0. 1799 */ 1800 if(IS_DEFAULT_LEVEL(paraLevel)) { 1801 pBiDi->defaultParaLevel=paraLevel; 1802 } else { 1803 pBiDi->defaultParaLevel=0; 1804 } 1805 1806 if(length==0) { 1807 /* 1808 * For an empty paragraph, create a UBiDi object with the paraLevel and 1809 * the flags and the direction set but without allocating zero-length arrays. 1810 * There is nothing more to do. 1811 */ 1812 if(IS_DEFAULT_LEVEL(paraLevel)) { 1813 pBiDi->paraLevel&=1; 1814 pBiDi->defaultParaLevel=0; 1815 } 1816 if(paraLevel&1) { 1817 pBiDi->flags=DIRPROP_FLAG(R); 1818 pBiDi->direction=UBIDI_RTL; 1819 } else { 1820 pBiDi->flags=DIRPROP_FLAG(L); 1821 pBiDi->direction=UBIDI_LTR; 1822 } 1823 1824 pBiDi->runCount=0; 1825 pBiDi->paraCount=0; 1826 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ 1827 return; 1828 } 1829 1830 pBiDi->runCount=-1; 1831 1832 /* 1833 * Get the directional properties, 1834 * the flags bit-set, and 1835 * determine the paragraph level if necessary. 1836 */ 1837 if(getDirPropsMemory(pBiDi, length)) { 1838 pBiDi->dirProps=pBiDi->dirPropsMemory; 1839 getDirProps(pBiDi); 1840 } else { 1841 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1842 return; 1843 } 1844 /* the processed length may have changed if UBIDI_OPTION_STREAMING */ 1845 length= pBiDi->length; 1846 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ 1847 /* allocate paras memory */ 1848 if(pBiDi->paraCount>1) { 1849 if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) { 1850 pBiDi->paras=pBiDi->parasMemory; 1851 pBiDi->paras[pBiDi->paraCount-1]=length; 1852 } else { 1853 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1854 return; 1855 } 1856 } else { 1857 /* initialize paras for single paragraph */ 1858 pBiDi->paras=pBiDi->simpleParas; 1859 pBiDi->simpleParas[0]=length; 1860 } 1861 1862 /* are explicit levels specified? */ 1863 if(embeddingLevels==NULL) { 1864 /* no: determine explicit levels according to the (Xn) rules */\ 1865 if(getLevelsMemory(pBiDi, length)) { 1866 pBiDi->levels=pBiDi->levelsMemory; 1867 direction=resolveExplicitLevels(pBiDi); 1868 } else { 1869 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1870 return; 1871 } 1872 } else { 1873 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ 1874 pBiDi->levels=embeddingLevels; 1875 direction=checkExplicitLevels(pBiDi, pErrorCode); 1876 if(U_FAILURE(*pErrorCode)) { 1877 return; 1878 } 1879 } 1880 1881 /* 1882 * The steps after (X9) in the UBiDi algorithm are performed only if 1883 * the paragraph text has mixed directionality! 1884 */ 1885 pBiDi->direction=direction; 1886 switch(direction) { 1887 case UBIDI_LTR: 1888 /* make sure paraLevel is even */ 1889 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1); 1890 1891 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ 1892 pBiDi->trailingWSStart=0; 1893 break; 1894 case UBIDI_RTL: 1895 /* make sure paraLevel is odd */ 1896 pBiDi->paraLevel|=1; 1897 1898 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ 1899 pBiDi->trailingWSStart=0; 1900 break; 1901 default: 1902 /* 1903 * Choose the right implicit state table 1904 */ 1905 switch(pBiDi->reorderingMode) { 1906 case UBIDI_REORDER_DEFAULT: 1907 pBiDi->pImpTabPair=&impTab_DEFAULT; 1908 break; 1909 case UBIDI_REORDER_NUMBERS_SPECIAL: 1910 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; 1911 break; 1912 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: 1913 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; 1914 break; 1915 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: 1916 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; 1917 break; 1918 case UBIDI_REORDER_INVERSE_LIKE_DIRECT: 1919 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { 1920 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; 1921 } else { 1922 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; 1923 } 1924 break; 1925 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: 1926 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { 1927 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; 1928 } else { 1929 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; 1930 } 1931 break; 1932 default: 1933 /* we should never get here */ 1934 U_ASSERT(FALSE); 1935 break; 1936 } 1937 /* 1938 * If there are no external levels specified and there 1939 * are no significant explicit level codes in the text, 1940 * then we can treat the entire paragraph as one run. 1941 * Otherwise, we need to perform the following rules on runs of 1942 * the text with the same embedding levels. (X10) 1943 * "Significant" explicit level codes are ones that actually 1944 * affect non-BN characters. 1945 * Examples for "insignificant" ones are empty embeddings 1946 * LRE-PDF, LRE-RLE-PDF-PDF, etc. 1947 */ 1948 if(embeddingLevels==NULL && pBiDi->paraCount<=1 && 1949 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { 1950 resolveImplicitLevels(pBiDi, 0, length, 1951 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), 1952 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); 1953 } else { 1954 /* sor, eor: start and end types of same-level-run */ 1955 UBiDiLevel *levels=pBiDi->levels; 1956 int32_t start, limit=0; 1957 UBiDiLevel level, nextLevel; 1958 DirProp sor, eor; 1959 1960 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ 1961 level=GET_PARALEVEL(pBiDi, 0); 1962 nextLevel=levels[0]; 1963 if(level<nextLevel) { 1964 eor=GET_LR_FROM_LEVEL(nextLevel); 1965 } else { 1966 eor=GET_LR_FROM_LEVEL(level); 1967 } 1968 1969 do { 1970 /* determine start and limit of the run (end points just behind the run) */ 1971 1972 /* the values for this run's start are the same as for the previous run's end */ 1973 start=limit; 1974 level=nextLevel; 1975 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) { 1976 /* except if this is a new paragraph, then set sor = para level */ 1977 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); 1978 } else { 1979 sor=eor; 1980 } 1981 1982 /* search for the limit of this run */ 1983 while(++limit<length && levels[limit]==level) {} 1984 1985 /* get the correct level of the next run */ 1986 if(limit<length) { 1987 nextLevel=levels[limit]; 1988 } else { 1989 nextLevel=GET_PARALEVEL(pBiDi, length-1); 1990 } 1991 1992 /* determine eor from max(level, nextLevel); sor is last run's eor */ 1993 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) { 1994 eor=GET_LR_FROM_LEVEL(nextLevel); 1995 } else { 1996 eor=GET_LR_FROM_LEVEL(level); 1997 } 1998 1999 /* if the run consists of overridden directional types, then there 2000 are no implicit types to be resolved */ 2001 if(!(level&UBIDI_LEVEL_OVERRIDE)) { 2002 resolveImplicitLevels(pBiDi, start, limit, sor, eor); 2003 } else { 2004 /* remove the UBIDI_LEVEL_OVERRIDE flags */ 2005 do { 2006 levels[start++]&=~UBIDI_LEVEL_OVERRIDE; 2007 } while(start<limit); 2008 } 2009 } while(limit<length); 2010 } 2011 /* check if we got any memory shortage while adding insert points */ 2012 if (U_FAILURE(pBiDi->insertPoints.errorCode)) 2013 { 2014 *pErrorCode=pBiDi->insertPoints.errorCode; 2015 return; 2016 } 2017 /* reset the embedding levels for some non-graphic characters (L1), (X9) */ 2018 adjustWSLevels(pBiDi); 2019 break; 2020 } 2021 /* add RLM for inverse Bidi with contextual orientation resolving 2022 * to RTL which would not round-trip otherwise 2023 */ 2024 if((pBiDi->defaultParaLevel>0) && 2025 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && 2026 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || 2027 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { 2028 int32_t i, j, start, last; 2029 DirProp dirProp; 2030 for(i=0; i<pBiDi->paraCount; i++) { 2031 last=pBiDi->paras[i]-1; 2032 if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) { 2033 continue; /* LTR paragraph */ 2034 } 2035 start= i==0 ? 0 : pBiDi->paras[i - 1]; 2036 for(j=last; j>=start; j--) { 2037 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]); 2038 if(dirProp==L) { 2039 if(j<last) { 2040 while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) { 2041 last--; 2042 } 2043 } 2044 addPoint(pBiDi, last, RLM_BEFORE); 2045 break; 2046 } 2047 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) { 2048 break; 2049 } 2050 } 2051 } 2052 } 2053 2054 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { 2055 pBiDi->resultLength -= pBiDi->controlCount; 2056 } else { 2057 pBiDi->resultLength += pBiDi->insertPoints.size; 2058 } 2059 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ 2060 } 2061 2062 U_CAPI void U_EXPORT2 2063 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { 2064 if(pBiDi!=NULL) { 2065 pBiDi->orderParagraphsLTR=orderParagraphsLTR; 2066 } 2067 } 2068 2069 U_CAPI UBool U_EXPORT2 2070 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { 2071 if(pBiDi!=NULL) { 2072 return pBiDi->orderParagraphsLTR; 2073 } else { 2074 return FALSE; 2075 } 2076 } 2077 2078 U_CAPI UBiDiDirection U_EXPORT2 2079 ubidi_getDirection(const UBiDi *pBiDi) { 2080 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2081 return pBiDi->direction; 2082 } else { 2083 return UBIDI_LTR; 2084 } 2085 } 2086 2087 U_CAPI const UChar * U_EXPORT2 2088 ubidi_getText(const UBiDi *pBiDi) { 2089 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2090 return pBiDi->text; 2091 } else { 2092 return NULL; 2093 } 2094 } 2095 2096 U_CAPI int32_t U_EXPORT2 2097 ubidi_getLength(const UBiDi *pBiDi) { 2098 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2099 return pBiDi->originalLength; 2100 } else { 2101 return 0; 2102 } 2103 } 2104 2105 U_CAPI int32_t U_EXPORT2 2106 ubidi_getProcessedLength(const UBiDi *pBiDi) { 2107 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2108 return pBiDi->length; 2109 } else { 2110 return 0; 2111 } 2112 } 2113 2114 U_CAPI int32_t U_EXPORT2 2115 ubidi_getResultLength(const UBiDi *pBiDi) { 2116 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2117 return pBiDi->resultLength; 2118 } else { 2119 return 0; 2120 } 2121 } 2122 2123 /* paragraphs API functions ------------------------------------------------- */ 2124 2125 U_CAPI UBiDiLevel U_EXPORT2 2126 ubidi_getParaLevel(const UBiDi *pBiDi) { 2127 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2128 return pBiDi->paraLevel; 2129 } else { 2130 return 0; 2131 } 2132 } 2133 2134 U_CAPI int32_t U_EXPORT2 2135 ubidi_countParagraphs(UBiDi *pBiDi) { 2136 if(!IS_VALID_PARA_OR_LINE(pBiDi)) { 2137 return 0; 2138 } else { 2139 return pBiDi->paraCount; 2140 } 2141 } 2142 2143 U_CAPI void U_EXPORT2 2144 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, 2145 int32_t *pParaStart, int32_t *pParaLimit, 2146 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { 2147 int32_t paraStart; 2148 2149 /* check the argument values */ 2150 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 2151 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); 2152 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); 2153 2154 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ 2155 if(paraIndex) { 2156 paraStart=pBiDi->paras[paraIndex-1]; 2157 } else { 2158 paraStart=0; 2159 } 2160 if(pParaStart!=NULL) { 2161 *pParaStart=paraStart; 2162 } 2163 if(pParaLimit!=NULL) { 2164 *pParaLimit=pBiDi->paras[paraIndex]; 2165 } 2166 if(pParaLevel!=NULL) { 2167 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); 2168 } 2169 } 2170 2171 U_CAPI int32_t U_EXPORT2 2172 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, 2173 int32_t *pParaStart, int32_t *pParaLimit, 2174 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { 2175 uint32_t paraIndex; 2176 2177 /* check the argument values */ 2178 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ 2179 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); 2180 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); 2181 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ 2182 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); 2183 2184 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++); 2185 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); 2186 return paraIndex; 2187 } 2188 2189 U_CAPI void U_EXPORT2 2190 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, 2191 const void *newContext, UBiDiClassCallback **oldFn, 2192 const void **oldContext, UErrorCode *pErrorCode) 2193 { 2194 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 2195 if(pBiDi==NULL) { 2196 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2197 return; 2198 } 2199 if( oldFn ) 2200 { 2201 *oldFn = pBiDi->fnClassCallback; 2202 } 2203 if( oldContext ) 2204 { 2205 *oldContext = pBiDi->coClassCallback; 2206 } 2207 pBiDi->fnClassCallback = newFn; 2208 pBiDi->coClassCallback = newContext; 2209 } 2210 2211 U_CAPI void U_EXPORT2 2212 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) 2213 { 2214 if(pBiDi==NULL) { 2215 return; 2216 } 2217 if( fn ) 2218 { 2219 *fn = pBiDi->fnClassCallback; 2220 } 2221 if( context ) 2222 { 2223 *context = pBiDi->coClassCallback; 2224 } 2225 } 2226 2227 U_CAPI UCharDirection U_EXPORT2 2228 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) 2229 { 2230 UCharDirection dir; 2231 2232 if( pBiDi->fnClassCallback == NULL || 2233 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) 2234 { 2235 return ubidi_getClass(pBiDi->bdp, c); 2236 } else { 2237 return dir; 2238 } 2239 } 2240 2241