1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1999-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: ubidi.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 1999jul27 14 * created by: Markus W. Scherer, updated by Matitiahu Allouche 15 */ 16 17 #include "cmemory.h" 18 #include "unicode/utypes.h" 19 #include "unicode/ustring.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ubidi.h" 22 #include "ubidi_props.h" 23 #include "ubidiimp.h" 24 #include "uassert.h" 25 26 /* 27 * General implementation notes: 28 * 29 * Throughout the implementation, there are comments like (W2) that refer to 30 * rules of the BiDi algorithm in its version 5, in this example to the second 31 * rule of the resolution of weak types. 32 * 33 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) 34 * character according to UTF-16, the second UChar gets the directional property of 35 * the entire character assigned, while the first one gets a BN, a boundary 36 * neutral, type, which is ignored by most of the algorithm according to 37 * rule (X9) and the implementation suggestions of the BiDi algorithm. 38 * 39 * Later, adjustWSLevels() will set the level for each BN to that of the 40 * following character (UChar), which results in surrogate pairs getting the 41 * same level on each of their surrogates. 42 * 43 * In a UTF-8 implementation, the same thing could be done: the last byte of 44 * a multi-byte sequence would get the "real" property, while all previous 45 * bytes of that sequence would get BN. 46 * 47 * It is not possible to assign all those parts of a character the same real 48 * property because this would fail in the resolution of weak types with rules 49 * that look at immediately surrounding types. 
50 * 51 * As a related topic, this implementation does not remove Boundary Neutral 52 * types from the input, but ignores them wherever this is relevant. 53 * For example, the loop for the resolution of the weak types reads 54 * types until it finds a non-BN. 55 * Also, explicit embedding codes are neither changed into BN nor removed. 56 * They are only treated the same way real BNs are. 57 * As stated before, adjustWSLevels() takes care of them at the end. 58 * For the purpose of conformance, the levels of all these codes 59 * do not matter. 60 * 61 * Note that this implementation never modifies the dirProps 62 * after the initial setup. 63 * 64 * 65 * In this implementation, the resolution of weak types (Wn), 66 * neutrals (Nn), and the assignment of the resolved level (In) 67 * are all done in one single loop, in resolveImplicitLevels(). 68 * Changes of dirProp values are done on the fly, without writing 69 * them back to the dirProps array. 70 * 71 * 72 * This implementation contains code that allows to bypass steps of the 73 * algorithm that are not needed on the specific paragraph 74 * in order to speed up the most common cases considerably, 75 * like text that is entirely LTR, or RTL text without numbers. 76 * 77 * Most of this is done by setting a bit for each directional property 78 * in a flags variable and later checking for whether there are 79 * any LTR characters or any RTL characters, or both, whether 80 * there are any explicit embedding codes, etc. 81 * 82 * If the (Xn) steps are performed, then the flags are re-evaluated, 83 * because they will then not contain the embedding codes any more 84 * and will be adjusted for override codes, so that subsequently 85 * more bypassing may be possible than what the initial flags suggested. 86 * 87 * If the text is not mixed-directional, then the 88 * algorithm steps for the weak type resolution are not performed, 89 * and all levels are set to the paragraph level. 
90 * 91 * If there are no explicit embedding codes, then the (Xn) steps 92 * are not performed. 93 * 94 * If embedding levels are supplied as a parameter, then all 95 * explicit embedding codes are ignored, and the (Xn) steps 96 * are not performed. 97 * 98 * White Space types could get the level of the run they belong to, 99 * and are checked with a test of (flags&MASK_EMBEDDING) to 100 * consider if the paragraph direction should be considered in 101 * the flags variable. 102 * 103 * If there are no White Space types in the paragraph, then 104 * (L1) is not necessary in adjustWSLevels(). 105 */ 106 107 /* to avoid some conditional statements, use tiny constant arrays */ 108 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; 109 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; 110 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; 111 112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] 113 #define DIRPROP_FLAG_E(level) flagE[(level)&1] 114 #define DIRPROP_FLAG_O(level) flagO[(level)&1] 115 116 /* UBiDi object management -------------------------------------------------- */ 117 118 U_CAPI UBiDi * U_EXPORT2 119 ubidi_open(void) 120 { 121 UErrorCode errorCode=U_ZERO_ERROR; 122 return ubidi_openSized(0, 0, &errorCode); 123 } 124 125 U_CAPI UBiDi * U_EXPORT2 126 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { 127 UBiDi *pBiDi; 128 129 /* check the argument values */ 130 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 131 return NULL; 132 } else if(maxLength<0 || maxRunCount<0) { 133 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 134 return NULL; /* invalid arguments */ 135 } 136 137 /* allocate memory for the object */ 138 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); 139 if(pBiDi==NULL) { 140 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 141 return NULL; 142 } 143 144 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ 145 uprv_memset(pBiDi, 0, sizeof(UBiDi)); 146 147 /* get 
BiDi properties */ 148 pBiDi->bdp=ubidi_getSingleton(pErrorCode); 149 if(U_FAILURE(*pErrorCode)) { 150 uprv_free(pBiDi); 151 return NULL; 152 } 153 154 /* allocate memory for arrays as requested */ 155 if(maxLength>0) { 156 if( !getInitialDirPropsMemory(pBiDi, maxLength) || 157 !getInitialLevelsMemory(pBiDi, maxLength) 158 ) { 159 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 160 } 161 } else { 162 pBiDi->mayAllocateText=TRUE; 163 } 164 165 if(maxRunCount>0) { 166 if(maxRunCount==1) { 167 /* use simpleRuns[] */ 168 pBiDi->runsSize=sizeof(Run); 169 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { 170 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 171 } 172 } else { 173 pBiDi->mayAllocateRuns=TRUE; 174 } 175 176 if(U_SUCCESS(*pErrorCode)) { 177 return pBiDi; 178 } else { 179 ubidi_close(pBiDi); 180 return NULL; 181 } 182 } 183 184 /* 185 * We are allowed to allocate memory if memory==NULL or 186 * mayAllocate==TRUE for each array that we need. 187 * We also try to grow memory as needed if we 188 * allocate it. 189 * 190 * Assume sizeNeeded>0. 191 * If *pMemory!=NULL, then assume *pSize>0. 192 * 193 * ### this realloc() may unnecessarily copy the old data, 194 * which we know we don't need any more; 195 * is this the best way to do this?? 
196 */ 197 U_CFUNC UBool 198 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { 199 void **pMemory = (void **)bidiMem; 200 /* check for existing memory */ 201 if(*pMemory==NULL) { 202 /* we need to allocate memory */ 203 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { 204 *pSize=sizeNeeded; 205 return TRUE; 206 } else { 207 return FALSE; 208 } 209 } else { 210 if(sizeNeeded<=*pSize) { 211 /* there is already enough memory */ 212 return TRUE; 213 } 214 else if(!mayAllocate) { 215 /* not enough memory, and we must not allocate */ 216 return FALSE; 217 } else { 218 /* we try to grow */ 219 void *memory; 220 /* in most cases, we do not need the copy-old-data part of 221 * realloc, but it is needed when adding runs using getRunsMemory() 222 * in setParaRunsOnly() 223 */ 224 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { 225 *pMemory=memory; 226 *pSize=sizeNeeded; 227 return TRUE; 228 } else { 229 /* we failed to grow */ 230 return FALSE; 231 } 232 } 233 } 234 } 235 236 U_CAPI void U_EXPORT2 237 ubidi_close(UBiDi *pBiDi) { 238 if(pBiDi!=NULL) { 239 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ 240 if(pBiDi->dirPropsMemory!=NULL) { 241 uprv_free(pBiDi->dirPropsMemory); 242 } 243 if(pBiDi->levelsMemory!=NULL) { 244 uprv_free(pBiDi->levelsMemory); 245 } 246 if(pBiDi->runsMemory!=NULL) { 247 uprv_free(pBiDi->runsMemory); 248 } 249 if(pBiDi->parasMemory!=NULL) { 250 uprv_free(pBiDi->parasMemory); 251 } 252 if(pBiDi->insertPoints.points!=NULL) { 253 uprv_free(pBiDi->insertPoints.points); 254 } 255 256 uprv_free(pBiDi); 257 } 258 } 259 260 /* set to approximate "inverse BiDi" ---------------------------------------- */ 261 262 U_CAPI void U_EXPORT2 263 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { 264 if(pBiDi!=NULL) { 265 pBiDi->isInverse=isInverse; 266 pBiDi->reorderingMode = isInverse ? 
UBIDI_REORDER_INVERSE_NUMBERS_AS_L 267 : UBIDI_REORDER_DEFAULT; 268 } 269 } 270 271 U_CAPI UBool U_EXPORT2 272 ubidi_isInverse(UBiDi *pBiDi) { 273 if(pBiDi!=NULL) { 274 return pBiDi->isInverse; 275 } else { 276 return FALSE; 277 } 278 } 279 280 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of 281 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre 282 * concept of RUNS_ONLY which is a double operation. 283 * It could be advantageous to divide this into 3 concepts: 284 * a) Operation: direct / inverse / RUNS_ONLY 285 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R 286 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL 287 * This would allow combinations not possible today like RUNS_ONLY with 288 * NUMBERS_SPECIAL. 289 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and 290 * REMOVE_CONTROLS for the inverse step. 291 * Not all combinations would be supported, and probably not all do make sense. 292 * This would need to document which ones are supported and what are the 293 * fallbacks for unsupported combinations. 
294 */ 295 U_CAPI void U_EXPORT2 296 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { 297 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) 298 && (reorderingMode < UBIDI_REORDER_COUNT)) { 299 pBiDi->reorderingMode = reorderingMode; 300 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); 301 } 302 } 303 304 U_CAPI UBiDiReorderingMode U_EXPORT2 305 ubidi_getReorderingMode(UBiDi *pBiDi) { 306 if (pBiDi!=NULL) { 307 return pBiDi->reorderingMode; 308 } else { 309 return UBIDI_REORDER_DEFAULT; 310 } 311 } 312 313 U_CAPI void U_EXPORT2 314 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { 315 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { 316 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; 317 } 318 if (pBiDi!=NULL) { 319 pBiDi->reorderingOptions=reorderingOptions; 320 } 321 } 322 323 U_CAPI uint32_t U_EXPORT2 324 ubidi_getReorderingOptions(UBiDi *pBiDi) { 325 if (pBiDi!=NULL) { 326 return pBiDi->reorderingOptions; 327 } else { 328 return 0; 329 } 330 } 331 332 /* perform (P2)..(P3) ------------------------------------------------------- */ 333 334 /* 335 * Get the directional properties for the text, 336 * calculate the flags bit-set, and 337 * determine the paragraph level if necessary. 
338 */ 339 static void 340 getDirProps(UBiDi *pBiDi) { 341 const UChar *text=pBiDi->text; 342 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ 343 344 int32_t i=0, i0, i1, length=pBiDi->originalLength; 345 Flags flags=0; /* collect all directionalities in the text */ 346 UChar32 uchar; 347 DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */ 348 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); 349 /* for inverse BiDi, the default para level is set to RTL if there is a 350 strong R or AL character at either end of the text */ 351 UBool isDefaultLevelInverse=isDefaultLevel && (UBool) 352 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || 353 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); 354 int32_t lastArabicPos=-1; 355 int32_t controlCount=0; 356 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & 357 UBIDI_OPTION_REMOVE_CONTROLS); 358 359 typedef enum { 360 NOT_CONTEXTUAL, /* 0: not contextual paraLevel */ 361 LOOKING_FOR_STRONG, /* 1: looking for first strong char */ 362 FOUND_STRONG_CHAR /* 2: found first strong char */ 363 } State; 364 State state; 365 int32_t paraStart=0; /* index of first char in paragraph */ 366 DirProp paraDir; /* == CONTEXT_RTL within paragraphs 367 starting with strong R char */ 368 DirProp lastStrongDir=0; /* for default level & inverse BiDi */ 369 int32_t lastStrongLTR=0; /* for STREAMING option */ 370 371 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 372 pBiDi->length=0; 373 lastStrongLTR=0; 374 } 375 if(isDefaultLevel) { 376 paraDirDefault=pBiDi->paraLevel&1 ? 
CONTEXT_RTL : 0; 377 paraDir=paraDirDefault; 378 lastStrongDir=paraDirDefault; 379 state=LOOKING_FOR_STRONG; 380 } else { 381 state=NOT_CONTEXTUAL; 382 paraDir=0; 383 } 384 /* count paragraphs and determine the paragraph level (P2..P3) */ 385 /* 386 * see comment in ubidi.h: 387 * the DEFAULT_XXX values are designed so that 388 * their bit 0 alone yields the intended default 389 */ 390 for( /* i=0 above */ ; i<length; ) { 391 /* i is incremented by UTF_NEXT_CHAR */ 392 i0=i; /* index of first code unit */ 393 UTF_NEXT_CHAR(text, i, length, uchar); 394 i1=i-1; /* index of last code unit, gets the directional property */ 395 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar)); 396 dirProps[i1]=dirProp|paraDir; 397 if(i1>i0) { /* set previous code units' properties to BN */ 398 flags|=DIRPROP_FLAG(BN); 399 do { 400 dirProps[--i1]=(DirProp)(BN|paraDir); 401 } while(i1>i0); 402 } 403 if(state==LOOKING_FOR_STRONG) { 404 if(dirProp==L) { 405 state=FOUND_STRONG_CHAR; 406 if(paraDir) { 407 paraDir=0; 408 for(i1=paraStart; i1<i; i1++) { 409 dirProps[i1]&=~CONTEXT_RTL; 410 } 411 } 412 continue; 413 } 414 if(dirProp==R || dirProp==AL) { 415 state=FOUND_STRONG_CHAR; 416 if(paraDir==0) { 417 paraDir=CONTEXT_RTL; 418 for(i1=paraStart; i1<i; i1++) { 419 dirProps[i1]|=CONTEXT_RTL; 420 } 421 } 422 continue; 423 } 424 } 425 if(dirProp==L) { 426 lastStrongDir=0; 427 lastStrongLTR=i; /* i is index to next character */ 428 } 429 else if(dirProp==R) { 430 lastStrongDir=CONTEXT_RTL; 431 } 432 else if(dirProp==AL) { 433 lastStrongDir=CONTEXT_RTL; 434 lastArabicPos=i-1; 435 } 436 else if(dirProp==B) { 437 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 438 pBiDi->length=i; /* i is index to next character */ 439 } 440 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) { 441 for( ; paraStart<i; paraStart++) { 442 dirProps[paraStart]|=CONTEXT_RTL; 443 } 444 } 445 if(i<length) { /* B not last char in text */ 446 if(!((uchar==CR) 
&& (text[i]==LF))) { 447 pBiDi->paraCount++; 448 } 449 if(isDefaultLevel) { 450 state=LOOKING_FOR_STRONG; 451 paraStart=i; /* i is index to next character */ 452 paraDir=paraDirDefault; 453 lastStrongDir=paraDirDefault; 454 } 455 } 456 } 457 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) { 458 controlCount++; 459 } 460 } 461 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) { 462 for(i1=paraStart; i1<length; i1++) { 463 dirProps[i1]|=CONTEXT_RTL; 464 } 465 } 466 if(isDefaultLevel) { 467 pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0); 468 } 469 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 470 if((lastStrongLTR>pBiDi->length) && 471 (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) { 472 pBiDi->length = lastStrongLTR; 473 } 474 if(pBiDi->length<pBiDi->originalLength) { 475 pBiDi->paraCount--; 476 } 477 } 478 /* The following line does nothing new for contextual paraLevel, but is 479 needed for absolute paraLevel. */ 480 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 481 482 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { 483 flags|=DIRPROP_FLAG(L); 484 } 485 486 pBiDi->controlCount = controlCount; 487 pBiDi->flags=flags; 488 pBiDi->lastArabicPos=lastArabicPos; 489 } 490 491 /* perform (X1)..(X9) ------------------------------------------------------- */ 492 493 /* determine if the text is mixed-directional or single-directional */ 494 static UBiDiDirection 495 directionFromFlags(UBiDi *pBiDi) { 496 Flags flags=pBiDi->flags; 497 /* if the text contains AN and neutrals, then some neutrals may become RTL */ 498 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { 499 return UBIDI_LTR; 500 } else if(!(flags&MASK_LTR)) { 501 return UBIDI_RTL; 502 } else { 503 return UBIDI_MIXED; 504 } 505 } 506 507 /* 508 * Resolve the explicit levels as specified by explicit embedding codes. 509 * Recalculate the flags to have them reflect the real properties 510 * after taking the explicit embeddings into account. 
511 * 512 * The BiDi algorithm is designed to result in the same behavior whether embedding 513 * levels are externally specified (from "styled text", supposedly the preferred 514 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. 515 * That is why (X9) instructs to remove all explicit codes (and BN). 516 * However, in a real implementation, this removal of these codes and their index 517 * positions in the plain text is undesirable since it would result in 518 * reallocated, reindexed text. 519 * Instead, this implementation leaves the codes in there and just ignores them 520 * in the subsequent processing. 521 * In order to get the same reordering behavior, positions with a BN or an 522 * explicit embedding code just get the same level assigned as the last "real" 523 * character. 524 * 525 * Some implementations, not this one, then overwrite some of these 526 * directionality properties at "real" same-level-run boundaries by 527 * L or R codes so that the resolution of weak types can be performed on the 528 * entire paragraph at once instead of having to parse it once more and 529 * perform that resolution on same-level-runs. 530 * This limits the scope of the implicit rules in effectively 531 * the same way as the run limits. 532 * 533 * Instead, this implementation does not modify these codes. 534 * On one hand, the paragraph has to be scanned for same-level-runs, but 535 * on the other hand, this saves another loop to reset these codes, 536 * or saves making and modifying a copy of dirProps[]. 537 * 538 * 539 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. 540 * 541 * 542 * Handling the stack of explicit levels (Xn): 543 * 544 * With the BiDi stack of explicit levels, 545 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, 546 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61. 
547 * 548 * In order to have a correct push-pop semantics even in the case of overflows, 549 * there are two overflow counters: 550 * - countOver60 is incremented with each LRx at level 60 551 * - from level 60, one RLx increases the level to 61 552 * - countOver61 is incremented with each LRx and RLx at level 61 553 * 554 * Popping levels with PDF must work in the opposite order so that level 61 555 * is correct at the correct point. Underflows (too many PDFs) must be checked. 556 * 557 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. 558 */ 559 static UBiDiDirection 560 resolveExplicitLevels(UBiDi *pBiDi) { 561 const DirProp *dirProps=pBiDi->dirProps; 562 UBiDiLevel *levels=pBiDi->levels; 563 const UChar *text=pBiDi->text; 564 565 int32_t i=0, length=pBiDi->length; 566 Flags flags=pBiDi->flags; /* collect all directionalities in the text */ 567 DirProp dirProp; 568 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); 569 570 UBiDiDirection direction; 571 int32_t paraIndex=0; 572 573 /* determine if the text is mixed-directional or single-directional */ 574 direction=directionFromFlags(pBiDi); 575 576 /* we may not need to resolve any explicit levels, but for multiple 577 paragraphs we want to loop on all chars to set the para boundaries */ 578 if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) { 579 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ 580 } else if((pBiDi->paraCount==1) && 581 (!(flags&MASK_EXPLICIT) || 582 (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) { 583 /* mixed, but all characters are at the same embedding level */ 584 /* or we are in "inverse BiDi" */ 585 /* and we don't have contextual multiple paragraphs with some B char */ 586 /* set all levels to the paragraph level */ 587 for(i=0; i<length; ++i) { 588 levels[i]=level; 589 } 590 } else { 591 /* continue to perform (Xn) */ 592 593 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ 594 /* 
both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */ 595 UBiDiLevel embeddingLevel=level, newLevel, stackTop=0; 596 597 UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */ 598 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */ 599 600 /* recalculate the flags */ 601 flags=0; 602 603 for(i=0; i<length; ++i) { 604 dirProp=NO_CONTEXT_RTL(dirProps[i]); 605 switch(dirProp) { 606 case LRE: 607 case LRO: 608 /* (X3, X5) */ 609 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */ 610 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) { 611 stack[stackTop]=embeddingLevel; 612 ++stackTop; 613 embeddingLevel=newLevel; 614 if(dirProp==LRO) { 615 embeddingLevel|=UBIDI_LEVEL_OVERRIDE; 616 } 617 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE 618 since this has already been done for newLevel which is 619 the source for embeddingLevel. 620 */ 621 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) { 622 ++countOver61; 623 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ { 624 ++countOver60; 625 } 626 flags|=DIRPROP_FLAG(BN); 627 break; 628 case RLE: 629 case RLO: 630 /* (X2, X4) */ 631 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */ 632 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) { 633 stack[stackTop]=embeddingLevel; 634 ++stackTop; 635 embeddingLevel=newLevel; 636 if(dirProp==RLO) { 637 embeddingLevel|=UBIDI_LEVEL_OVERRIDE; 638 } 639 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE 640 since this has already been done for newLevel which is 641 the source for embeddingLevel. 
642 */ 643 } else { 644 ++countOver61; 645 } 646 flags|=DIRPROP_FLAG(BN); 647 break; 648 case PDF: 649 /* (X7) */ 650 /* handle all the overflow cases first */ 651 if(countOver61>0) { 652 --countOver61; 653 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) { 654 /* handle LRx overflows from level 60 */ 655 --countOver60; 656 } else if(stackTop>0) { 657 /* this is the pop operation; it also pops level 61 while countOver60>0 */ 658 --stackTop; 659 embeddingLevel=stack[stackTop]; 660 /* } else { (underflow) */ 661 } 662 flags|=DIRPROP_FLAG(BN); 663 break; 664 case B: 665 stackTop=0; 666 countOver60=countOver61=0; 667 level=GET_PARALEVEL(pBiDi, i); 668 if((i+1)<length) { 669 embeddingLevel=GET_PARALEVEL(pBiDi, i+1); 670 if(!((text[i]==CR) && (text[i+1]==LF))) { 671 pBiDi->paras[paraIndex++]=i+1; 672 } 673 } 674 flags|=DIRPROP_FLAG(B); 675 break; 676 case BN: 677 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ 678 /* they will get their levels set correctly in adjustWSLevels() */ 679 flags|=DIRPROP_FLAG(BN); 680 break; 681 default: 682 /* all other types get the "real" level */ 683 if(level!=embeddingLevel) { 684 level=embeddingLevel; 685 if(level&UBIDI_LEVEL_OVERRIDE) { 686 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; 687 } else { 688 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; 689 } 690 } 691 if(!(level&UBIDI_LEVEL_OVERRIDE)) { 692 flags|=DIRPROP_FLAG(dirProp); 693 } 694 break; 695 } 696 697 /* 698 * We need to set reasonable levels even on BN codes and 699 * explicit codes because we will later look at same-level runs (X10). 
700 */ 701 levels[i]=level; 702 } 703 if(flags&MASK_EMBEDDING) { 704 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 705 } 706 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { 707 flags|=DIRPROP_FLAG(L); 708 } 709 710 /* subsequently, ignore the explicit codes and BN (X9) */ 711 712 /* again, determine if the text is mixed-directional or single-directional */ 713 pBiDi->flags=flags; 714 direction=directionFromFlags(pBiDi); 715 } 716 717 return direction; 718 } 719 720 /* 721 * Use a pre-specified embedding levels array: 722 * 723 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), 724 * ignore all explicit codes (X9), 725 * and check all the preset levels. 726 * 727 * Recalculate the flags to have them reflect the real properties 728 * after taking the explicit embeddings into account. 729 */ 730 static UBiDiDirection 731 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { 732 const DirProp *dirProps=pBiDi->dirProps; 733 DirProp dirProp; 734 UBiDiLevel *levels=pBiDi->levels; 735 const UChar *text=pBiDi->text; 736 737 int32_t i, length=pBiDi->length; 738 Flags flags=0; /* collect all directionalities in the text */ 739 UBiDiLevel level; 740 uint32_t paraIndex=0; 741 742 for(i=0; i<length; ++i) { 743 level=levels[i]; 744 dirProp=NO_CONTEXT_RTL(dirProps[i]); 745 if(level&UBIDI_LEVEL_OVERRIDE) { 746 /* keep the override flag in levels[i] but adjust the flags */ 747 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ 748 flags|=DIRPROP_FLAG_O(level); 749 } else { 750 /* set the flags */ 751 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); 752 } 753 if((level<GET_PARALEVEL(pBiDi, i) && 754 !((0==level)&&(dirProp==B))) || 755 (UBIDI_MAX_EXPLICIT_LEVEL<level)) { 756 /* level out of bounds */ 757 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 758 return UBIDI_LTR; 759 } 760 if((dirProp==B) && ((i+1)<length)) { 761 if(!((text[i]==CR) && (text[i+1]==LF))) { 762 pBiDi->paras[paraIndex++]=i+1; 763 } 764 } 765 } 766 
if(flags&MASK_EMBEDDING) { 767 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 768 } 769 770 /* determine if the text is mixed-directional or single-directional */ 771 pBiDi->flags=flags; 772 return directionFromFlags(pBiDi); 773 } 774 775 /****************************************************************** 776 The Properties state machine table 777 ******************************************************************* 778 779 All table cells are 8 bits: 780 bits 0..4: next state 781 bits 5..7: action to perform (if > 0) 782 783 Cells may be of format "n" where n represents the next state 784 (except for the rightmost column). 785 Cells may also be of format "s(x,y)" where x represents an action 786 to perform and y represents the next state. 787 788 ******************************************************************* 789 Definitions and type for properties state table 790 ******************************************************************* 791 */ 792 #define IMPTABPROPS_COLUMNS 14 793 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) 794 #define GET_STATEPROPS(cell) ((cell)&0x1f) 795 #define GET_ACTIONPROPS(cell) ((cell)>>5) 796 #define s(action, newState) ((uint8_t)(newState+(action<<5))) 797 798 static const uint8_t groupProp[] = /* dirProp regrouped */ 799 { 800 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */ 801 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10 802 }; 803 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ 804 805 /****************************************************************** 806 807 PROPERTIES STATE TABLE 808 809 In table impTabProps, 810 - the ON column regroups ON and WS 811 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF 812 - the Res column is the reduced property assigned to a run 813 814 Action 1: process current run1, init new run1 815 2: init new run2 816 3: process run1, process run2, init new run1 817 4: process run1, set run1=run2, 
init new run2

 Notes:
  1) This table is used in resolveImplicitLevels().
  2) This table triggers actions when there is a change in the Bidi
     property of incoming characters (action 1).
  3) Most such property sequences are processed immediately (in
     fact, passed to processPropertySeq()).
  4) However, numbers are assembled as one sequence. This means
     that undefined situations (like CS following digits, until
     it is known if the next char will be a digit) are held until
     following chars define them.
     Example: digits followed by CS, then comes another CS or ON;
              the digits will be processed, then the CS assigned
              as the start of an ON sequence (action 3).
  5) There are cases where more than one sequence must be
     processed, for instance digits followed by CS followed by L:
     the digits must be processed as one sequence, and the CS
     must be processed as an ON sequence, all this before starting
     assembling chars for the opening L sequence.
837 838 839 */ 840 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = 841 { 842 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , Res */ 843 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , DirProp_ON }, 844 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), DirProp_L }, 845 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), DirProp_R }, 846 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 , DirProp_R }, 847 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), DirProp_EN }, 848 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), DirProp_AN }, 849 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), DirProp_AN }, 850 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3), DirProp_ON }, 851 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3), DirProp_ON }, 852 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), DirProp_ON }, 853 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), DirProp_EN }, 854 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), DirProp_EN }, 855 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), DirProp_AN }, 856 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), DirProp_AN }, 857 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3), DirProp_ON }, 858 /*15 S */ { s(1,1), s(1,2), s(1,4), 
s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), DirProp_S }, 859 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), DirProp_S }, 860 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), DirProp_B } 861 }; 862 863 /* we must undef macro s because the levels table have a different 864 * structure (4 bits for action and 4 bits for next state. 865 */ 866 #undef s 867 868 /****************************************************************** 869 The levels state machine tables 870 ******************************************************************* 871 872 All table cells are 8 bits: 873 bits 0..3: next state 874 bits 4..7: action to perform (if > 0) 875 876 Cells may be of format "n" where n represents the next state 877 (except for the rightmost column). 878 Cells may also be of format "s(x,y)" where x represents an action 879 to perform and y represents the next state. 880 881 This format limits each table to 16 states each and to 15 actions. 882 883 ******************************************************************* 884 Definitions and type for levels state tables 885 ******************************************************************* 886 */ 887 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2) 888 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) 889 #define GET_STATE(cell) ((cell)&0x0f) 890 #define GET_ACTION(cell) ((cell)>>4) 891 #define s(action, newState) ((uint8_t)(newState+(action<<4))) 892 893 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; 894 typedef uint8_t ImpAct[]; 895 896 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct, 897 * instead of having a pair of ImpTab and a pair of ImpAct. 
898 */ 899 typedef struct ImpTabPair { 900 const void * pImpTab[2]; 901 const void * pImpAct[2]; 902 } ImpTabPair; 903 904 /****************************************************************** 905 906 LEVELS STATE TABLES 907 908 In all levels state tables, 909 - state 0 is the initial state 910 - the Res column is the increment to add to the text level 911 for this property sequence. 912 913 The impAct arrays for each table of a pair map the local action 914 numbers of the table to the total list of actions. For instance, 915 action 2 in a given table corresponds to the action number which 916 appears in entry [2] of the impAct array for that table. 917 The first entry of all impAct arrays must be 0. 918 919 Action 1: init conditional sequence 920 2: prepend conditional sequence to current sequence 921 3: set ON sequence to new level - 1 922 4: init EN/AN/ON sequence 923 5: fix EN/AN/ON sequence followed by R 924 6: set previous level sequence to level 2 925 926 Notes: 927 1) These tables are used in processPropertySeq(). The input 928 is property sequences as determined by resolveImplicitLevels. 929 2) Most such property sequences are processed immediately 930 (levels are assigned). 931 3) However, some sequences cannot be assigned a final level till 932 one or more following sequences are received. For instance, 933 ON following an R sequence within an even-level paragraph. 934 If the following sequence is R, the ON sequence will be 935 assigned basic run level+1, and so will the R sequence. 936 4) S is generally handled like ON, since its level will be fixed 937 to paragraph level in adjustWSLevels(). 938 939 */ 940 941 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ 942 /* In this table, conditional sequences receive the higher possible level 943 until proven otherwise. 
944 */ 945 { 946 /* L , R , EN , AN , ON , S , B , Res */ 947 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 }, 948 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 }, 949 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 }, 950 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 }, 951 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 }, 952 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 } 953 }; 954 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ 955 /* In this table, conditional sequences receive the lower possible level 956 until proven otherwise. 957 */ 958 { 959 /* L , R , EN , AN , ON , S , B , Res */ 960 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, 961 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 }, 962 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, 963 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 }, 964 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 }, 965 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 } 966 }; 967 static const ImpAct impAct0 = {0,1,2,3,4,5,6}; 968 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, 969 &impTabR_DEFAULT}, 970 {&impAct0, &impAct0}}; 971 972 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ 973 /* In this table, conditional sequences receive the higher possible level 974 until proven otherwise. 
975 */ 976 { 977 /* L , R , EN , AN , ON , S , B , Res */ 978 /* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 }, 979 /* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 }, 980 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 }, 981 /* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 }, 982 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } 983 }; 984 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, 985 &impTabR_DEFAULT}, 986 {&impAct0, &impAct0}}; 987 988 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R = 989 /* In this table, EN/AN+ON sequences receive levels as if associated with R 990 until proven that there is L or sor/eor on both sides. AN is handled like EN. 991 */ 992 { 993 /* L , R , EN , AN , ON , S , B , Res */ 994 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, 995 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 }, 996 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 }, 997 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 }, 998 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 }, 999 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 } 1000 }; 1001 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R = 1002 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1003 until proven that there is L on both sides. AN is handled like EN. 
1004 */ 1005 { 1006 /* L , R , EN , AN , ON , S , B , Res */ 1007 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1008 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, 1009 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 }, 1010 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 }, 1011 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 } 1012 }; 1013 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = { 1014 {&impTabL_GROUP_NUMBERS_WITH_R, 1015 &impTabR_GROUP_NUMBERS_WITH_R}, 1016 {&impAct0, &impAct0}}; 1017 1018 1019 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L = 1020 /* This table is identical to the Default LTR table except that EN and AN are 1021 handled like L. 1022 */ 1023 { 1024 /* L , R , EN , AN , ON , S , B , Res */ 1025 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 }, 1026 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 }, 1027 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 }, 1028 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 }, 1029 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 }, 1030 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 } 1031 }; 1032 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L = 1033 /* This table is identical to the Default RTL table except that EN and AN are 1034 handled like L. 
1035 */ 1036 { 1037 /* L , R , EN , AN , ON , S , B , Res */ 1038 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1039 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 }, 1040 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, 1041 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 }, 1042 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 }, 1043 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 } 1044 }; 1045 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = { 1046 {&impTabL_INVERSE_NUMBERS_AS_L, 1047 &impTabR_INVERSE_NUMBERS_AS_L}, 1048 {&impAct0, &impAct0}}; 1049 1050 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ 1051 /* In this table, conditional sequences receive the lower possible level 1052 until proven otherwise. 1053 */ 1054 { 1055 /* L , R , EN , AN , ON , S , B , Res */ 1056 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, 1057 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 }, 1058 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, 1059 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 }, 1060 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 }, 1061 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 }, 1062 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 } 1063 }; 1064 static const ImpAct impAct1 = {0,1,11,12}; 1065 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" 1066 */ 1067 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { 1068 {&impTabL_DEFAULT, 1069 &impTabR_INVERSE_LIKE_DIRECT}, 1070 {&impAct0, &impAct1}}; 1071 1072 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = 1073 /* The case handled in this table is (visually): R EN L 1074 */ 1075 { 1076 /* L , R , EN , AN , ON , S , B , Res */ 1077 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 }, 1078 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 }, 1079 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 }, 1080 /* 3 : R */ { 0 , s(6,3), 
s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 }, 1081 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 }, 1082 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 }, 1083 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 } 1084 }; 1085 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = 1086 /* The cases handled in this table are (visually): R EN L 1087 R L AN L 1088 */ 1089 { 1090 /* L , R , EN , AN , ON , S , B , Res */ 1091 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1092 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 }, 1093 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 }, 1094 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 }, 1095 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 }, 1096 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, 1097 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 } 1098 }; 1099 static const ImpAct impAct2 = {0,1,7,8,9,10}; 1100 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { 1101 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, 1102 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, 1103 {&impAct0, &impAct2}}; 1104 1105 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { 1106 {&impTabL_NUMBERS_SPECIAL, 1107 &impTabR_INVERSE_LIKE_DIRECT}, 1108 {&impAct0, &impAct1}}; 1109 1110 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = 1111 /* The case handled in this table is (visually): R EN L 1112 */ 1113 { 1114 /* L , R , EN , AN , ON , S , B , Res */ 1115 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 }, 1116 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 }, 1117 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 }, 1118 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 }, 1119 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 } 1120 }; 1121 static const ImpTabPair 
impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { 1122 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, 1123 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, 1124 {&impAct0, &impAct2}}; 1125 1126 #undef s 1127 1128 typedef struct { 1129 const ImpTab * pImpTab; /* level table pointer */ 1130 const ImpAct * pImpAct; /* action map array */ 1131 int32_t startON; /* start of ON sequence */ 1132 int32_t startL2EN; /* start of level 2 sequence */ 1133 int32_t lastStrongRTL; /* index of last found R or AL */ 1134 int32_t state; /* current state */ 1135 UBiDiLevel runLevel; /* run level before implicit solving */ 1136 } LevState; 1137 1138 /*------------------------------------------------------------------------*/ 1139 1140 static void 1141 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) 1142 /* param pos: position where to insert 1143 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER 1144 */ 1145 { 1146 #define FIRSTALLOC 10 1147 Point point; 1148 InsertPoints * pInsertPoints=&(pBiDi->insertPoints); 1149 1150 if (pInsertPoints->capacity == 0) 1151 { 1152 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC); 1153 if (pInsertPoints->points == NULL) 1154 { 1155 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; 1156 return; 1157 } 1158 pInsertPoints->capacity=FIRSTALLOC; 1159 } 1160 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ 1161 { 1162 void * savePoints=pInsertPoints->points; 1163 pInsertPoints->points=uprv_realloc(pInsertPoints->points, 1164 pInsertPoints->capacity*2*sizeof(Point)); 1165 if (pInsertPoints->points == NULL) 1166 { 1167 pInsertPoints->points=savePoints; 1168 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; 1169 return; 1170 } 1171 else pInsertPoints->capacity*=2; 1172 } 1173 point.pos=pos; 1174 point.flag=flag; 1175 pInsertPoints->points[pInsertPoints->size]=point; 1176 pInsertPoints->size++; 1177 #undef FIRSTALLOC 1178 } 1179 1180 /* perform rules (Wn), (Nn), and (In) on a run of the text 
------------------ */ 1181 1182 /* 1183 * This implementation of the (Wn) rules applies all rules in one pass. 1184 * In order to do so, it needs a look-ahead of typically 1 character 1185 * (except for W5: sequences of ET) and keeps track of changes 1186 * in a rule Wp that affect a later Wq (p<q). 1187 * 1188 * The (Nn) and (In) rules are also performed in that same single loop, 1189 * but effectively one iteration behind for white space. 1190 * 1191 * Since all implicit rules are performed in one step, it is not necessary 1192 * to actually store the intermediate directional properties in dirProps[]. 1193 */ 1194 1195 static void 1196 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, 1197 int32_t start, int32_t limit) { 1198 uint8_t cell, oldStateSeq, actionSeq; 1199 const ImpTab * pImpTab=pLevState->pImpTab; 1200 const ImpAct * pImpAct=pLevState->pImpAct; 1201 UBiDiLevel * levels=pBiDi->levels; 1202 UBiDiLevel level, addLevel; 1203 InsertPoints * pInsertPoints; 1204 int32_t start0, k; 1205 1206 start0=start; /* save original start position */ 1207 oldStateSeq=(uint8_t)pLevState->state; 1208 cell=(*pImpTab)[oldStateSeq][_prop]; 1209 pLevState->state=GET_STATE(cell); /* isolate the new state */ 1210 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ 1211 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; 1212 1213 if(actionSeq) { 1214 switch(actionSeq) { 1215 case 1: /* init ON seq */ 1216 pLevState->startON=start0; 1217 break; 1218 1219 case 2: /* prepend ON seq to current seq */ 1220 start=pLevState->startON; 1221 break; 1222 1223 case 3: /* L or S after possible relevant EN/AN */ 1224 /* check if we had EN after R/AL */ 1225 if (pLevState->startL2EN >= 0) { 1226 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); 1227 } 1228 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */ 1229 /* check if we had any relevant EN/AN after R/AL */ 1230 pInsertPoints=&(pBiDi->insertPoints); 1231 if 
((pInsertPoints->capacity == 0) || 1232 (pInsertPoints->size <= pInsertPoints->confirmed)) 1233 { 1234 /* nothing, just clean up */ 1235 pLevState->lastStrongRTL=-1; 1236 /* check if we have a pending conditional segment */ 1237 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; 1238 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ 1239 start=pLevState->startON; /* reset to basic run level */ 1240 } 1241 if (_prop == DirProp_S) /* add LRM before S */ 1242 { 1243 addPoint(pBiDi, start0, LRM_BEFORE); 1244 pInsertPoints->confirmed=pInsertPoints->size; 1245 } 1246 break; 1247 } 1248 /* reset previous RTL cont to level for LTR text */ 1249 for (k=pLevState->lastStrongRTL+1; k<start0; k++) 1250 { 1251 /* reset odd level, leave runLevel+2 as is */ 1252 levels[k]=(levels[k] - 2) & ~1; 1253 } 1254 /* mark insert points as confirmed */ 1255 pInsertPoints->confirmed=pInsertPoints->size; 1256 pLevState->lastStrongRTL=-1; 1257 if (_prop == DirProp_S) /* add LRM before S */ 1258 { 1259 addPoint(pBiDi, start0, LRM_BEFORE); 1260 pInsertPoints->confirmed=pInsertPoints->size; 1261 } 1262 break; 1263 1264 case 4: /* R/AL after possible relevant EN/AN */ 1265 /* just clean up */ 1266 pInsertPoints=&(pBiDi->insertPoints); 1267 if (pInsertPoints->capacity > 0) 1268 /* remove all non confirmed insert points */ 1269 pInsertPoints->size=pInsertPoints->confirmed; 1270 pLevState->startON=-1; 1271 pLevState->startL2EN=-1; 1272 pLevState->lastStrongRTL=limit - 1; 1273 break; 1274 1275 case 5: /* EN/AN after R/AL + possible cont */ 1276 /* check for real AN */ 1277 if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) && 1278 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) 1279 { 1280 /* real AN */ 1281 if (pLevState->startL2EN == -1) /* if no relevant EN already found */ 1282 { 1283 /* just note the righmost digit as a strong RTL */ 1284 pLevState->lastStrongRTL=limit - 1; 1285 break; 1286 } 1287 if (pLevState->startL2EN >= 0) /* after EN, 
no AN */ 1288 { 1289 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); 1290 pLevState->startL2EN=-2; 1291 } 1292 /* note AN */ 1293 addPoint(pBiDi, start0, LRM_BEFORE); 1294 break; 1295 } 1296 /* if first EN/AN after R/AL */ 1297 if (pLevState->startL2EN == -1) { 1298 pLevState->startL2EN=start0; 1299 } 1300 break; 1301 1302 case 6: /* note location of latest R/AL */ 1303 pLevState->lastStrongRTL=limit - 1; 1304 pLevState->startON=-1; 1305 break; 1306 1307 case 7: /* L after R+ON/EN/AN */ 1308 /* include possible adjacent number on the left */ 1309 for (k=start0-1; k>=0 && !(levels[k]&1); k--); 1310 if(k>=0) { 1311 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ 1312 pInsertPoints=&(pBiDi->insertPoints); 1313 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ 1314 } 1315 pLevState->startON=start0; 1316 break; 1317 1318 case 8: /* AN after L */ 1319 /* AN numbers between L text on both sides may be trouble. */ 1320 /* tentatively bracket with LRMs; will be confirmed if followed by L */ 1321 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ 1322 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ 1323 break; 1324 1325 case 9: /* R after L+ON/EN/AN */ 1326 /* false alert, infirm LRMs around previous AN */ 1327 pInsertPoints=&(pBiDi->insertPoints); 1328 pInsertPoints->size=pInsertPoints->confirmed; 1329 if (_prop == DirProp_S) /* add RLM before S */ 1330 { 1331 addPoint(pBiDi, start0, RLM_BEFORE); 1332 pInsertPoints->confirmed=pInsertPoints->size; 1333 } 1334 break; 1335 1336 case 10: /* L after L+ON/AN */ 1337 level=pLevState->runLevel + addLevel; 1338 for(k=pLevState->startON; k<start0; k++) { 1339 if (levels[k]<level) 1340 levels[k]=level; 1341 } 1342 pInsertPoints=&(pBiDi->insertPoints); 1343 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */ 1344 pLevState->startON=start0; 1345 break; 1346 1347 case 11: /* L after L+ON+EN/AN/ON */ 1348 level=pLevState->runLevel; 1349 for(k=start0-1; k>=pLevState->startON; k--) { 1350 
if(levels[k]==level+3) { 1351 while(levels[k]==level+3) { 1352 levels[k--]-=2; 1353 } 1354 while(levels[k]==level) { 1355 k--; 1356 } 1357 } 1358 if(levels[k]==level+2) { 1359 levels[k]=level; 1360 continue; 1361 } 1362 levels[k]=level+1; 1363 } 1364 break; 1365 1366 case 12: /* R after L+ON+EN/AN/ON */ 1367 level=pLevState->runLevel+1; 1368 for(k=start0-1; k>=pLevState->startON; k--) { 1369 if(levels[k]>level) { 1370 levels[k]-=2; 1371 } 1372 } 1373 break; 1374 1375 default: /* we should never get here */ 1376 U_ASSERT(FALSE); 1377 break; 1378 } 1379 } 1380 if((addLevel) || (start < start0)) { 1381 level=pLevState->runLevel + addLevel; 1382 for(k=start; k<limit; k++) { 1383 levels[k]=level; 1384 } 1385 } 1386 } 1387 1388 static void 1389 resolveImplicitLevels(UBiDi *pBiDi, 1390 int32_t start, int32_t limit, 1391 DirProp sor, DirProp eor) { 1392 const DirProp *dirProps=pBiDi->dirProps; 1393 1394 LevState levState; 1395 int32_t i, start1, start2; 1396 uint8_t oldStateImp, stateImp, actionImp; 1397 uint8_t gprop, resProp, cell; 1398 UBool inverseRTL; 1399 DirProp nextStrongProp=R; 1400 int32_t nextStrongPos=-1; 1401 1402 levState.startON = -1; /* silence gcc flow analysis */ 1403 1404 /* check for RTL inverse BiDi mode */ 1405 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to 1406 * loop on the text characters from end to start. 1407 * This would need a different properties state table (at least different 1408 * actions) and different levels state tables (maybe very similar to the 1409 * LTR corresponding ones. 
1410 */ 1411 inverseRTL=(UBool) 1412 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && 1413 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || 1414 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); 1415 /* initialize for levels state table */ 1416 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 1417 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 1418 levState.state=0; 1419 levState.runLevel=pBiDi->levels[start]; 1420 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; 1421 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; 1422 processPropertySeq(pBiDi, &levState, sor, start, start); 1423 /* initialize for property state table */ 1424 if(dirProps[start]==NSM) { 1425 stateImp = 1 + sor; 1426 } else { 1427 stateImp=0; 1428 } 1429 start1=start; 1430 start2=start; 1431 1432 for(i=start; i<=limit; i++) { 1433 if(i>=limit) { 1434 gprop=eor; 1435 } else { 1436 DirProp prop, prop1; 1437 prop=NO_CONTEXT_RTL(dirProps[i]); 1438 if(inverseRTL) { 1439 if(prop==AL) { 1440 /* AL before EN does not make it AN */ 1441 prop=R; 1442 } else if(prop==EN) { 1443 if(nextStrongPos<=i) { 1444 /* look for next strong char (L/R/AL) */ 1445 int32_t j; 1446 nextStrongProp=R; /* set default */ 1447 nextStrongPos=limit; 1448 for(j=i+1; j<limit; j++) { 1449 prop1=NO_CONTEXT_RTL(dirProps[j]); 1450 if(prop1==L || prop1==R || prop1==AL) { 1451 nextStrongProp=prop1; 1452 nextStrongPos=j; 1453 break; 1454 } 1455 } 1456 } 1457 if(nextStrongProp==AL) { 1458 prop=AN; 1459 } 1460 } 1461 } 1462 gprop=groupProp[prop]; 1463 } 1464 oldStateImp=stateImp; 1465 cell=impTabProps[oldStateImp][gprop]; 1466 stateImp=GET_STATEPROPS(cell); /* isolate the new state */ 1467 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */ 1468 if((i==limit) && (actionImp==0)) { 1469 /* there is an unprocessed sequence if its property == eor */ 1470 actionImp=1; /* 
process the last sequence */ 1471 } 1472 if(actionImp) { 1473 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES]; 1474 switch(actionImp) { 1475 case 1: /* process current seq1, init new seq1 */ 1476 processPropertySeq(pBiDi, &levState, resProp, start1, i); 1477 start1=i; 1478 break; 1479 case 2: /* init new seq2 */ 1480 start2=i; 1481 break; 1482 case 3: /* process seq1, process seq2, init new seq1 */ 1483 processPropertySeq(pBiDi, &levState, resProp, start1, start2); 1484 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i); 1485 start1=i; 1486 break; 1487 case 4: /* process seq1, set seq1=seq2, init new seq2 */ 1488 processPropertySeq(pBiDi, &levState, resProp, start1, start2); 1489 start1=start2; 1490 start2=i; 1491 break; 1492 default: /* we should never get here */ 1493 U_ASSERT(FALSE); 1494 break; 1495 } 1496 } 1497 } 1498 /* flush possible pending sequence, e.g. ON */ 1499 processPropertySeq(pBiDi, &levState, eor, limit, limit); 1500 } 1501 1502 /* perform (L1) and (X9) ---------------------------------------------------- */ 1503 1504 /* 1505 * Reset the embedding levels for some non-graphic characters (L1). 1506 * This function also sets appropriate levels for BN, and 1507 * explicit embedding types that are supposed to have been removed 1508 * from the paragraph in (X9). 
1509 */ 1510 static void 1511 adjustWSLevels(UBiDi *pBiDi) { 1512 const DirProp *dirProps=pBiDi->dirProps; 1513 UBiDiLevel *levels=pBiDi->levels; 1514 int32_t i; 1515 1516 if(pBiDi->flags&MASK_WS) { 1517 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; 1518 Flags flag; 1519 1520 i=pBiDi->trailingWSStart; 1521 while(i>0) { 1522 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ 1523 while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) { 1524 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { 1525 levels[i]=0; 1526 } else { 1527 levels[i]=GET_PARALEVEL(pBiDi, i); 1528 } 1529 } 1530 1531 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ 1532 /* here, i+1 is guaranteed to be <length */ 1533 while(i>0) { 1534 flag=DIRPROP_FLAG_NC(dirProps[--i]); 1535 if(flag&MASK_BN_EXPLICIT) { 1536 levels[i]=levels[i+1]; 1537 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { 1538 levels[i]=0; 1539 break; 1540 } else if(flag&MASK_B_S) { 1541 levels[i]=GET_PARALEVEL(pBiDi, i); 1542 break; 1543 } 1544 } 1545 } 1546 } 1547 } 1548 1549 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) 1550 #define BIDI_ABS(x) ((x)>=0 ? 
(x) : (-(x))) 1551 static void 1552 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, 1553 UBiDiLevel paraLevel, UErrorCode *pErrorCode) { 1554 void *runsOnlyMemory; 1555 int32_t *visualMap; 1556 UChar *visualText; 1557 int32_t saveLength, saveTrailingWSStart; 1558 const UBiDiLevel *levels; 1559 UBiDiLevel *saveLevels; 1560 UBiDiDirection saveDirection; 1561 UBool saveMayAllocateText; 1562 Run *runs; 1563 int32_t visualLength, i, j, visualStart, logicalStart, 1564 runCount, runLength, addedRuns, insertRemove, 1565 start, limit, step, indexOddBit, logicalPos, 1566 index0, index1; 1567 uint32_t saveOptions; 1568 1569 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; 1570 if(length==0) { 1571 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); 1572 goto cleanup3; 1573 } 1574 /* obtain memory for mapping table and visual text */ 1575 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))); 1576 if(runsOnlyMemory==NULL) { 1577 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1578 goto cleanup3; 1579 } 1580 visualMap=runsOnlyMemory; 1581 visualText=(UChar *)&visualMap[length]; 1582 saveLevels=(UBiDiLevel *)&visualText[length]; 1583 saveOptions=pBiDi->reorderingOptions; 1584 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { 1585 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; 1586 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; 1587 } 1588 paraLevel&=1; /* accept only 0 or 1 */ 1589 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); 1590 if(U_FAILURE(*pErrorCode)) { 1591 goto cleanup3; 1592 } 1593 /* we cannot access directly pBiDi->levels since it is not yet set if 1594 * direction is not MIXED 1595 */ 1596 levels=ubidi_getLevels(pBiDi, pErrorCode); 1597 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel)); 1598 saveTrailingWSStart=pBiDi->trailingWSStart; 1599 saveLength=pBiDi->length; 1600 saveDirection=pBiDi->direction; 1601 1602 /* FOOD FOR THOUGHT: instead of writing the visual text, we 
could use 1603 * the visual map and the dirProps array to drive the second call 1604 * to ubidi_setPara (but must make provision for possible removal of 1605 * BiDi controls. Alternatively, only use the dirProps array via 1606 * customized classifier callback. 1607 */ 1608 visualLength=ubidi_writeReordered(pBiDi, visualText, length, 1609 UBIDI_DO_MIRRORING, pErrorCode); 1610 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); 1611 if(U_FAILURE(*pErrorCode)) { 1612 goto cleanup2; 1613 } 1614 pBiDi->reorderingOptions=saveOptions; 1615 1616 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; 1617 paraLevel^=1; 1618 /* Because what we did with reorderingOptions, visualText may be shorter 1619 * than the original text. But we don't want the levels memory to be 1620 * reallocated shorter than the original length, since we need to restore 1621 * the levels as after the first call to ubidi_setpara() before returning. 1622 * We will force mayAllocateText to FALSE before the second call to 1623 * ubidi_setpara(), and will restore it afterwards. 
1624 */ 1625 saveMayAllocateText=pBiDi->mayAllocateText; 1626 pBiDi->mayAllocateText=FALSE; 1627 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); 1628 pBiDi->mayAllocateText=saveMayAllocateText; 1629 ubidi_getRuns(pBiDi, pErrorCode); 1630 if(U_FAILURE(*pErrorCode)) { 1631 goto cleanup1; 1632 } 1633 /* check if some runs must be split, count how many splits */ 1634 addedRuns=0; 1635 runCount=pBiDi->runCount; 1636 runs=pBiDi->runs; 1637 visualStart=0; 1638 for(i=0; i<runCount; i++, visualStart+=runLength) { 1639 runLength=runs[i].visualLimit-visualStart; 1640 if(runLength<2) { 1641 continue; 1642 } 1643 logicalStart=GET_INDEX(runs[i].logicalStart); 1644 for(j=logicalStart+1; j<logicalStart+runLength; j++) { 1645 index0=visualMap[j]; 1646 index1=visualMap[j-1]; 1647 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { 1648 addedRuns++; 1649 } 1650 } 1651 } 1652 if(addedRuns) { 1653 if(getRunsMemory(pBiDi, runCount+addedRuns)) { 1654 if(runCount==1) { 1655 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */ 1656 pBiDi->runsMemory[0]=runs[0]; 1657 } 1658 runs=pBiDi->runs=pBiDi->runsMemory; 1659 pBiDi->runCount+=addedRuns; 1660 } else { 1661 goto cleanup1; 1662 } 1663 } 1664 /* split runs which are not consecutive in source text */ 1665 for(i=runCount-1; i>=0; i--) { 1666 runLength= i==0 ? 
runs[0].visualLimit : 1667 runs[i].visualLimit-runs[i-1].visualLimit; 1668 logicalStart=runs[i].logicalStart; 1669 indexOddBit=GET_ODD_BIT(logicalStart); 1670 logicalStart=GET_INDEX(logicalStart); 1671 if(runLength<2) { 1672 if(addedRuns) { 1673 runs[i+addedRuns]=runs[i]; 1674 } 1675 logicalPos=visualMap[logicalStart]; 1676 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1677 saveLevels[logicalPos]^indexOddBit); 1678 continue; 1679 } 1680 if(indexOddBit) { 1681 start=logicalStart; 1682 limit=logicalStart+runLength-1; 1683 step=1; 1684 } else { 1685 start=logicalStart+runLength-1; 1686 limit=logicalStart; 1687 step=-1; 1688 } 1689 for(j=start; j!=limit; j+=step) { 1690 index0=visualMap[j]; 1691 index1=visualMap[j+step]; 1692 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { 1693 logicalPos=BIDI_MIN(visualMap[start], index0); 1694 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1695 saveLevels[logicalPos]^indexOddBit); 1696 runs[i+addedRuns].visualLimit=runs[i].visualLimit; 1697 runs[i].visualLimit-=BIDI_ABS(j-start)+1; 1698 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); 1699 runs[i+addedRuns].insertRemove=insertRemove; 1700 runs[i].insertRemove&=~insertRemove; 1701 start=j+step; 1702 addedRuns--; 1703 } 1704 } 1705 if(addedRuns) { 1706 runs[i+addedRuns]=runs[i]; 1707 } 1708 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); 1709 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1710 saveLevels[logicalPos]^indexOddBit); 1711 } 1712 1713 cleanup1: 1714 /* restore initial paraLevel */ 1715 pBiDi->paraLevel^=1; 1716 cleanup2: 1717 /* restore real text */ 1718 pBiDi->text=text; 1719 pBiDi->length=saveLength; 1720 pBiDi->originalLength=length; 1721 pBiDi->direction=saveDirection; 1722 /* the saved levels should never excess levelsSize, but we check anyway */ 1723 if(saveLength>pBiDi->levelsSize) { 1724 saveLength=pBiDi->levelsSize; 1725 } 1726 uprv_memcpy(pBiDi->levels, saveLevels, 
saveLength*sizeof(UBiDiLevel)); 1727 pBiDi->trailingWSStart=saveTrailingWSStart; 1728 /* free memory for mapping table and visual text */ 1729 uprv_free(runsOnlyMemory); 1730 if(pBiDi->runCount>1) { 1731 pBiDi->direction=UBIDI_MIXED; 1732 } 1733 cleanup3: 1734 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; 1735 } 1736 1737 /* ubidi_setPara ------------------------------------------------------------ */ 1738 1739 U_CAPI void U_EXPORT2 1740 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, 1741 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, 1742 UErrorCode *pErrorCode) { 1743 UBiDiDirection direction; 1744 1745 /* check the argument values */ 1746 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 1747 if(pBiDi==NULL || text==NULL || length<-1 || 1748 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) { 1749 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1750 return; 1751 } 1752 1753 if(length==-1) { 1754 length=u_strlen(text); 1755 } 1756 1757 /* special treatment for RUNS_ONLY mode */ 1758 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { 1759 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); 1760 return; 1761 } 1762 1763 /* initialize the UBiDi structure */ 1764 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ 1765 pBiDi->text=text; 1766 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; 1767 pBiDi->paraLevel=paraLevel; 1768 pBiDi->direction=UBIDI_LTR; 1769 pBiDi->paraCount=1; 1770 1771 pBiDi->dirProps=NULL; 1772 pBiDi->levels=NULL; 1773 pBiDi->runs=NULL; 1774 pBiDi->insertPoints.size=0; /* clean up from last call */ 1775 pBiDi->insertPoints.confirmed=0; /* clean up from last call */ 1776 1777 /* 1778 * Save the original paraLevel if contextual; otherwise, set to 0. 
1779 */ 1780 if(IS_DEFAULT_LEVEL(paraLevel)) { 1781 pBiDi->defaultParaLevel=paraLevel; 1782 } else { 1783 pBiDi->defaultParaLevel=0; 1784 } 1785 1786 if(length==0) { 1787 /* 1788 * For an empty paragraph, create a UBiDi object with the paraLevel and 1789 * the flags and the direction set but without allocating zero-length arrays. 1790 * There is nothing more to do. 1791 */ 1792 if(IS_DEFAULT_LEVEL(paraLevel)) { 1793 pBiDi->paraLevel&=1; 1794 pBiDi->defaultParaLevel=0; 1795 } 1796 if(paraLevel&1) { 1797 pBiDi->flags=DIRPROP_FLAG(R); 1798 pBiDi->direction=UBIDI_RTL; 1799 } else { 1800 pBiDi->flags=DIRPROP_FLAG(L); 1801 pBiDi->direction=UBIDI_LTR; 1802 } 1803 1804 pBiDi->runCount=0; 1805 pBiDi->paraCount=0; 1806 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ 1807 return; 1808 } 1809 1810 pBiDi->runCount=-1; 1811 1812 /* 1813 * Get the directional properties, 1814 * the flags bit-set, and 1815 * determine the paragraph level if necessary. 1816 */ 1817 if(getDirPropsMemory(pBiDi, length)) { 1818 pBiDi->dirProps=pBiDi->dirPropsMemory; 1819 getDirProps(pBiDi); 1820 } else { 1821 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1822 return; 1823 } 1824 /* the processed length may have changed if UBIDI_OPTION_STREAMING */ 1825 length= pBiDi->length; 1826 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ 1827 /* allocate paras memory */ 1828 if(pBiDi->paraCount>1) { 1829 if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) { 1830 pBiDi->paras=pBiDi->parasMemory; 1831 pBiDi->paras[pBiDi->paraCount-1]=length; 1832 } else { 1833 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1834 return; 1835 } 1836 } else { 1837 /* initialize paras for single paragraph */ 1838 pBiDi->paras=pBiDi->simpleParas; 1839 pBiDi->simpleParas[0]=length; 1840 } 1841 1842 /* are explicit levels specified? 
*/ 1843 if(embeddingLevels==NULL) { 1844 /* no: determine explicit levels according to the (Xn) rules */\ 1845 if(getLevelsMemory(pBiDi, length)) { 1846 pBiDi->levels=pBiDi->levelsMemory; 1847 direction=resolveExplicitLevels(pBiDi); 1848 } else { 1849 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1850 return; 1851 } 1852 } else { 1853 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ 1854 pBiDi->levels=embeddingLevels; 1855 direction=checkExplicitLevels(pBiDi, pErrorCode); 1856 if(U_FAILURE(*pErrorCode)) { 1857 return; 1858 } 1859 } 1860 1861 /* 1862 * The steps after (X9) in the UBiDi algorithm are performed only if 1863 * the paragraph text has mixed directionality! 1864 */ 1865 pBiDi->direction=direction; 1866 switch(direction) { 1867 case UBIDI_LTR: 1868 /* make sure paraLevel is even */ 1869 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1); 1870 1871 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ 1872 pBiDi->trailingWSStart=0; 1873 break; 1874 case UBIDI_RTL: 1875 /* make sure paraLevel is odd */ 1876 pBiDi->paraLevel|=1; 1877 1878 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ 1879 pBiDi->trailingWSStart=0; 1880 break; 1881 default: 1882 /* 1883 * Choose the right implicit state table 1884 */ 1885 switch(pBiDi->reorderingMode) { 1886 case UBIDI_REORDER_DEFAULT: 1887 pBiDi->pImpTabPair=&impTab_DEFAULT; 1888 break; 1889 case UBIDI_REORDER_NUMBERS_SPECIAL: 1890 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; 1891 break; 1892 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: 1893 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; 1894 break; 1895 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: 1896 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; 1897 break; 1898 case UBIDI_REORDER_INVERSE_LIKE_DIRECT: 1899 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { 1900 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; 1901 } else { 1902 
pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; 1903 } 1904 break; 1905 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: 1906 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { 1907 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; 1908 } else { 1909 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; 1910 } 1911 break; 1912 default: 1913 /* we should never get here */ 1914 U_ASSERT(FALSE); 1915 break; 1916 } 1917 /* 1918 * If there are no external levels specified and there 1919 * are no significant explicit level codes in the text, 1920 * then we can treat the entire paragraph as one run. 1921 * Otherwise, we need to perform the following rules on runs of 1922 * the text with the same embedding levels. (X10) 1923 * "Significant" explicit level codes are ones that actually 1924 * affect non-BN characters. 1925 * Examples for "insignificant" ones are empty embeddings 1926 * LRE-PDF, LRE-RLE-PDF-PDF, etc. 1927 */ 1928 if(embeddingLevels==NULL && pBiDi->paraCount<=1 && 1929 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { 1930 resolveImplicitLevels(pBiDi, 0, length, 1931 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), 1932 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); 1933 } else { 1934 /* sor, eor: start and end types of same-level-run */ 1935 UBiDiLevel *levels=pBiDi->levels; 1936 int32_t start, limit=0; 1937 UBiDiLevel level, nextLevel; 1938 DirProp sor, eor; 1939 1940 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ 1941 level=GET_PARALEVEL(pBiDi, 0); 1942 nextLevel=levels[0]; 1943 if(level<nextLevel) { 1944 eor=GET_LR_FROM_LEVEL(nextLevel); 1945 } else { 1946 eor=GET_LR_FROM_LEVEL(level); 1947 } 1948 1949 do { 1950 /* determine start and limit of the run (end points just behind the run) */ 1951 1952 /* the values for this run's start are the same as for the previous run's end */ 1953 start=limit; 1954 level=nextLevel; 1955 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) { 1956 /* 
except if this is a new paragraph, then set sor = para level */ 1957 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); 1958 } else { 1959 sor=eor; 1960 } 1961 1962 /* search for the limit of this run */ 1963 while(++limit<length && levels[limit]==level) {} 1964 1965 /* get the correct level of the next run */ 1966 if(limit<length) { 1967 nextLevel=levels[limit]; 1968 } else { 1969 nextLevel=GET_PARALEVEL(pBiDi, length-1); 1970 } 1971 1972 /* determine eor from max(level, nextLevel); sor is last run's eor */ 1973 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) { 1974 eor=GET_LR_FROM_LEVEL(nextLevel); 1975 } else { 1976 eor=GET_LR_FROM_LEVEL(level); 1977 } 1978 1979 /* if the run consists of overridden directional types, then there 1980 are no implicit types to be resolved */ 1981 if(!(level&UBIDI_LEVEL_OVERRIDE)) { 1982 resolveImplicitLevels(pBiDi, start, limit, sor, eor); 1983 } else { 1984 /* remove the UBIDI_LEVEL_OVERRIDE flags */ 1985 do { 1986 levels[start++]&=~UBIDI_LEVEL_OVERRIDE; 1987 } while(start<limit); 1988 } 1989 } while(limit<length); 1990 } 1991 /* check if we got any memory shortage while adding insert points */ 1992 if (U_FAILURE(pBiDi->insertPoints.errorCode)) 1993 { 1994 *pErrorCode=pBiDi->insertPoints.errorCode; 1995 return; 1996 } 1997 /* reset the embedding levels for some non-graphic characters (L1), (X9) */ 1998 adjustWSLevels(pBiDi); 1999 break; 2000 } 2001 /* add RLM for inverse Bidi with contextual orientation resolving 2002 * to RTL which would not round-trip otherwise 2003 */ 2004 if((pBiDi->defaultParaLevel>0) && 2005 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && 2006 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || 2007 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { 2008 int32_t i, j, start, last; 2009 DirProp dirProp; 2010 for(i=0; i<pBiDi->paraCount; i++) { 2011 last=pBiDi->paras[i]-1; 2012 if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) { 2013 continue; /* LTR 
paragraph */ 2014 } 2015 start= i==0 ? 0 : pBiDi->paras[i - 1]; 2016 for(j=last; j>=start; j--) { 2017 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]); 2018 if(dirProp==L) { 2019 if(j<last) { 2020 while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) { 2021 last--; 2022 } 2023 } 2024 addPoint(pBiDi, last, RLM_BEFORE); 2025 break; 2026 } 2027 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) { 2028 break; 2029 } 2030 } 2031 } 2032 } 2033 2034 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { 2035 pBiDi->resultLength -= pBiDi->controlCount; 2036 } else { 2037 pBiDi->resultLength += pBiDi->insertPoints.size; 2038 } 2039 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ 2040 } 2041 2042 U_CAPI void U_EXPORT2 2043 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { 2044 if(pBiDi!=NULL) { 2045 pBiDi->orderParagraphsLTR=orderParagraphsLTR; 2046 } 2047 } 2048 2049 U_CAPI UBool U_EXPORT2 2050 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { 2051 if(pBiDi!=NULL) { 2052 return pBiDi->orderParagraphsLTR; 2053 } else { 2054 return FALSE; 2055 } 2056 } 2057 2058 U_CAPI UBiDiDirection U_EXPORT2 2059 ubidi_getDirection(const UBiDi *pBiDi) { 2060 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2061 return pBiDi->direction; 2062 } else { 2063 return UBIDI_LTR; 2064 } 2065 } 2066 2067 U_CAPI const UChar * U_EXPORT2 2068 ubidi_getText(const UBiDi *pBiDi) { 2069 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2070 return pBiDi->text; 2071 } else { 2072 return NULL; 2073 } 2074 } 2075 2076 U_CAPI int32_t U_EXPORT2 2077 ubidi_getLength(const UBiDi *pBiDi) { 2078 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2079 return pBiDi->originalLength; 2080 } else { 2081 return 0; 2082 } 2083 } 2084 2085 U_CAPI int32_t U_EXPORT2 2086 ubidi_getProcessedLength(const UBiDi *pBiDi) { 2087 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2088 return pBiDi->length; 2089 } else { 2090 return 0; 2091 } 2092 } 2093 2094 U_CAPI int32_t U_EXPORT2 2095 ubidi_getResultLength(const UBiDi *pBiDi) { 2096 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2097 return 
pBiDi->resultLength; 2098 } else { 2099 return 0; 2100 } 2101 } 2102 2103 /* paragraphs API functions ------------------------------------------------- */ 2104 2105 U_CAPI UBiDiLevel U_EXPORT2 2106 ubidi_getParaLevel(const UBiDi *pBiDi) { 2107 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2108 return pBiDi->paraLevel; 2109 } else { 2110 return 0; 2111 } 2112 } 2113 2114 U_CAPI int32_t U_EXPORT2 2115 ubidi_countParagraphs(UBiDi *pBiDi) { 2116 if(!IS_VALID_PARA_OR_LINE(pBiDi)) { 2117 return 0; 2118 } else { 2119 return pBiDi->paraCount; 2120 } 2121 } 2122 2123 U_CAPI void U_EXPORT2 2124 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, 2125 int32_t *pParaStart, int32_t *pParaLimit, 2126 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { 2127 int32_t paraStart; 2128 2129 /* check the argument values */ 2130 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 2131 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); 2132 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); 2133 2134 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ 2135 if(paraIndex) { 2136 paraStart=pBiDi->paras[paraIndex-1]; 2137 } else { 2138 paraStart=0; 2139 } 2140 if(pParaStart!=NULL) { 2141 *pParaStart=paraStart; 2142 } 2143 if(pParaLimit!=NULL) { 2144 *pParaLimit=pBiDi->paras[paraIndex]; 2145 } 2146 if(pParaLevel!=NULL) { 2147 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); 2148 } 2149 } 2150 2151 U_CAPI int32_t U_EXPORT2 2152 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, 2153 int32_t *pParaStart, int32_t *pParaLimit, 2154 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { 2155 uint32_t paraIndex; 2156 2157 /* check the argument values */ 2158 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ 2159 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); 2160 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); 2161 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ 2162 RETURN_IF_BAD_RANGE(charIndex, 0, 
pBiDi->length, *pErrorCode, -1); 2163 2164 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++); 2165 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); 2166 return paraIndex; 2167 } 2168 2169 U_CAPI void U_EXPORT2 2170 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, 2171 const void *newContext, UBiDiClassCallback **oldFn, 2172 const void **oldContext, UErrorCode *pErrorCode) 2173 { 2174 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 2175 if(pBiDi==NULL) { 2176 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2177 return; 2178 } 2179 if( oldFn ) 2180 { 2181 *oldFn = pBiDi->fnClassCallback; 2182 } 2183 if( oldContext ) 2184 { 2185 *oldContext = pBiDi->coClassCallback; 2186 } 2187 pBiDi->fnClassCallback = newFn; 2188 pBiDi->coClassCallback = newContext; 2189 } 2190 2191 U_CAPI void U_EXPORT2 2192 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) 2193 { 2194 if(pBiDi==NULL) { 2195 return; 2196 } 2197 if( fn ) 2198 { 2199 *fn = pBiDi->fnClassCallback; 2200 } 2201 if( context ) 2202 { 2203 *context = pBiDi->coClassCallback; 2204 } 2205 } 2206 2207 U_CAPI UCharDirection U_EXPORT2 2208 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) 2209 { 2210 UCharDirection dir; 2211 2212 if( pBiDi->fnClassCallback == NULL || 2213 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) 2214 { 2215 return ubidi_getClass(pBiDi->bdp, c); 2216 } else { 2217 return dir; 2218 } 2219 } 2220 2221