1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1999-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: ubidi.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 1999jul27 14 * created by: Markus W. Scherer, updated by Matitiahu Allouche 15 */ 16 17 #ifndef UBIDI_H 18 #define UBIDI_H 19 20 #include "unicode/utypes.h" 21 #include "unicode/uchar.h" 22 #include "unicode/localpointer.h" 23 24 /** 25 *\file 26 * \brief C API: Bidi algorithm 27 * 28 * <h2>Bidi algorithm for ICU</h2> 29 * 30 * This is an implementation of the Unicode Bidirectional Algorithm. 31 * The algorithm is defined in the 32 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.<p> 33 * 34 * Note: Libraries that perform a bidirectional algorithm and 35 * reorder strings accordingly are sometimes called "Storage Layout Engines". 36 * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such 37 * "Storage Layout Engines". 38 * 39 * <h3>General remarks about the API:</h3> 40 * 41 * In functions with an error code parameter, 42 * the <code>pErrorCode</code> pointer must be valid 43 * and the value that it points to must not indicate a failure before 44 * the function call. Otherwise, the function returns immediately. 45 * After the function call, the value indicates success or failure.<p> 46 * 47 * The "limit" of a sequence of characters is the position just after their 48 * last character, i.e., one more than that position.<p> 49 * 50 * Some of the API functions provide access to "runs". 51 * Such a "run" is defined as a sequence of characters 52 * that are at the same embedding level 53 * after performing the Bidi algorithm.<p> 54 * 55 * @author Markus W. 
Scherer 56 * @version 1.0 57 * 58 * 59 * <h4> Sample code for the ICU Bidi API </h4> 60 * 61 * <h5>Rendering a paragraph with the ICU Bidi API</h5> 62 * 63 * This is (hypothetical) sample code that illustrates 64 * how the ICU Bidi API could be used to render a paragraph of text. 65 * Rendering code depends highly on the graphics system, 66 * therefore this sample code must make a lot of assumptions, 67 * which may or may not match any existing graphics system's properties. 68 * 69 * <p>The basic assumptions are:</p> 70 * <ul> 71 * <li>Rendering is done from left to right on a horizontal line.</li> 72 * <li>A run of single-style, unidirectional text can be rendered at once.</li> 73 * <li>Such a run of text is passed to the graphics system with 74 * characters (code units) in logical order.</li> 75 * <li>The line-breaking algorithm is very complicated 76 * and Locale-dependent - 77 * and therefore its implementation omitted from this sample code.</li> 78 * </ul> 79 * 80 * <pre> 81 * \code 82 *#include "unicode/ubidi.h" 83 * 84 *typedef enum { 85 * styleNormal=0, styleSelected=1, 86 * styleBold=2, styleItalics=4, 87 * styleSuper=8, styleSub=16 88 *} Style; 89 * 90 *typedef struct { int32_t limit; Style style; } StyleRun; 91 * 92 *int getTextWidth(const UChar *text, int32_t start, int32_t limit, 93 * const StyleRun *styleRuns, int styleRunCount); 94 * 95 * // set *pLimit and *pStyleRunLimit for a line 96 * // from text[start] and from styleRuns[styleRunStart] 97 * // using ubidi_getLogicalRun(para, ...) 
98 *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit, 99 * UBiDi *para, 100 * const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit, 101 * int *pLineWidth); 102 * 103 * // render runs on a line sequentially, always from left to right 104 * 105 * // prepare rendering a new line 106 * void startLine(UBiDiDirection textDirection, int lineWidth); 107 * 108 * // render a run of text and advance to the right by the run width 109 * // the text[start..limit-1] is always in logical order 110 * void renderRun(const UChar *text, int32_t start, int32_t limit, 111 * UBiDiDirection textDirection, Style style); 112 * 113 * // We could compute a cross-product 114 * // from the style runs with the directional runs 115 * // and then reorder it. 116 * // Instead, here we iterate over each run type 117 * // and render the intersections - 118 * // with shortcuts in simple (and common) cases. 119 * // renderParagraph() is the main function. 120 * 121 * // render a directional run with 122 * // (possibly) multiple style runs intersecting with it 123 * void renderDirectionalRun(const UChar *text, 124 * int32_t start, int32_t limit, 125 * UBiDiDirection direction, 126 * const StyleRun *styleRuns, int styleRunCount) { 127 * int i; 128 * 129 * // iterate over style runs 130 * if(direction==UBIDI_LTR) { 131 * int styleLimit; 132 * 133 * for(i=0; i<styleRunCount; ++i) { 134 * styleLimit=styleRuns[i].limit; 135 * if(start<styleLimit) { 136 * if(styleLimit>limit) { styleLimit=limit; } 137 * renderRun(text, start, styleLimit, 138 * direction, styleRuns[i].style); 139 * if(styleLimit==limit) { break; } 140 * start=styleLimit; 141 * } 142 * } 143 * } else { 144 * int styleStart; 145 * 146 * for(i=styleRunCount-1; i>=0; --i) { 147 * if(i>0) { 148 * styleStart=styleRuns[i-1].limit; 149 * } else { 150 * styleStart=0; 151 * } 152 * if(limit>=styleStart) { 153 * if(styleStart<start) { styleStart=start; } 154 * renderRun(text, styleStart, limit, 155 * direction,
styleRuns[i].style); 156 * if(styleStart==start) { break; } 157 * limit=styleStart; 158 * } 159 * } 160 * } 161 * } 162 * 163 * // the line object represents text[start..limit-1] 164 * void renderLine(UBiDi *line, const UChar *text, 165 * int32_t start, int32_t limit, 166 * const StyleRun *styleRuns, int styleRunCount) { 167 * UBiDiDirection direction=ubidi_getDirection(line); 168 * if(direction!=UBIDI_MIXED) { 169 * // unidirectional 170 * if(styleRunCount<=1) { 171 * renderRun(text, start, limit, direction, styleRuns[0].style); 172 * } else { 173 * renderDirectionalRun(text, start, limit, 174 * direction, styleRuns, styleRunCount); 175 * } 176 * } else { 177 * // mixed-directional 178 * int32_t count, i, length; 179 * UBiDiLevel level; 180 * 181 * count=ubidi_countRuns(line, pErrorCode); 182 * if(U_SUCCESS(*pErrorCode)) { 183 * if(styleRunCount<=1) { 184 * Style style=styleRuns[0].style; 185 * 186 * // iterate over directional runs 187 * for(i=0; i<count; ++i) { 188 * direction=ubidi_getVisualRun(line, i, &start, &length); 189 * renderRun(text, start, start+length, direction, style); 190 * } 191 * } else { 192 * int32_t j; 193 * 194 * // iterate over both directional and style runs 195 * for(i=0; i<count; ++i) { 196 * direction=ubidi_getVisualRun(line, i, &start, &length); 197 * renderDirectionalRun(text, start, start+length, 198 * direction, styleRuns, styleRunCount); 199 * } 200 * } 201 * } 202 * } 203 * } 204 * 205 *void renderParagraph(const UChar *text, int32_t length, 206 * UBiDiDirection textDirection, 207 * const StyleRun *styleRuns, int styleRunCount, 208 * int lineWidth, 209 * UErrorCode *pErrorCode) { 210 * UBiDi *para; 211 * 212 * if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) { 213 * return; 214 * } 215 * 216 * para=ubidi_openSized(length, 0, pErrorCode); 217 * if(para==NULL) { return; } 218 * 219 * ubidi_setPara(para, text, length, 220 * textDirection ?
UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR, 221 * NULL, pErrorCode); 222 * if(U_SUCCESS(*pErrorCode)) { 223 * UBiDiLevel paraLevel=1&ubidi_getParaLevel(para); 224 * StyleRun styleRun={ length, styleNormal }; 225 * int width; 226 * 227 * if(styleRuns==NULL || styleRunCount<=0) { 228 * styleRunCount=1; 229 * styleRuns=&styleRun; 230 * } 231 * 232 * // assume styleRuns[styleRunCount-1].limit>=length 233 * 234 * width=getTextWidth(text, 0, length, styleRuns, styleRunCount); 235 * if(width<=lineWidth) { 236 * // everything fits onto one line 237 * 238 * // prepare rendering a new line from either left or right 239 * startLine(paraLevel, width); 240 * 241 * renderLine(para, text, 0, length, 242 * styleRuns, styleRunCount); 243 * } else { 244 * UBiDi *line; 245 * 246 * // we need to render several lines 247 * line=ubidi_openSized(length, 0, pErrorCode); 248 * if(line!=NULL) { 249 * int32_t start=0, limit; 250 * int styleRunStart=0, styleRunLimit; 251 * 252 * for(;;) { 253 * limit=length; 254 * styleRunLimit=styleRunCount; 255 * getLineBreak(text, start, &limit, para, 256 * styleRuns, styleRunStart, &styleRunLimit, 257 * &width); 258 * ubidi_setLine(para, start, limit, line, pErrorCode); 259 * if(U_SUCCESS(*pErrorCode)) { 260 * // prepare rendering a new line 261 * // from either left or right 262 * startLine(paraLevel, width); 263 * 264 * renderLine(line, text, start, limit, 265 * styleRuns+styleRunStart, 266 * styleRunLimit-styleRunStart); 267 * } 268 * if(limit==length) { break; } 269 * start=limit; 270 * styleRunStart=styleRunLimit-1; 271 * if(start>=styleRuns[styleRunStart].limit) { 272 * ++styleRunStart; 273 * } 274 * } 275 * 276 * ubidi_close(line); 277 * } 278 * } 279 * } 280 * 281 * ubidi_close(para); 282 *} 283 *\endcode 284 * </pre> 285 */ 286 287 /*DOCXX_TAG*/ 288 /*@{*/ 289 290 /** 291 * UBiDiLevel is the type of the level values in this 292 * Bidi implementation. 
293 * It holds an embedding level and indicates the visual direction 294 * by its bit 0 (even/odd value).<p> 295 * 296 * It can also hold non-level values for the 297 * <code>paraLevel</code> and <code>embeddingLevels</code> 298 * arguments of <code>ubidi_setPara()</code>; there: 299 * <ul> 300 * <li>bit 7 of an <code>embeddingLevels[]</code> 301 * value indicates whether the using application is 302 * specifying the level of a character to <i>override</i> whatever the 303 * Bidi implementation would resolve it to.</li> 304 * <li><code>paraLevel</code> can be set to the 305 * pseudo-level values <code>UBIDI_DEFAULT_LTR</code> 306 * and <code>UBIDI_DEFAULT_RTL</code>.</li> 307 * </ul> 308 * 309 * @see ubidi_setPara 310 * 311 * <p>The related constants are not real, valid level values. 312 * <code>UBIDI_DEFAULT_XXX</code> can be used to specify 313 * a default for the paragraph level for 314 * when the <code>ubidi_setPara()</code> function 315 * shall determine it but there is no 316 * strongly typed character in the input.<p> 317 * 318 * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even 319 * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd, 320 * just like with normal LTR and RTL level values - 321 * these special values are designed that way. Also, the implementation 322 * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. 323 * 324 * @see UBIDI_DEFAULT_LTR 325 * @see UBIDI_DEFAULT_RTL 326 * @see UBIDI_LEVEL_OVERRIDE 327 * @see UBIDI_MAX_EXPLICIT_LEVEL 328 * @stable ICU 2.0 329 */ 330 typedef uint8_t UBiDiLevel; 331 332 /** Paragraph level setting.<p> 333 * 334 * Constant indicating that the base direction depends on the first strong 335 * directional character in the text according to the Unicode Bidirectional 336 * Algorithm. 
If no strong directional character is present, 337 * then set the paragraph level to 0 (left-to-right).<p> 338 * 339 * If this value is used in conjunction with reordering modes 340 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or 341 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 342 * is assumed to be visual LTR, and the text after reordering is required 343 * to be the corresponding logical string with appropriate contextual 344 * direction. The direction of the result string will be RTL if either 345 * the rightmost or leftmost strong character of the source text is RTL 346 * or Arabic Letter; the direction will be LTR otherwise.<p> 347 * 348 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may 349 * be added at the beginning of the result string to ensure round trip 350 * (that the result string, when reordered back to visual, will produce 351 * the original source text). 352 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT 353 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL 354 * @stable ICU 2.0 355 */ 356 #define UBIDI_DEFAULT_LTR 0xfe 357 358 /** Paragraph level setting.<p> 359 * 360 * Constant indicating that the base direction depends on the first strong 361 * directional character in the text according to the Unicode Bidirectional 362 * Algorithm. If no strong directional character is present, 363 * then set the paragraph level to 1 (right-to-left).<p> 364 * 365 * If this value is used in conjunction with reordering modes 366 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or 367 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 368 * is assumed to be visual LTR, and the text after reordering is required 369 * to be the corresponding logical string with appropriate contextual 370 * direction.
The direction of the result string will be RTL if either 371 * the rightmost or leftmost strong character of the source text is RTL 372 * or Arabic Letter, or if the text contains no strong character; 373 * the direction will be LTR otherwise.<p> 374 * 375 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may 376 * be added at the beginning of the result string to ensure round trip 377 * (that the result string, when reordered back to visual, will produce 378 * the original source text). 379 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT 380 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL 381 * @stable ICU 2.0 382 */ 383 #define UBIDI_DEFAULT_RTL 0xff 384 385 /** 386 * Maximum explicit embedding level. 387 * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>). 388 * @stable ICU 2.0 389 */ 390 #define UBIDI_MAX_EXPLICIT_LEVEL 61 391 392 /** Bit flag for level input. 393 * Overrides directional properties. 394 * @stable ICU 2.0 395 */ 396 #define UBIDI_LEVEL_OVERRIDE 0x80 397 398 /** 399 * Special value which can be returned by the mapping functions when a logical 400 * index has no corresponding visual index or vice-versa. This may happen 401 * for the logical-to-visual mapping of a Bidi control when option 402 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen 403 * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted 404 * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 405 * @see ubidi_getVisualIndex 406 * @see ubidi_getVisualMap 407 * @see ubidi_getLogicalIndex 408 * @see ubidi_getLogicalMap 409 * @stable ICU 3.6 410 */ 411 #define UBIDI_MAP_NOWHERE (-1) 412 413 /** 414 * <code>UBiDiDirection</code> values indicate the text direction. 415 * @stable ICU 2.0 416 */ 417 enum UBiDiDirection { 418 /** Left-to-right text. This is a 0 value.
419 * <ul> 420 * <li>As return value for <code>ubidi_getDirection()</code>, it means 421 * that the source string contains no right-to-left characters, or 422 * that the source string is empty and the paragraph level is even. 423 * <li> As return value for <code>ubidi_getBaseDirection()</code>, it 424 * means that the first strong character of the source string has 425 * a left-to-right direction. 426 * </ul> 427 * @stable ICU 2.0 428 */ 429 UBIDI_LTR, 430 /** Right-to-left text. This is a 1 value. 431 * <ul> 432 * <li>As return value for <code>ubidi_getDirection()</code>, it means 433 * that the source string contains no left-to-right characters, or 434 * that the source string is empty and the paragraph level is odd. 435 * <li> As return value for <code>ubidi_getBaseDirection()</code>, it 436 * means that the first strong character of the source string has 437 * a right-to-left direction. 438 * </ul> 439 * @stable ICU 2.0 440 */ 441 UBIDI_RTL, 442 /** Mixed-directional text. 443 * <p>As return value for <code>ubidi_getDirection()</code>, it means 444 * that the source string contains both left-to-right and 445 * right-to-left characters. 446 * @stable ICU 2.0 447 */ 448 UBIDI_MIXED, 449 /** No strongly directional text. 450 * <p>As return value for <code>ubidi_getBaseDirection()</code>, it means 451 * that the source string is missing or empty, or contains neither left-to-right 452 * nor right-to-left characters. 453 * @draft ICU 4.6 454 */ 455 UBIDI_NEUTRAL 456 }; 457 458 /** @stable ICU 2.0 */ 459 typedef enum UBiDiDirection UBiDiDirection; 460 461 /** 462 * Forward declaration of the <code>UBiDi</code> structure for the declaration of 463 * the API functions. 
Its fields are implementation-specific.<p> 464 * This structure holds information about a paragraph (or multiple paragraphs) 465 * of text with Bidi-algorithm-related details, or about one line of 466 * such a paragraph.<p> 467 * Reordering can be done on a line, or on one or more paragraphs which are 468 * then interpreted each as one single line. 469 * @stable ICU 2.0 470 */ 471 struct UBiDi; 472 473 /** @stable ICU 2.0 */ 474 typedef struct UBiDi UBiDi; 475 476 /** 477 * Allocate a <code>UBiDi</code> structure. 478 * Such an object is initially empty. It is assigned 479 * the Bidi properties of a piece of text containing one or more paragraphs 480 * by <code>ubidi_setPara()</code> 481 * or the Bidi properties of a line within a paragraph by 482 * <code>ubidi_setLine()</code>.<p> 483 * This object can be reused for as long as it is not deallocated 484 * by calling <code>ubidi_close()</code>.<p> 485 * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate 486 * additional memory for internal structures as necessary. 487 * 488 * @return An empty <code>UBiDi</code> object. 489 * @stable ICU 2.0 490 */ 491 U_STABLE UBiDi * U_EXPORT2 492 ubidi_open(void); 493 494 /** 495 * Allocate a <code>UBiDi</code> structure with preallocated memory 496 * for internal structures. 497 * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code> 498 * with no arguments, but it also preallocates memory for internal structures 499 * according to the sizings supplied by the caller.<p> 500 * Subsequent functions will not allocate any more memory, and are thus 501 * guaranteed not to fail because of lack of memory.<p> 502 * The preallocation can be limited to some of the internal memory 503 * by setting some values to 0 here. 
That means that if, e.g., 504 * <code>maxRunCount</code> cannot be reasonably predetermined and should not 505 * be set to <code>maxLength</code> (the only failproof value) to avoid 506 * wasting memory, then <code>maxRunCount</code> could be set to 0 here 507 * and the internal structures that are associated with it will be allocated 508 * on demand, just like with <code>ubidi_open()</code>. 509 * 510 * @param maxLength is the maximum text or line length that internal memory 511 * will be preallocated for. An attempt to associate this object with a 512 * longer text will fail, unless this value is 0, which leaves the allocation 513 * up to the implementation. 514 * 515 * @param maxRunCount is the maximum anticipated number of same-level runs 516 * that internal memory will be preallocated for. An attempt to access 517 * visual runs on an object that was not preallocated for as many runs 518 * as the text was actually resolved to will fail, 519 * unless this value is 0, which leaves the allocation up to the implementation.<br><br> 520 * The number of runs depends on the actual text and may be anywhere between 521 * 1 and <code>maxLength</code>. It is typically small. 522 * 523 * @param pErrorCode must be a valid pointer to an error code value. 524 * 525 * @return An empty <code>UBiDi</code> object with preallocated memory. 526 * @stable ICU 2.0 527 */ 528 U_STABLE UBiDi * U_EXPORT2 529 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode); 530 531 /** 532 * <code>ubidi_close()</code> must be called to free the memory 533 * associated with a UBiDi object.<p> 534 * 535 * <strong>Important: </strong> 536 * A parent <code>UBiDi</code> object must not be destroyed or reused if 537 * it still has children.
538 * If a <code>UBiDi</code> object has become the <i>child</i> 539 * of another one (its <i>parent</i>) by calling 540 * <code>ubidi_setLine()</code>, then the child object must 541 * be destroyed (closed) or reused (by calling 542 * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>) 543 * before the parent object. 544 * 545 * @param pBiDi is a <code>UBiDi</code> object. 546 * 547 * @see ubidi_setPara 548 * @see ubidi_setLine 549 * @stable ICU 2.0 550 */ 551 U_STABLE void U_EXPORT2 552 ubidi_close(UBiDi *pBiDi); 553 554 #if U_SHOW_CPLUSPLUS_API 555 556 U_NAMESPACE_BEGIN 557 558 /** 559 * \class LocalUBiDiPointer 560 * "Smart pointer" class, closes a UBiDi via ubidi_close(). 561 * For most methods see the LocalPointerBase base class. 562 * 563 * @see LocalPointerBase 564 * @see LocalPointer 565 * @stable ICU 4.4 566 */ 567 U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close); 568 569 U_NAMESPACE_END 570 571 #endif 572 573 /** 574 * Modify the operation of the Bidi algorithm such that it 575 * approximates an "inverse Bidi" algorithm. This function 576 * must be called before <code>ubidi_setPara()</code>. 577 * 578 * <p>The normal operation of the Bidi algorithm as described 579 * in the Unicode Technical Report is to take text stored in logical 580 * (keyboard, typing) order and to determine the reordering of it for visual 581 * rendering. 582 * Some legacy systems store text in visual order, and for operations 583 * with standard, Unicode-based algorithms, the text needs to be transformed 584 * to logical order. This is effectively the inverse algorithm of the 585 * described Bidi algorithm. 
Note that there is no standard algorithm for 586 * this "inverse Bidi" and that the current implementation provides only an 587 * approximation of "inverse Bidi".</p> 588 * 589 * <p>With <code>isInverse</code> set to <code>TRUE</code>, 590 * this function changes the behavior of some of the subsequent functions 591 * in a way that they can be used for the inverse Bidi algorithm. 592 * Specifically, runs of text with numeric characters will be treated in a 593 * special way and may need to be surrounded with LRM characters when they are 594 * written in reordered sequence.</p> 595 * 596 * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>. 597 * Since the actual input for "inverse Bidi" is visually ordered text and 598 * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually 599 * the runs of the logically ordered output.</p> 600 * 601 * <p>Calling this function with argument <code>isInverse</code> set to 602 * <code>TRUE</code> is equivalent to calling 603 * <code>ubidi_setReorderingMode</code> with argument 604 * <code>reorderingMode</code> 605 * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br> 606 * Calling this function with argument <code>isInverse</code> set to 607 * <code>FALSE</code> is equivalent to calling 608 * <code>ubidi_setReorderingMode</code> with argument 609 * <code>reorderingMode</code> 610 * set to <code>#UBIDI_REORDER_DEFAULT</code>. 611 * 612 * @param pBiDi is a <code>UBiDi</code> object. 613 * 614 * @param isInverse specifies "forward" or "inverse" Bidi operation. 615 * 616 * @see ubidi_setPara 617 * @see ubidi_writeReordered 618 * @see ubidi_setReorderingMode 619 * @stable ICU 2.0 620 */ 621 U_STABLE void U_EXPORT2 622 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse); 623 624 /** 625 * Is this Bidi object set to perform the inverse Bidi algorithm? 
626 * <p>Note: calling this function after setting the reordering mode with 627 * <code>ubidi_setReorderingMode</code> will return <code>TRUE</code> if the 628 * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, 629 * <code>FALSE</code> for all other values.</p> 630 * 631 * @param pBiDi is a <code>UBiDi</code> object. 632 * @return TRUE if the Bidi object is set to perform the inverse Bidi algorithm 633 * by handling numbers as L. 634 * 635 * @see ubidi_setInverse 636 * @see ubidi_setReorderingMode 637 * @stable ICU 2.0 638 */ 639 640 U_STABLE UBool U_EXPORT2 641 ubidi_isInverse(UBiDi *pBiDi); 642 643 /** 644 * Specify whether block separators must be allocated level zero, 645 * so that successive paragraphs will progress from left to right. 646 * This function must be called before <code>ubidi_setPara()</code>. 647 * Paragraph separators (B) may appear in the text. Setting them to level zero 648 * means that all paragraph separators (including one possibly appearing 649 * in the last text position) are kept in the reordered text after the text 650 * that they follow in the source text. 651 * When this feature is not enabled, a paragraph separator at the last 652 * position of the text before reordering will go to the first position 653 * of the reordered text when the paragraph level is odd. 654 * 655 * @param pBiDi is a <code>UBiDi</code> object. 656 * 657 * @param orderParagraphsLTR specifies whether paragraph separators (B) must 658 * receive level 0, so that successive paragraphs progress from left to right. 659 * 660 * @see ubidi_setPara 661 * @stable ICU 3.4 662 */ 663 U_STABLE void U_EXPORT2 664 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR); 665 666 /** 667 * Is this Bidi object set to allocate level 0 to block separators so that 668 * successive paragraphs progress from left to right? 669 * 670 * @param pBiDi is a <code>UBiDi</code> object. 
671 * @return TRUE if the Bidi object is set to allocate level 0 to block 672 * separators. 673 * 674 * @see ubidi_orderParagraphsLTR 675 * @stable ICU 3.4 676 */ 677 U_STABLE UBool U_EXPORT2 678 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi); 679 680 /** 681 * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi 682 * algorithm to use. 683 * 684 * @see ubidi_setReorderingMode 685 * @stable ICU 3.6 686 */ 687 typedef enum UBiDiReorderingMode { 688 /** Regular Logical to Visual Bidi algorithm according to Unicode. 689 * This is a 0 value. 690 * @stable ICU 3.6 */ 691 UBIDI_REORDER_DEFAULT = 0, 692 /** Logical to Visual algorithm which handles numbers in a way which 693 * mimics the behavior of Windows XP. 694 * @stable ICU 3.6 */ 695 UBIDI_REORDER_NUMBERS_SPECIAL, 696 /** Logical to Visual algorithm grouping numbers with adjacent R characters 697 * (reversible algorithm). 698 * @stable ICU 3.6 */ 699 UBIDI_REORDER_GROUP_NUMBERS_WITH_R, 700 /** Reorder runs only to transform a Logical LTR string to the Logical RTL 701 * string with the same display, or vice-versa.<br> 702 * If this mode is set together with option 703 * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source 704 * text may be removed and other controls may be added to produce the 705 * minimum combination which has the required display. 706 * @stable ICU 3.6 */ 707 UBIDI_REORDER_RUNS_ONLY, 708 /** Visual to Logical algorithm which handles numbers like L 709 * (same algorithm as selected by <code>ubidi_setInverse(TRUE)</code>). 710 * @see ubidi_setInverse 711 * @stable ICU 3.6 */ 712 UBIDI_REORDER_INVERSE_NUMBERS_AS_L, 713 /** Visual to Logical algorithm equivalent to the regular Logical to Visual 714 * algorithm. 715 * @stable ICU 3.6 */ 716 UBIDI_REORDER_INVERSE_LIKE_DIRECT, 717 /** Inverse Bidi (Visual to Logical) algorithm for the 718 * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
719 * @stable ICU 3.6 */ 720 UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, 721 /** Number of values for reordering mode. 722 * @stable ICU 3.6 */ 723 UBIDI_REORDER_COUNT 724 } UBiDiReorderingMode; 725 726 /** 727 * Modify the operation of the Bidi algorithm such that it implements some 728 * variant of the basic Bidi algorithm or approximates an "inverse Bidi" 729 * algorithm, depending on different values of the "reordering mode". 730 * This function must be called before <code>ubidi_setPara()</code>, and stays 731 * in effect until called again with a different argument. 732 * 733 * <p>The normal operation of the Bidi algorithm as described 734 * in the Unicode Standard Annex #9 is to take text stored in logical 735 * (keyboard, typing) order and to determine how to reorder it for visual 736 * rendering.</p> 737 * 738 * <p>With the reordering mode set to a value other than 739 * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of 740 * some of the subsequent functions in a way such that they implement an 741 * inverse Bidi algorithm or some other algorithm variants.</p> 742 * 743 * <p>Some legacy systems store text in visual order, and for operations 744 * with standard, Unicode-based algorithms, the text needs to be transformed 745 * into logical order. This is effectively the inverse algorithm of the 746 * described Bidi algorithm. Note that there is no standard algorithm for 747 * this "inverse Bidi", so a number of variants are implemented here.</p> 748 * 749 * <p>In other cases, it may be desirable to emulate some variant of the 750 * Logical to Visual algorithm (e.g.
one used in MS Windows), or perform a 751 * Logical to Logical transformation.</p> 752 * 753 * <ul> 754 * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>, 755 * the standard Bidi Logical to Visual algorithm is applied.</li> 756 * 757 * <li>When the reordering mode is set to 758 * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>, 759 * the algorithm used to perform Bidi transformations when calling 760 * <code>ubidi_setPara</code> should approximate the algorithm used in 761 * Microsoft Windows XP rather than strictly conform to the Unicode Bidi 762 * algorithm. 763 * <br> 764 * The differences between the basic algorithm and the algorithm addressed 765 * by this option are as follows: 766 * <ul> 767 * <li>Within text at an even embedding level, the sequence "123AB" 768 * (where AB represent R or AL letters) is transformed to "123BA" by the 769 * Unicode algorithm and to "BA123" by the Windows algorithm.</li> 770 * <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just 771 * like regular numbers (EN).</li> 772 * </ul></li> 773 * 774 * <li>When the reordering mode is set to 775 * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>, 776 * numbers located between LTR text and RTL text are associated with the RTL 777 * text. For instance, an LTR paragraph with content "abc 123 DEF" (where 778 * upper case letters represent RTL characters) will be transformed to 779 * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed 780 * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc". 781 * This makes the algorithm reversible and makes it useful when round trip 782 * (from visual to logical and back to visual) must be achieved without 783 * adding LRM characters. 
However, this is a variation from the standard 784 * Unicode Bidi algorithm.<br> 785 * The source text should not contain Bidi control characters other than LRM 786 * or RLM.</li> 787 * 788 * <li>When the reordering mode is set to 789 * <code>#UBIDI_REORDER_RUNS_ONLY</code>, 790 * a "Logical to Logical" transformation must be performed: 791 * <ul> 792 * <li>If the default text level of the source text (argument <code>paraLevel</code> 793 * in <code>ubidi_setPara</code>) is even, the source text will be handled as 794 * LTR logical text and will be transformed to the RTL logical text which has 795 * the same LTR visual display.</li> 796 * <li>If the default level of the source text is odd, the source text 797 * will be handled as RTL logical text and will be transformed to the 798 * LTR logical text which has the same LTR visual display.</li> 799 * </ul> 800 * This mode may be needed when logical text which is basically Arabic or 801 * Hebrew, with possible included numbers or phrases in English, has to be 802 * displayed as if it had an even embedding level (this can happen if the 803 * displaying application treats all text as if it was basically LTR). 804 * <br> 805 * This mode may also be needed in the reverse case, when logical text which is 806 * basically English, with possible included phrases in Arabic or Hebrew, has to 807 * be displayed as if it had an odd embedding level. 808 * <br> 809 * Both cases could be handled by adding LRE or RLE at the head of the text, 810 * if the display subsystem supports these formatting controls. If it does not, 811 * the problem may be handled by transforming the source text in this mode 812 * before displaying it, so that it will be displayed properly.<br> 813 * The source text should not contain Bidi control characters other than LRM 814 * or RLM.</li> 815 * 816 * <li>When the reordering mode is set to 817 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm 818 * is applied. 
 * Runs of text with numeric characters will be treated like LTR letters and
 * may need to be surrounded with LRM characters when they are written in
 * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can
 * be used with function <code>ubidi_writeReordered</code> to this end). This
 * mode is equivalent to calling <code>ubidi_setInverse()</code> with
 * argument <code>isInverse</code> set to <code>TRUE</code>.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual
 * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
 * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>
 * but is closer to the regular Bidi algorithm.
 * <br>
 * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
 * upper case represents RTL characters) will be transformed to
 * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
 * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
 * When used in conjunction with option
 * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally
 * adds Bidi marks to the output significantly more sparingly than mode
 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option
 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to
 * <code>ubidi_writeReordered</code>.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
 * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm.
 * <br>
 * For example, an LTR paragraph with the content "abc FED123" (where
 * upper case represents RTL characters) will be transformed to "abc 123DEF."</li>
 * </ul>
 *
 * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
 * (i.e.
those with a name starting with <code>UBIDI_REORDER_INVERSE</code>), 853 * output runs should be retrieved using 854 * <code>ubidi_getVisualRun()</code>, and the output text with 855 * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in 856 * "inverse Bidi" modes the input is actually visually ordered text and 857 * reordered output returned by <code>ubidi_getVisualRun()</code> or 858 * <code>ubidi_writeReordered()</code> are actually runs or character string 859 * of logically ordered output.<br> 860 * For all the "inverse Bidi" modes, the source text should not contain 861 * Bidi control characters other than LRM or RLM.</p> 862 * 863 * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of 864 * <code>ubidi_writeReordered</code> has no useful meaning and should not be 865 * used in conjunction with any value of the reordering mode specifying 866 * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>. 867 * 868 * @param pBiDi is a <code>UBiDi</code> object. 869 * @param reorderingMode specifies the required variant of the Bidi algorithm. 870 * 871 * @see UBiDiReorderingMode 872 * @see ubidi_setInverse 873 * @see ubidi_setPara 874 * @see ubidi_writeReordered 875 * @stable ICU 3.6 876 */ 877 U_STABLE void U_EXPORT2 878 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode); 879 880 /** 881 * What is the requested reordering mode for a given Bidi object? 882 * 883 * @param pBiDi is a <code>UBiDi</code> object. 884 * @return the current reordering mode of the Bidi object 885 * @see ubidi_setReorderingMode 886 * @stable ICU 3.6 887 */ 888 U_STABLE UBiDiReorderingMode U_EXPORT2 889 ubidi_getReorderingMode(UBiDi *pBiDi); 890 891 /** 892 * <code>UBiDiReorderingOption</code> values indicate which options are 893 * specified to affect the Bidi algorithm. 
894 * 895 * @see ubidi_setReorderingOptions 896 * @stable ICU 3.6 897 */ 898 typedef enum UBiDiReorderingOption { 899 /** 900 * option value for <code>ubidi_setReorderingOptions</code>: 901 * disable all the options which can be set with this function 902 * @see ubidi_setReorderingOptions 903 * @stable ICU 3.6 904 */ 905 UBIDI_OPTION_DEFAULT = 0, 906 907 /** 908 * option bit for <code>ubidi_setReorderingOptions</code>: 909 * insert Bidi marks (LRM or RLM) when needed to ensure correct result of 910 * a reordering to a Logical order 911 * 912 * <p>This option must be set or reset before calling 913 * <code>ubidi_setPara</code>.</p> 914 * 915 * <p>This option is significant only with reordering modes which generate 916 * a result with Logical order, specifically:</p> 917 * <ul> 918 * <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li> 919 * <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li> 920 * <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li> 921 * <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li> 922 * </ul> 923 * 924 * <p>If this option is set in conjunction with reordering mode 925 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling 926 * <code>ubidi_setInverse(TRUE)</code>, it implies 927 * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> 928 * in calls to function <code>ubidi_writeReordered()</code>.</p> 929 * 930 * <p>For other reordering modes, a minimum number of LRM or RLM characters 931 * will be added to the source text after reordering it so as to ensure 932 * round trip, i.e. when applying the inverse reordering mode on the 933 * resulting logical text with removal of Bidi marks 934 * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling 935 * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> 936 * in <code>ubidi_writeReordered</code>), the result will be identical to the 937 * source text in the first transformation. 
938 * 939 * <p>This option will be ignored if specified together with option 940 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option 941 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function 942 * <code>ubidi_writeReordered()</code> and it implies option 943 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function 944 * <code>ubidi_writeReordered()</code> if the reordering mode is 945 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p> 946 * 947 * @see ubidi_setReorderingMode 948 * @see ubidi_setReorderingOptions 949 * @stable ICU 3.6 950 */ 951 UBIDI_OPTION_INSERT_MARKS = 1, 952 953 /** 954 * option bit for <code>ubidi_setReorderingOptions</code>: 955 * remove Bidi control characters 956 * 957 * <p>This option must be set or reset before calling 958 * <code>ubidi_setPara</code>.</p> 959 * 960 * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 961 * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls 962 * to function <code>ubidi_writeReordered()</code> and it implies option 963 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p> 964 * 965 * @see ubidi_setReorderingMode 966 * @see ubidi_setReorderingOptions 967 * @stable ICU 3.6 968 */ 969 UBIDI_OPTION_REMOVE_CONTROLS = 2, 970 971 /** 972 * option bit for <code>ubidi_setReorderingOptions</code>: 973 * process the output as part of a stream to be continued 974 * 975 * <p>This option must be set or reset before calling 976 * <code>ubidi_setPara</code>.</p> 977 * 978 * <p>This option specifies that the caller is interested in processing large 979 * text object in parts. 980 * The results of the successive calls are expected to be concatenated by the 981 * caller. Only the call for the last part will have this option bit off.</p> 982 * 983 * <p>When this option bit is on, <code>ubidi_setPara()</code> may process 984 * less than the full source text in order to truncate the text at a meaningful 985 * boundary. 
The caller should call <code>ubidi_getProcessedLength()</code> 986 * immediately after calling <code>ubidi_setPara()</code> in order to 987 * determine how much of the source text has been processed. 988 * Source text beyond that length should be resubmitted in following calls to 989 * <code>ubidi_setPara</code>. The processed length may be less than 990 * the length of the source text if a character preceding the last character of 991 * the source text constitutes a reasonable boundary (like a block separator) 992 * for text to be continued.<br> 993 * If the last character of the source text constitutes a reasonable 994 * boundary, the whole text will be processed at once.<br> 995 * If nowhere in the source text there exists 996 * such a reasonable boundary, the processed length will be zero.<br> 997 * The caller should check for such an occurrence and do one of the following: 998 * <ul><li>submit a larger amount of text with a better chance to include 999 * a reasonable boundary.</li> 1000 * <li>resubmit the same text after turning off option 1001 * <code>UBIDI_OPTION_STREAMING</code>.</li></ul> 1002 * In all cases, this option should be turned off before processing the last 1003 * part of the text.</p> 1004 * 1005 * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used, 1006 * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with 1007 * argument <code>orderParagraphsLTR</code> set to <code>TRUE</code> before 1008 * calling <code>ubidi_setPara</code> so that later paragraphs may be 1009 * concatenated to previous paragraphs on the right.</p> 1010 * 1011 * @see ubidi_setReorderingMode 1012 * @see ubidi_setReorderingOptions 1013 * @see ubidi_getProcessedLength 1014 * @see ubidi_orderParagraphsLTR 1015 * @stable ICU 3.6 1016 */ 1017 UBIDI_OPTION_STREAMING = 4 1018 } UBiDiReorderingOption; 1019 1020 /** 1021 * Specify which of the reordering options 1022 * should be applied during Bidi transformations. 
1023 * 1024 * @param pBiDi is a <code>UBiDi</code> object. 1025 * @param reorderingOptions is a combination of zero or more of the following 1026 * options: 1027 * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>, 1028 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>. 1029 * 1030 * @see ubidi_getReorderingOptions 1031 * @stable ICU 3.6 1032 */ 1033 U_STABLE void U_EXPORT2 1034 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions); 1035 1036 /** 1037 * What are the reordering options applied to a given Bidi object? 1038 * 1039 * @param pBiDi is a <code>UBiDi</code> object. 1040 * @return the current reordering options of the Bidi object 1041 * @see ubidi_setReorderingOptions 1042 * @stable ICU 3.6 1043 */ 1044 U_STABLE uint32_t U_EXPORT2 1045 ubidi_getReorderingOptions(UBiDi *pBiDi); 1046 1047 /** 1048 * Perform the Unicode Bidi algorithm. It is defined in the 1049 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Anned #9</a>, 1050 * version 13, 1051 * also described in The Unicode Standard, Version 4.0 .<p> 1052 * 1053 * This function takes a piece of plain text containing one or more paragraphs, 1054 * with or without externally specified embedding levels from <i>styled</i> 1055 * text and computes the left-right-directionality of each character.<p> 1056 * 1057 * If the entire text is all of the same directionality, then 1058 * the function may not perform all the steps described by the algorithm, 1059 * i.e., some levels may not be the same as if all steps were performed. 1060 * This is not relevant for unidirectional text.<br> 1061 * For example, in pure LTR text with numbers the numbers would get 1062 * a resolved level of 2 higher than the surrounding text according to 1063 * the algorithm. This implementation may set all resolved levels to 1064 * the same value in such a case.<p> 1065 * 1066 * The text can be composed of multiple paragraphs. 
Occurrence of a block 1067 * separator in the text terminates a paragraph, and whatever comes next starts 1068 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 1069 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 1070 * in that case, the pair of characters is considered as terminating the 1071 * preceding paragraph, and a new paragraph will be started by a character 1072 * coming after the LF. 1073 * 1074 * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code> 1075 * which will be set to contain the reordering information, 1076 * especially the resolved levels for all the characters in <code>text</code>. 1077 * 1078 * @param text is a pointer to the text that the Bidi algorithm will be performed on. 1079 * This pointer is stored in the UBiDi object and can be retrieved 1080 * with <code>ubidi_getText()</code>.<br> 1081 * <strong>Note:</strong> the text must be (at least) <code>length</code> long. 1082 * 1083 * @param length is the length of the text; if <code>length==-1</code> then 1084 * the text must be zero-terminated. 1085 * 1086 * @param paraLevel specifies the default level for the text; 1087 * it is typically 0 (LTR) or 1 (RTL). 1088 * If the function shall determine the paragraph level from the text, 1089 * then <code>paraLevel</code> can be set to 1090 * either <code>#UBIDI_DEFAULT_LTR</code> 1091 * or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple 1092 * paragraphs, the paragraph level shall be determined separately for 1093 * each paragraph; if a paragraph does not include any strongly typed 1094 * character, then the desired default is used (0 for LTR or 1 for RTL). 1095 * Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code> 1096 * is also valid, with odd levels indicating RTL. 1097 * 1098 * @param embeddingLevels (in) may be used to preset the embedding and override levels, 1099 * ignoring characters like LRE and PDF in the text. 
1100 * A level overrides the directional property of its corresponding 1101 * (same index) character if the level has the 1102 * <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br> 1103 * Except for that bit, it must be 1104 * <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>, 1105 * with one exception: a level of zero may be specified for a paragraph 1106 * separator even if <code>paraLevel>0</code> when multiple paragraphs 1107 * are submitted in the same call to <code>ubidi_setPara()</code>.<br><br> 1108 * <strong>Caution: </strong>A copy of this pointer, not of the levels, 1109 * will be stored in the <code>UBiDi</code> object; 1110 * the <code>embeddingLevels</code> array must not be 1111 * deallocated before the <code>UBiDi</code> structure is destroyed or reused, 1112 * and the <code>embeddingLevels</code> 1113 * should not be modified to avoid unexpected results on subsequent Bidi operations. 1114 * However, the <code>ubidi_setPara()</code> and 1115 * <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br> 1116 * After the <code>UBiDi</code> object is reused or destroyed, the caller 1117 * must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br> 1118 * <strong>Note:</strong> the <code>embeddingLevels</code> array must be 1119 * at least <code>length</code> long. 1120 * This pointer can be <code>NULL</code> if this 1121 * value is not necessary. 1122 * 1123 * @param pErrorCode must be a valid pointer to an error code value. 1124 * @stable ICU 2.0 1125 */ 1126 U_STABLE void U_EXPORT2 1127 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, 1128 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, 1129 UErrorCode *pErrorCode); 1130 1131 /** 1132 * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to 1133 * contain the reordering information, especially the resolved levels, 1134 * for all the characters in a line of text. 
This line of text is 1135 * specified by referring to a <code>UBiDi</code> object representing 1136 * this information for a piece of text containing one or more paragraphs, 1137 * and by specifying a range of indexes in this text.<p> 1138 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p> 1139 * 1140 * This is used after calling <code>ubidi_setPara()</code> 1141 * for a piece of text, and after line-breaking on that text. 1142 * It is not necessary if each paragraph is treated as a single line.<p> 1143 * 1144 * After line-breaking, rules (L1) and (L2) for the treatment of 1145 * trailing WS and for reordering are performed on 1146 * a <code>UBiDi</code> object that represents a line.<p> 1147 * 1148 * <strong>Important: </strong><code>pLineBiDi</code> shares data with 1149 * <code>pParaBiDi</code>. 1150 * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>. 1151 * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line 1152 * before the object for its parent paragraph.<p> 1153 * 1154 * The text pointer that was stored in <code>pParaBiDi</code> is also copied, 1155 * and <code>start</code> is added to it so that it points to the beginning of the 1156 * line for this object. 1157 * 1158 * @param pParaBiDi is the parent paragraph object. It must have been set 1159 * by a successful call to ubidi_setPara. 1160 * 1161 * @param start is the line's first index into the text. 1162 * 1163 * @param limit is just behind the line's last index into the text 1164 * (its last index +1).<br> 1165 * It must be <code>0<=start<limit<=</code>containing paragraph limit. 1166 * If the specified line crosses a paragraph boundary, the function 1167 * will terminate with error code U_ILLEGAL_ARGUMENT_ERROR. 1168 * 1169 * @param pLineBiDi is the object that will now represent a line of the text. 1170 * 1171 * @param pErrorCode must be a valid pointer to an error code value. 
1172 * 1173 * @see ubidi_setPara 1174 * @see ubidi_getProcessedLength 1175 * @stable ICU 2.0 1176 */ 1177 U_STABLE void U_EXPORT2 1178 ubidi_setLine(const UBiDi *pParaBiDi, 1179 int32_t start, int32_t limit, 1180 UBiDi *pLineBiDi, 1181 UErrorCode *pErrorCode); 1182 1183 /** 1184 * Get the directionality of the text. 1185 * 1186 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1187 * 1188 * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code> 1189 * or <code>UBIDI_MIXED</code> 1190 * that indicates if the entire text 1191 * represented by this object is unidirectional, 1192 * and which direction, or if it is mixed-directional. 1193 * Note - The value <code>UBIDI_NEUTRAL</code> is never returned from this method. 1194 * 1195 * @see UBiDiDirection 1196 * @stable ICU 2.0 1197 */ 1198 U_STABLE UBiDiDirection U_EXPORT2 1199 ubidi_getDirection(const UBiDi *pBiDi); 1200 1201 /** 1202 * Gets the base direction of the text provided according 1203 * to the Unicode Bidirectional Algorithm. The base direction 1204 * is derived from the first character in the string with bidirectional 1205 * character type L, R, or AL. If the first such character has type L, 1206 * <code>UBIDI_LTR</code> is returned. If the first such character has 1207 * type R or AL, <code>UBIDI_RTL</code> is returned. If the string does 1208 * not contain any character of these types, then 1209 * <code>UBIDI_NEUTRAL</code> is returned. 1210 * 1211 * This is a lightweight function for use when only the base direction 1212 * is needed and no further bidi processing of the text is needed. 1213 * 1214 * @param text is a pointer to the text whose base 1215 * direction is needed. 1216 * Note: the text must be (at least) @c length long. 1217 * 1218 * @param length is the length of the text; 1219 * if <code>length==-1</code> then the text 1220 * must be zero-terminated. 
1221 * 1222 * @return <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>, 1223 * <code>UBIDI_NEUTRAL</code> 1224 * 1225 * @see UBiDiDirection 1226 * @draft ICU 4.6 1227 */ 1228 U_DRAFT UBiDiDirection U_EXPORT2 1229 ubidi_getBaseDirection(const UChar *text, int32_t length ); 1230 1231 /** 1232 * Get the pointer to the text. 1233 * 1234 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1235 * 1236 * @return The pointer to the text that the UBiDi object was created for. 1237 * 1238 * @see ubidi_setPara 1239 * @see ubidi_setLine 1240 * @stable ICU 2.0 1241 */ 1242 U_STABLE const UChar * U_EXPORT2 1243 ubidi_getText(const UBiDi *pBiDi); 1244 1245 /** 1246 * Get the length of the text. 1247 * 1248 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1249 * 1250 * @return The length of the text that the UBiDi object was created for. 1251 * @stable ICU 2.0 1252 */ 1253 U_STABLE int32_t U_EXPORT2 1254 ubidi_getLength(const UBiDi *pBiDi); 1255 1256 /** 1257 * Get the paragraph level of the text. 1258 * 1259 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1260 * 1261 * @return The paragraph level. If there are multiple paragraphs, their 1262 * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or 1263 * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph 1264 * is returned. 1265 * 1266 * @see UBiDiLevel 1267 * @see ubidi_getParagraph 1268 * @see ubidi_getParagraphByIndex 1269 * @stable ICU 2.0 1270 */ 1271 U_STABLE UBiDiLevel U_EXPORT2 1272 ubidi_getParaLevel(const UBiDi *pBiDi); 1273 1274 /** 1275 * Get the number of paragraphs. 1276 * 1277 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1278 * 1279 * @return The number of paragraphs. 1280 * @stable ICU 3.4 1281 */ 1282 U_STABLE int32_t U_EXPORT2 1283 ubidi_countParagraphs(UBiDi *pBiDi); 1284 1285 /** 1286 * Get a paragraph, given a position within the text. 
1287 * This function returns information about a paragraph.<br> 1288 * Note: if the paragraph index is known, it is more efficient to 1289 * retrieve the paragraph information using ubidi_getParagraphByIndex().<p> 1290 * 1291 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1292 * 1293 * @param charIndex is the index of a character within the text, in the 1294 * range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>. 1295 * 1296 * @param pParaStart will receive the index of the first character of the 1297 * paragraph in the text. 1298 * This pointer can be <code>NULL</code> if this 1299 * value is not necessary. 1300 * 1301 * @param pParaLimit will receive the limit of the paragraph. 1302 * The l-value that you point to here may be the 1303 * same expression (variable) as the one for 1304 * <code>charIndex</code>. 1305 * This pointer can be <code>NULL</code> if this 1306 * value is not necessary. 1307 * 1308 * @param pParaLevel will receive the level of the paragraph. 1309 * This pointer can be <code>NULL</code> if this 1310 * value is not necessary. 1311 * 1312 * @param pErrorCode must be a valid pointer to an error code value. 1313 * 1314 * @return The index of the paragraph containing the specified position. 1315 * 1316 * @see ubidi_getProcessedLength 1317 * @stable ICU 3.4 1318 */ 1319 U_STABLE int32_t U_EXPORT2 1320 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart, 1321 int32_t *pParaLimit, UBiDiLevel *pParaLevel, 1322 UErrorCode *pErrorCode); 1323 1324 /** 1325 * Get a paragraph, given the index of this paragraph. 1326 * 1327 * This function returns information about a paragraph.<p> 1328 * 1329 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1330 * 1331 * @param paraIndex is the number of the paragraph, in the 1332 * range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>. 1333 * 1334 * @param pParaStart will receive the index of the first character of the 1335 * paragraph in the text. 
1336 * This pointer can be <code>NULL</code> if this 1337 * value is not necessary. 1338 * 1339 * @param pParaLimit will receive the limit of the paragraph. 1340 * This pointer can be <code>NULL</code> if this 1341 * value is not necessary. 1342 * 1343 * @param pParaLevel will receive the level of the paragraph. 1344 * This pointer can be <code>NULL</code> if this 1345 * value is not necessary. 1346 * 1347 * @param pErrorCode must be a valid pointer to an error code value. 1348 * 1349 * @stable ICU 3.4 1350 */ 1351 U_STABLE void U_EXPORT2 1352 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, 1353 int32_t *pParaStart, int32_t *pParaLimit, 1354 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode); 1355 1356 /** 1357 * Get the level for one character. 1358 * 1359 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1360 * 1361 * @param charIndex the index of a character. It must be in the range 1362 * [0..ubidi_getProcessedLength(pBiDi)]. 1363 * 1364 * @return The level for the character at charIndex (0 if charIndex is not 1365 * in the valid range). 1366 * 1367 * @see UBiDiLevel 1368 * @see ubidi_getProcessedLength 1369 * @stable ICU 2.0 1370 */ 1371 U_STABLE UBiDiLevel U_EXPORT2 1372 ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex); 1373 1374 /** 1375 * Get an array of levels for each character.<p> 1376 * 1377 * Note that this function may allocate memory under some 1378 * circumstances, unlike <code>ubidi_getLevelAt()</code>. 1379 * 1380 * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose 1381 * text length must be strictly positive. 1382 * 1383 * @param pErrorCode must be a valid pointer to an error code value. 1384 * 1385 * @return The levels array for the text, 1386 * or <code>NULL</code> if an error occurs. 
1387 * 1388 * @see UBiDiLevel 1389 * @see ubidi_getProcessedLength 1390 * @stable ICU 2.0 1391 */ 1392 U_STABLE const UBiDiLevel * U_EXPORT2 1393 ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode); 1394 1395 /** 1396 * Get a logical run. 1397 * This function returns information about a run and is used 1398 * to retrieve runs in logical order.<p> 1399 * This is especially useful for line-breaking on a paragraph. 1400 * 1401 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1402 * 1403 * @param logicalPosition is a logical position within the source text. 1404 * 1405 * @param pLogicalLimit will receive the limit of the corresponding run. 1406 * The l-value that you point to here may be the 1407 * same expression (variable) as the one for 1408 * <code>logicalPosition</code>. 1409 * This pointer can be <code>NULL</code> if this 1410 * value is not necessary. 1411 * 1412 * @param pLevel will receive the level of the corresponding run. 1413 * This pointer can be <code>NULL</code> if this 1414 * value is not necessary. 1415 * 1416 * @see ubidi_getProcessedLength 1417 * @stable ICU 2.0 1418 */ 1419 U_STABLE void U_EXPORT2 1420 ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, 1421 int32_t *pLogicalLimit, UBiDiLevel *pLevel); 1422 1423 /** 1424 * Get the number of runs. 1425 * This function may invoke the actual reordering on the 1426 * <code>UBiDi</code> object, after <code>ubidi_setPara()</code> 1427 * may have resolved only the levels of the text. Therefore, 1428 * <code>ubidi_countRuns()</code> may have to allocate memory, 1429 * and may fail doing so. 1430 * 1431 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1432 * 1433 * @param pErrorCode must be a valid pointer to an error code value. 1434 * 1435 * @return The number of runs. 
1436 * @stable ICU 2.0 1437 */ 1438 U_STABLE int32_t U_EXPORT2 1439 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); 1440 1441 /** 1442 * Get one run's logical start, length, and directionality, 1443 * which can be 0 for LTR or 1 for RTL. 1444 * In an RTL run, the character at the logical start is 1445 * visually on the right of the displayed run. 1446 * The length is the number of characters in the run.<p> 1447 * <code>ubidi_countRuns()</code> should be called 1448 * before the runs are retrieved. 1449 * 1450 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1451 * 1452 * @param runIndex is the number of the run in visual order, in the 1453 * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>. 1454 * 1455 * @param pLogicalStart is the first logical character index in the text. 1456 * The pointer may be <code>NULL</code> if this index is not needed. 1457 * 1458 * @param pLength is the number of characters (at least one) in the run. 1459 * The pointer may be <code>NULL</code> if this is not needed. 1460 * 1461 * @return the directionality of the run, 1462 * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>, 1463 * never <code>UBIDI_MIXED</code>, 1464 * never <code>UBIDI_NEUTRAL</code>. 
1465 * 1466 * @see ubidi_countRuns 1467 * 1468 * Example: 1469 * <pre> 1470 * \code 1471 * int32_t i, count=ubidi_countRuns(pBiDi), 1472 * logicalStart, visualIndex=0, length; 1473 * for(i=0; i<count; ++i) { 1474 * if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) { 1475 * do { // LTR 1476 * show_char(text[logicalStart++], visualIndex++); 1477 * } while(--length>0); 1478 * } else { 1479 * logicalStart+=length; // logicalLimit 1480 * do { // RTL 1481 * show_char(text[--logicalStart], visualIndex++); 1482 * } while(--length>0); 1483 * } 1484 * } 1485 *\endcode 1486 * </pre> 1487 * 1488 * Note that in right-to-left runs, code like this places 1489 * second surrogates before first ones (which is generally a bad idea) 1490 * and combining characters before base characters. 1491 * <p> 1492 * Use of <code>ubidi_writeReordered()</code>, optionally with the 1493 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order 1494 * to avoid these issues. 1495 * @stable ICU 2.0 1496 */ 1497 U_STABLE UBiDiDirection U_EXPORT2 1498 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, 1499 int32_t *pLogicalStart, int32_t *pLength); 1500 1501 /** 1502 * Get the visual position from a logical text position. 1503 * If such a mapping is used many times on the same 1504 * <code>UBiDi</code> object, then calling 1505 * <code>ubidi_getLogicalMap()</code> is more efficient.<p> 1506 * 1507 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no 1508 * visual position because the corresponding text character is a Bidi control 1509 * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. 
1510 * <p> 1511 * When the visual output is altered by using options of 1512 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1513 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1514 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not 1515 * be correct. It is advised to use, when possible, reordering options 1516 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1517 * <p> 1518 * Note that in right-to-left runs, this mapping places 1519 * second surrogates before first ones (which is generally a bad idea) 1520 * and combining characters before base characters. 1521 * Use of <code>ubidi_writeReordered()</code>, optionally with the 1522 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead 1523 * of using the mapping, in order to avoid these issues. 1524 * 1525 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1526 * 1527 * @param logicalIndex is the index of a character in the text. 1528 * 1529 * @param pErrorCode must be a valid pointer to an error code value. 1530 * 1531 * @return The visual position of this character. 1532 * 1533 * @see ubidi_getLogicalMap 1534 * @see ubidi_getLogicalIndex 1535 * @see ubidi_getProcessedLength 1536 * @stable ICU 2.0 1537 */ 1538 U_STABLE int32_t U_EXPORT2 1539 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode); 1540 1541 /** 1542 * Get the logical text position from a visual position. 1543 * If such a mapping is used many times on the same 1544 * <code>UBiDi</code> object, then calling 1545 * <code>ubidi_getVisualMap()</code> is more efficient.<p> 1546 * 1547 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no 1548 * logical position because the corresponding text character is a Bidi mark 1549 * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 
1550 * <p> 1551 * This is the inverse function to <code>ubidi_getVisualIndex()</code>. 1552 * <p> 1553 * When the visual output is altered by using options of 1554 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1555 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1556 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not 1557 * be correct. It is advised to use, when possible, reordering options 1558 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1559 * 1560 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1561 * 1562 * @param visualIndex is the visual position of a character. 1563 * 1564 * @param pErrorCode must be a valid pointer to an error code value. 1565 * 1566 * @return The index of this character in the text. 1567 * 1568 * @see ubidi_getVisualMap 1569 * @see ubidi_getVisualIndex 1570 * @see ubidi_getResultLength 1571 * @stable ICU 2.0 1572 */ 1573 U_STABLE int32_t U_EXPORT2 1574 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode); 1575 1576 /** 1577 * Get a logical-to-visual index map (array) for the characters in the UBiDi 1578 * (paragraph or line) object. 1579 * <p> 1580 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the 1581 * corresponding text characters are Bidi controls removed from the visual 1582 * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. 1583 * <p> 1584 * When the visual output is altered by using options of 1585 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1586 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1587 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not 1588 * be correct. 
It is advised to use, when possible, reordering options 1589 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1590 * <p> 1591 * Note that in right-to-left runs, this mapping places 1592 * second surrogates before first ones (which is generally a bad idea) 1593 * and combining characters before base characters. 1594 * Use of <code>ubidi_writeReordered()</code>, optionally with the 1595 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead 1596 * of using the mapping, in order to avoid these issues. 1597 * 1598 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1599 * 1600 * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code> 1601 * indexes which will reflect the reordering of the characters. 1602 * If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number 1603 * of elements allocated in <code>indexMap</code> must be no less than 1604 * <code>ubidi_getResultLength()</code>. 1605 * The array does not need to be initialized.<br><br> 1606 * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. 1607 * 1608 * @param pErrorCode must be a valid pointer to an error code value. 1609 * 1610 * @see ubidi_getVisualMap 1611 * @see ubidi_getVisualIndex 1612 * @see ubidi_getProcessedLength 1613 * @see ubidi_getResultLength 1614 * @stable ICU 2.0 1615 */ 1616 U_STABLE void U_EXPORT2 1617 ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); 1618 1619 /** 1620 * Get a visual-to-logical index map (array) for the characters in the UBiDi 1621 * (paragraph or line) object. 1622 * <p> 1623 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the 1624 * corresponding text characters are Bidi marks inserted in the visual output 1625 * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 
1626 * <p> 1627 * When the visual output is altered by using options of 1628 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1629 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1630 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not 1631 * be correct. It is advised to use, when possible, reordering options 1632 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1633 * 1634 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1635 * 1636 * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code> 1637 * indexes which will reflect the reordering of the characters. 1638 * If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number 1639 * of elements allocated in <code>indexMap</code> must be no less than 1640 * <code>ubidi_getProcessedLength()</code>. 1641 * The array does not need to be initialized.<br><br> 1642 * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. 1643 * 1644 * @param pErrorCode must be a valid pointer to an error code value. 1645 * 1646 * @see ubidi_getLogicalMap 1647 * @see ubidi_getLogicalIndex 1648 * @see ubidi_getProcessedLength 1649 * @see ubidi_getResultLength 1650 * @stable ICU 2.0 1651 */ 1652 U_STABLE void U_EXPORT2 1653 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); 1654 1655 /** 1656 * This is a convenience function that does not use a UBiDi object. 1657 * It is intended to be used when an application has determined the levels 1658 * of objects (character sequences) and just needs to have them reordered (L2). 1659 * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a 1660 * <code>UBiDi</code> object. 1661 * 1662 * @param levels is an array with <code>length</code> levels that have been determined by 1663 * the application.
1664 * 1665 * @param length is the number of levels in the array, or, semantically, 1666 * the number of objects to be reordered. 1667 * It must be <code>length>0</code>. 1668 * 1669 * @param indexMap is a pointer to an array of <code>length</code> 1670 * indexes which will reflect the reordering of the characters. 1671 * The array does not need to be initialized.<p> 1672 * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. 1673 * @stable ICU 2.0 1674 */ 1675 U_STABLE void U_EXPORT2 1676 ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); 1677 1678 /** 1679 * This is a convenience function that does not use a UBiDi object. 1680 * It is intended to be used when an application has determined the levels 1681 * of objects (character sequences) and just needs to have them reordered (L2). 1682 * This is equivalent to using <code>ubidi_getVisualMap()</code> on a 1683 * <code>UBiDi</code> object. 1684 * 1685 * @param levels is an array with <code>length</code> levels that have been determined by 1686 * the application. 1687 * 1688 * @param length is the number of levels in the array, or, semantically, 1689 * the number of objects to be reordered. 1690 * It must be <code>length>0</code>. 1691 * 1692 * @param indexMap is a pointer to an array of <code>length</code> 1693 * indexes which will reflect the reordering of the characters. 1694 * The array does not need to be initialized.<p> 1695 * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. 1696 * @stable ICU 2.0 1697 */ 1698 U_STABLE void U_EXPORT2 1699 ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); 1700 1701 /** 1702 * Invert an index map. 1703 * The index mapping of the first map is inverted and written to 1704 * the second one.
1705 * 1706 * @param srcMap is an array with <code>length</code> elements 1707 * which defines the original mapping from a source array containing 1708 * <code>length</code> elements to a destination array. 1709 * Some elements of the source array may have no mapping in the 1710 * destination array. In that case, their value will be 1711 * the special value <code>UBIDI_MAP_NOWHERE</code>. 1712 * All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>. 1713 * Some elements may have a value >= <code>length</code>, if the 1714 * destination array has more elements than the source array. 1715 * There must be no duplicate indexes (two or more elements with the 1716 * same value except <code>UBIDI_MAP_NOWHERE</code>). 1717 * 1718 * @param destMap is an array with a number of elements equal to 1 + the highest 1719 * value in <code>srcMap</code>. 1720 * <code>destMap</code> will be filled with the inverse mapping. 1721 * If element with index i in <code>srcMap</code> has a value k different 1722 * from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of 1723 * the source array maps to element k in the destination array. 1724 * The inverse map will have value i in its k-th element. 1725 * For all elements of the destination array which do not map to 1726 * an element in the source array, the corresponding element in the 1727 * inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>. 1728 * 1729 * @param length is the number of elements in <code>srcMap</code> (<code>destMap</code> may be longer, as described for that parameter).
1730 * @see UBIDI_MAP_NOWHERE 1731 * @stable ICU 2.0 1732 */ 1733 U_STABLE void U_EXPORT2 1734 ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length); 1735 1736 /** option flags for ubidi_writeReordered() */ 1737 1738 /** 1739 * option bit for ubidi_writeReordered(): 1740 * keep combining characters after their base characters in RTL runs 1741 * 1742 * @see ubidi_writeReordered 1743 * @stable ICU 2.0 1744 */ 1745 #define UBIDI_KEEP_BASE_COMBINING 1 1746 1747 /** 1748 * option bit for ubidi_writeReordered(): 1749 * replace characters with the "mirrored" property in RTL runs 1750 * by their mirror-image mappings 1751 * 1752 * @see ubidi_writeReordered 1753 * @stable ICU 2.0 1754 */ 1755 #define UBIDI_DO_MIRRORING 2 1756 1757 /** 1758 * option bit for ubidi_writeReordered(): 1759 * surround the run with LRMs if necessary; 1760 * this is part of the approximate "inverse Bidi" algorithm 1761 * 1762 * <p>This option does not imply corresponding adjustment of the index 1763 * mappings.</p> 1764 * 1765 * @see ubidi_setInverse 1766 * @see ubidi_writeReordered 1767 * @stable ICU 2.0 1768 */ 1769 #define UBIDI_INSERT_LRM_FOR_NUMERIC 4 1770 1771 /** 1772 * option bit for ubidi_writeReordered(): 1773 * remove Bidi control characters 1774 * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC) 1775 * 1776 * <p>This option does not imply corresponding adjustment of the index 1777 * mappings.</p> 1778 * 1779 * @see ubidi_writeReordered 1780 * @stable ICU 2.0 1781 */ 1782 #define UBIDI_REMOVE_BIDI_CONTROLS 8 1783 1784 /** 1785 * option bit for ubidi_writeReordered(): 1786 * write the output in reverse order 1787 * 1788 * <p>This has the same effect as calling <code>ubidi_writeReordered()</code> 1789 * first without this option, and then calling 1790 * <code>ubidi_writeReverse()</code> without mirroring. 1791 * Doing this in the same step is faster and avoids a temporary buffer. 
1792 * An example for using this option is output to a character terminal that 1793 * is designed for RTL scripts and stores text in reverse order.</p> 1794 * 1795 * @see ubidi_writeReordered 1796 * @stable ICU 2.0 1797 */ 1798 #define UBIDI_OUTPUT_REVERSE 16 1799 1800 /** 1801 * Get the length of the source text processed by the last call to 1802 * <code>ubidi_setPara()</code>. This length may be different from the length 1803 * of the source text if option <code>#UBIDI_OPTION_STREAMING</code> 1804 * has been set. 1805 * <br> 1806 * Note that whenever the length of the text affects the execution or the 1807 * result of a function, it is the processed length which must be considered, 1808 * except for <code>ubidi_setPara</code> (which receives unprocessed source 1809 * text) and <code>ubidi_getLength</code> (which returns the original length 1810 * of the source text).<br> 1811 * In particular, the processed length is the one to consider in the following 1812 * cases: 1813 * <ul> 1814 * <li>maximum value of the <code>limit</code> argument of 1815 * <code>ubidi_setLine</code></li> 1816 * <li>maximum value of the <code>charIndex</code> argument of 1817 * <code>ubidi_getParagraph</code></li> 1818 * <li>maximum value of the <code>charIndex</code> argument of 1819 * <code>ubidi_getLevelAt</code></li> 1820 * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li> 1821 * <li>maximum value of the <code>logicalStart</code> argument of 1822 * <code>ubidi_getLogicalRun</code></li> 1823 * <li>maximum value of the <code>logicalIndex</code> argument of 1824 * <code>ubidi_getVisualIndex</code></li> 1825 * <li>number of elements filled in the <code>*indexMap</code> argument of 1826 * <code>ubidi_getLogicalMap</code></li> 1827 * <li>length of text processed by <code>ubidi_writeReordered</code></li> 1828 * </ul> 1829 * 1830 * @param pBiDi is the paragraph <code>UBiDi</code> object. 
1831 * 1832 * @return The length of the part of the source text processed by 1833 * the last call to <code>ubidi_setPara</code>. 1834 * @see ubidi_setPara 1835 * @see UBIDI_OPTION_STREAMING 1836 * @stable ICU 3.6 1837 */ 1838 U_STABLE int32_t U_EXPORT2 1839 ubidi_getProcessedLength(const UBiDi *pBiDi); 1840 1841 /** 1842 * Get the length of the reordered text resulting from the last call to 1843 * <code>ubidi_setPara()</code>. This length may be different from the length 1844 * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code> 1845 * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set. 1846 * <br> 1847 * This resulting length is the one to consider in the following cases: 1848 * <ul> 1849 * <li>maximum value of the <code>visualIndex</code> argument of 1850 * <code>ubidi_getLogicalIndex</code></li> 1851 * <li>number of elements of the <code>*indexMap</code> argument of 1852 * <code>ubidi_getVisualMap</code></li> 1853 * </ul> 1854 * Note that this length stays identical to the source text length if 1855 * Bidi marks are inserted or removed using option bits of 1856 * <code>ubidi_writeReordered</code>, or if option 1857 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set. 1858 * 1859 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1860 * 1861 * @return The length of the reordered text resulting from 1862 * the last call to <code>ubidi_setPara</code>. 1863 * @see ubidi_setPara 1864 * @see UBIDI_OPTION_INSERT_MARKS 1865 * @see UBIDI_OPTION_REMOVE_CONTROLS 1866 * @stable ICU 3.6 1867 */ 1868 U_STABLE int32_t U_EXPORT2 1869 ubidi_getResultLength(const UBiDi *pBiDi); 1870 1871 U_CDECL_BEGIN 1872 /** 1873 * value returned by <code>UBiDiClassCallback</code> callbacks when 1874 * there is no need to override the standard Bidi class for a given code point. 
1875 * @see UBiDiClassCallback 1876 * @stable ICU 3.6 1877 */ 1878 #define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT 1879 1880 /** 1881 * Callback type declaration for overriding default Bidi class values with 1882 * custom ones. 1883 * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code> 1884 * object by calling the <code>ubidi_setClassCallback()</code> function; 1885 * then the callback will be invoked by the UBA implementation any time the 1886 * class of a character is to be determined.</p> 1887 * 1888 * @param context is a pointer to the callback private data. 1889 * 1890 * @param c is the code point to get a Bidi class for. 1891 * 1892 * @return The directional property / Bidi class for the given code point 1893 * <code>c</code> if the default class has been overridden, or 1894 * <code>#U_BIDI_CLASS_DEFAULT</code> if the standard Bidi class value 1895 * for <code>c</code> is to be used. 1896 * @see ubidi_setClassCallback 1897 * @see ubidi_getClassCallback 1898 * @stable ICU 3.6 1899 */ 1900 typedef UCharDirection U_CALLCONV 1901 UBiDiClassCallback(const void *context, UChar32 c); 1902 1903 U_CDECL_END 1904 1905 /** 1906 * Retrieve the Bidi class for a given code point. 1907 * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a 1908 * value other than <code>#U_BIDI_CLASS_DEFAULT</code>, that value is used; 1909 * otherwise the default class determination mechanism is invoked.</p> 1910 * 1911 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1912 * 1913 * @param c is the code point whose Bidi class must be retrieved. 1914 * 1915 * @return The Bidi class for character <code>c</code> based 1916 * on the given <code>pBiDi</code> instance. 
1917 * @see UBiDiClassCallback 1918 * @stable ICU 3.6 1919 */ 1920 U_STABLE UCharDirection U_EXPORT2 1921 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c); 1922 1923 /** 1924 * Set the callback function and callback data used by the UBA 1925 * implementation for Bidi class determination. 1926 * <p>This may be useful for assigning Bidi classes to PUA characters, or 1927 * for special application needs. For instance, an application may want to 1928 * handle all spaces like L or R characters (according to the base direction) 1929 * when creating the visual ordering of logical lines which are part of a report 1930 * organized in columns: there should not be interaction between adjacent 1931 * cells.</p> 1932 * 1933 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1934 * 1935 * @param newFn is the new callback function pointer. 1936 * 1937 * @param newContext is the new callback context pointer. This can be NULL. 1938 * 1939 * @param oldFn fillin: Returns the old callback function pointer. This can be 1940 * NULL. 1941 * 1942 * @param oldContext fillin: Returns the old callback's context. This can be 1943 * NULL. 1944 * 1945 * @param pErrorCode must be a valid pointer to an error code value. 1946 * 1947 * @see ubidi_getClassCallback 1948 * @stable ICU 3.6 1949 */ 1950 U_STABLE void U_EXPORT2 1951 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, 1952 const void *newContext, UBiDiClassCallback **oldFn, 1953 const void **oldContext, UErrorCode *pErrorCode); 1954 1955 /** 1956 * Get the current callback function used for Bidi class determination. 1957 * 1958 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1959 * 1960 * @param fn fillin: Returns the callback function pointer. 1961 * 1962 * @param context fillin: Returns the callback's private context.
1963 * 1964 * @see ubidi_setClassCallback 1965 * @stable ICU 3.6 1966 */ 1967 U_STABLE void U_EXPORT2 1968 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context); 1969 1970 /** 1971 * Take a <code>UBiDi</code> object containing the reordering 1972 * information for a piece of text (one or more paragraphs) set by 1973 * <code>ubidi_setPara()</code> or for a line of text set by 1974 * <code>ubidi_setLine()</code> and write a reordered string to the 1975 * destination buffer. 1976 * 1977 * This function preserves the integrity of characters with multiple 1978 * code units and (optionally) combining characters. 1979 * Characters in RTL runs can be replaced by mirror-image characters 1980 * in the destination buffer. Note that "real" mirroring has 1981 * to be done in a rendering engine by glyph selection 1982 * and that for many "mirrored" characters there are no 1983 * Unicode characters as mirror-image equivalents. 1984 * There are also options to insert or remove Bidi control 1985 * characters; see the description of the <code>destSize</code> 1986 * and <code>options</code> parameters and of the option bit flags. 1987 * 1988 * @param pBiDi A pointer to a <code>UBiDi</code> object that 1989 * is set by <code>ubidi_setPara()</code> or 1990 * <code>ubidi_setLine()</code> and contains the reordering 1991 * information for the text that it was defined for, 1992 * as well as a pointer to that text.<br><br> 1993 * The text was aliased (only the pointer was stored 1994 * without copying the contents) and must not have been modified 1995 * since the <code>ubidi_setPara()</code> call. 1996 * 1997 * @param dest A pointer to where the reordered text is to be copied. 1998 * The source text and <code>dest[destSize]</code> 1999 * must not overlap. 2000 * 2001 * @param destSize The size of the <code>dest</code> buffer, 2002 * in number of UChars. 
2003 * If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code> 2004 * option is set, then the destination length could be 2005 * as large as 2006 * <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>. 2007 * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option 2008 * is set, then the destination length may be less than 2009 * <code>ubidi_getLength(pBiDi)</code>. 2010 * If none of these options is set, then the destination length 2011 * will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>. 2012 * 2013 * @param options A bit set of options for the reordering that control 2014 * how the reordered text is written. 2015 * The options include mirroring the characters on a code 2016 * point basis and inserting LRM characters, which is used 2017 * especially for transforming visually stored text 2018 * to logically stored text (although this is still an 2019 * imperfect implementation of an "inverse Bidi" algorithm 2020 * because it uses the "forward Bidi" algorithm at its core). 2021 * The available options are: 2022 * <code>#UBIDI_DO_MIRRORING</code>, 2023 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 2024 * <code>#UBIDI_KEEP_BASE_COMBINING</code>, 2025 * <code>#UBIDI_OUTPUT_REVERSE</code>, 2026 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> 2027 * 2028 * @param pErrorCode must be a valid pointer to an error code value. 2029 * 2030 * @return The length of the output string. 2031 * 2032 * @see ubidi_getProcessedLength 2033 * @stable ICU 2.0 2034 */ 2035 U_STABLE int32_t U_EXPORT2 2036 ubidi_writeReordered(UBiDi *pBiDi, 2037 UChar *dest, int32_t destSize, 2038 uint16_t options, 2039 UErrorCode *pErrorCode); 2040 2041 /** 2042 * Reverse a Right-To-Left run of Unicode text. 2043 * 2044 * This function preserves the integrity of characters with multiple 2045 * code units and (optionally) combining characters. 2046 * Characters can be replaced by mirror-image characters 2047 * in the destination buffer. 
Note that "real" mirroring has 2048 * to be done in a rendering engine by glyph selection 2049 * and that for many "mirrored" characters there are no 2050 * Unicode characters as mirror-image equivalents. 2051 * There are also options to insert or remove Bidi control 2052 * characters. 2053 * 2054 * This function is the implementation for reversing RTL runs as part 2055 * of <code>ubidi_writeReordered()</code>. For detailed descriptions 2056 * of the parameters, see there. 2057 * Since no Bidi controls are inserted here, the output string length 2058 * will never exceed <code>srcLength</code>. 2059 * 2060 * @see ubidi_writeReordered 2061 * 2062 * @param src A pointer to the RTL run text. 2063 * 2064 * @param srcLength The length of the RTL run. 2065 * 2066 * @param dest A pointer to where the reordered text is to be copied. 2067 * <code>src[srcLength]</code> and <code>dest[destSize]</code> 2068 * must not overlap. 2069 * 2070 * @param destSize The size of the <code>dest</code> buffer, 2071 * in number of UChars. 2072 * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option 2073 * is set, then the destination length may be less than 2074 * <code>srcLength</code>. 2075 * If this option is not set, then the destination length 2076 * will be exactly <code>srcLength</code>. 2077 * 2078 * @param options A bit set of options for the reordering that control 2079 * how the reordered text is written. 2080 * See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>. 2081 * 2082 * @param pErrorCode must be a valid pointer to an error code value. 2083 * 2084 * @return The length of the output string. 2085 * @stable ICU 2.0 2086 */ 2087 U_STABLE int32_t U_EXPORT2 2088 ubidi_writeReverse(const UChar *src, int32_t srcLength, 2089 UChar *dest, int32_t destSize, 2090 uint16_t options, 2091 UErrorCode *pErrorCode); 2092 2093 /*#define BIDI_SAMPLE_CODE*/ 2094 /*@}*/ 2095 2096 #endif 2097