/*
******************************************************************************
*
*   Copyright (C) 1999-2008, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ubidi.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999jul27
*   created by: Markus W. Scherer, updated by Matitiahu Allouche
*/

#ifndef UBIDI_H
#define UBIDI_H

#include "unicode/utypes.h"
#include "unicode/uchar.h"

/**
 *\file
 * \brief C API: Bidi algorithm
 *
 * <h2>Bidi algorithm for ICU</h2>
 *
 * This is an implementation of the Unicode Bidirectional algorithm.
 * The algorithm is defined in the
 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
 * version 13, also described in The Unicode Standard, Version 4.0.<p>
 *
 * Note: Libraries that perform a bidirectional algorithm and
 * reorder strings accordingly are sometimes called "Storage Layout Engines".
 * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
 * "Storage Layout Engines".
 *
 * <h3>General remarks about the API:</h3>
 *
 * In functions with an error code parameter,
 * the <code>pErrorCode</code> pointer must be valid
 * and the value that it points to must not indicate a failure before
 * the function call. Otherwise, the function returns immediately.
 * After the function call, the value indicates success or failure.<p>
 *
 * The "limit" of a sequence of characters is the position just after their
 * last character, i.e., one more than the position of that last character.<p>
 *
 * Some of the API functions provide access to "runs".
 * Such a "run" is defined as a sequence of characters
 * that are at the same embedding level
 * after performing the Bidi algorithm.<p>
 *
 * @author Markus W. Scherer
 * @version 1.0
 *
 *
 * <h4> Sample code for the ICU Bidi API </h4>
 *
 * <h5>Rendering a paragraph with the ICU Bidi API</h5>
 *
 * This is (hypothetical) sample code that illustrates
 * how the ICU Bidi API could be used to render a paragraph of text.
 * Rendering code depends highly on the graphics system,
 * therefore this sample code must make a lot of assumptions,
 * which may or may not match any existing graphics system's properties.
 *
 * <p>The basic assumptions are:</p>
 * <ul>
 * <li>Rendering is done from left to right on a horizontal line.</li>
 * <li>A run of single-style, unidirectional text can be rendered at once.</li>
 * <li>Such a run of text is passed to the graphics system with
 *     characters (code units) in logical order.</li>
 * <li>The line-breaking algorithm is very complicated
 *     and Locale-dependent -
 *     and therefore its implementation is omitted from this sample code.</li>
 * </ul>
 *
 * <pre>
 * \code
 *#include "unicode/ubidi.h"
 *
 *typedef enum {
 *     styleNormal=0, styleSelected=1,
 *     styleBold=2, styleItalics=4,
 *     styleSuper=8, styleSub=16
 *} Style;
 *
 *typedef struct { int32_t limit; Style style; } StyleRun;
 *
 *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
 *                  const StyleRun *styleRuns, int styleRunCount);
 *
 * // set *pLimit and *pStyleRunLimit for a line
 * // from text[start] and from styleRuns[styleRunStart]
 * // using ubidi_getLogicalRun(para, ...)
 *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
 *                  UBiDi *para,
 *                  const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
 *                  int *pLineWidth);
 *
 * // render runs on a line sequentially, always from left to right
 *
 * // prepare rendering a new line
 * void startLine(UBiDiDirection textDirection, int lineWidth);
 *
 * // render a run of text and advance to the right by the run width
 * // the text[start..limit-1] is always in logical order
 * void renderRun(const UChar *text, int32_t start, int32_t limit,
 *                UBiDiDirection textDirection, Style style);
 *
 * // We could compute a cross-product
 * // from the style runs with the directional runs
 * // and then reorder it.
 * // Instead, here we iterate over each run type
 * // and render the intersections -
 * // with shortcuts in simple (and common) cases.
 * // renderParagraph() is the main function.
 *
 * // render a directional run with
 * // (possibly) multiple style runs intersecting with it
 * void renderDirectionalRun(const UChar *text,
 *                           int32_t start, int32_t limit,
 *                           UBiDiDirection direction,
 *                           const StyleRun *styleRuns, int styleRunCount) {
 *     int i;
 *
 *     // iterate over style runs
 *     if(direction==UBIDI_LTR) {
 *         int styleLimit;
 *
 *         for(i=0; i<styleRunCount; ++i) {
 *             styleLimit=styleRuns[i].limit;
 *             if(start<styleLimit) {
 *                 if(styleLimit>limit) { styleLimit=limit; }
 *                 renderRun(text, start, styleLimit,
 *                           direction, styleRuns[i].style);
 *                 if(styleLimit==limit) { break; }
 *                 start=styleLimit;
 *             }
 *         }
 *     } else {
 *         int styleStart;
 *
 *         for(i=styleRunCount-1; i>=0; --i) {
 *             if(i>0) {
 *                 styleStart=styleRuns[i-1].limit;
 *             } else {
 *                 styleStart=0;
 *             }
 *             if(limit>=styleStart) {
 *                 if(styleStart<start) { styleStart=start; }
 *                 renderRun(text, styleStart, limit,
 *                           direction, styleRuns[i].style);
 *                 if(styleStart==start) { break; }
 *                 limit=styleStart;
 *             }
 *         }
 *     }
 * }
 *
 * // the line object represents text[start..limit-1]
 * void renderLine(UBiDi *line, const UChar *text,
 *                 int32_t start, int32_t limit,
 *                 const StyleRun *styleRuns, int styleRunCount,
 *                 UErrorCode *pErrorCode) {
 *     UBiDiDirection direction=ubidi_getDirection(line);
 *     if(direction!=UBIDI_MIXED) {
 *         // unidirectional
 *         if(styleRunCount<=1) {
 *             renderRun(text, start, limit, direction, styleRuns[0].style);
 *         } else {
 *             renderDirectionalRun(text, start, limit,
 *                                  direction, styleRuns, styleRunCount);
 *         }
 *     } else {
 *         // mixed-directional
 *         int32_t count, i, length;
 *         UBiDiLevel level;
 *
 *         count=ubidi_countRuns(line, pErrorCode);
 *         if(U_SUCCESS(*pErrorCode)) {
 *             if(styleRunCount<=1) {
 *                 Style style=styleRuns[0].style;
 *
 *                 // iterate over directional runs
 *                 for(i=0; i<count; ++i) {
 *                     direction=ubidi_getVisualRun(line, i, &start, &length);
 *                     renderRun(text, start, start+length, direction, style);
 *                 }
 *             } else {
 *                 int32_t j;
 *
 *                 // iterate over both directional and style runs
 *                 for(i=0; i<count; ++i) {
 *                     direction=ubidi_getVisualRun(line, i, &start, &length);
 *                     renderDirectionalRun(text, start, start+length,
 *                                          direction, styleRuns, styleRunCount);
 *                 }
 *             }
 *         }
 *     }
 * }
 *
 *void renderParagraph(const UChar *text, int32_t length,
 *                     UBiDiDirection textDirection,
 *                      const StyleRun *styleRuns, int styleRunCount,
 *                      int lineWidth,
 *                      UErrorCode *pErrorCode) {
 *     UBiDi *para;
 *
 *     if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
 *         return;
 *     }
 *
 *     para=ubidi_openSized(length, 0, pErrorCode);
 *     if(para==NULL) { return; }
 *
 *     ubidi_setPara(para, text, length,
 *                   textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
 *                   NULL, pErrorCode);
 *     if(U_SUCCESS(*pErrorCode)) {
 *         UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
 *         StyleRun styleRun={ length, styleNormal };
 *         int width;
 *
 *         if(styleRuns==NULL || styleRunCount<=0) {
 *             styleRunCount=1;
 *             styleRuns=&styleRun;
 *         }
 *
 *         // assume styleRuns[styleRunCount-1].limit>=length
 *
 *         width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
 *         if(width<=lineWidth) {
 *             // everything fits onto one line
 *
 *             // prepare rendering a new line from either left or right
 *             startLine(paraLevel, width);
 *
 *             renderLine(para, text, 0, length,
 *                        styleRuns, styleRunCount, pErrorCode);
 *         } else {
 *             UBiDi *line;
 *
 *             // we need to render several lines
 *             line=ubidi_openSized(length, 0, pErrorCode);
 *             if(line!=NULL) {
 *                 int32_t start=0, limit;
 *                 int styleRunStart=0, styleRunLimit;
 *
 *                 for(;;) {
 *                     limit=length;
 *                     styleRunLimit=styleRunCount;
 *                     getLineBreak(text, start, &limit, para,
 *                                  styleRuns, styleRunStart, &styleRunLimit,
 *                                  &width);
 *                     ubidi_setLine(para, start, limit, line, pErrorCode);
 *                     if(U_SUCCESS(*pErrorCode)) {
 *                         // prepare rendering a new line
 *                         // from either left or right
 *                         startLine(paraLevel, width);
 *
 *                         renderLine(line, text, start, limit,
 *                                    styleRuns+styleRunStart,
 *                                    styleRunLimit-styleRunStart, pErrorCode);
 *                     }
 *                     if(limit==length) { break; }
 *                     start=limit;
 *                     styleRunStart=styleRunLimit-1;
 *                     if(start>=styleRuns[styleRunStart].limit) {
 *                         ++styleRunStart;
 *                     }
 *                 }
 *
 *                 ubidi_close(line);
 *             }
 *         }
 *     }
 *
 *     ubidi_close(para);
 *}
 *\endcode
 * </pre>
 */

/*DOCXX_TAG*/
/*@{*/

/**
 * UBiDiLevel is the type of the level values in this
 * Bidi implementation.
 * It holds an embedding level and indicates the visual direction
 * by its bit 0 (even/odd value).<p>
 *
 * It can also hold non-level values for the
 * <code>paraLevel</code> and <code>embeddingLevels</code>
 * arguments of <code>ubidi_setPara()</code>; there:
 * <ul>
 * <li>bit 7 of an <code>embeddingLevels[]</code>
 * value (the <code>UBIDI_LEVEL_OVERRIDE</code> flag) indicates whether
 * the calling application is
 * specifying the level of a character to <i>override</i> whatever the
 * Bidi implementation would resolve it to.</li>
 * <li><code>paraLevel</code> can be set to the
 * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
 * and <code>UBIDI_DEFAULT_RTL</code>.</li>
 * </ul>
 *
 * @see ubidi_setPara
 *
 * <p>The related constants are not real, valid level values.
 * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
 * a default for the paragraph level, to be used
 * when the <code>ubidi_setPara()</code> function
 * shall determine it but there is no
 * strong directional character in the input.<p>
 *
 * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
 * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
 * just like with normal LTR and RTL level values -
 * these special values are designed that way. Also, the implementation
 * assumes that <code>UBIDI_MAX_EXPLICIT_LEVEL</code> is odd.
 *
 * @see UBIDI_DEFAULT_LTR
 * @see UBIDI_DEFAULT_RTL
 * @see UBIDI_LEVEL_OVERRIDE
 * @see UBIDI_MAX_EXPLICIT_LEVEL
 * @stable ICU 2.0
 */
typedef uint8_t UBiDiLevel;

/** Paragraph level setting.<p>
 *
 * Constant indicating that the base direction depends on the first strong
 * directional character in the text according to the Unicode Bidirectional
 * Algorithm.
 * If no strong directional character is present,
 * then set the paragraph level to 0 (left-to-right).<p>
 *
 * If this value is used in conjunction with reordering modes
 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
 * is assumed to be visual LTR, and the text after reordering is required
 * to be the corresponding logical string with appropriate contextual
 * direction. The direction of the result string will be RTL if either
 * the rightmost or leftmost strong character of the source text is RTL
 * or Arabic Letter; the direction will be LTR otherwise.<p>
 *
 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
 * be added at the beginning of the result string to ensure round trip
 * (that the result string, when reordered back to visual, will produce
 * the original source text).
 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
 * @stable ICU 2.0
 */
#define UBIDI_DEFAULT_LTR 0xfe

/** Paragraph level setting.<p>
 *
 * Constant indicating that the base direction depends on the first strong
 * directional character in the text according to the Unicode Bidirectional
 * Algorithm. If no strong directional character is present,
 * then set the paragraph level to 1 (right-to-left).<p>
 *
 * If this value is used in conjunction with reordering modes
 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
 * is assumed to be visual LTR, and the text after reordering is required
 * to be the corresponding logical string with appropriate contextual
 * direction.
 * The direction of the result string will be RTL if either
 * the rightmost or leftmost strong character of the source text is RTL
 * or Arabic Letter, or if the text contains no strong character;
 * the direction will be LTR otherwise.<p>
 *
 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
 * be added at the beginning of the result string to ensure round trip
 * (that the result string, when reordered back to visual, will produce
 * the original source text).
 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
 * @stable ICU 2.0
 */
#define UBIDI_DEFAULT_RTL 0xff

/**
 * Maximum explicit embedding level.
 * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
 * @stable ICU 2.0
 */
#define UBIDI_MAX_EXPLICIT_LEVEL 61

/** Bit flag for level input.
 * Overrides directional properties.
 * This is bit 7 (0x80) of a level value: when it is set in an
 * <code>embeddingLevels[]</code> value passed to <code>ubidi_setPara()</code>,
 * the level specified by the caller overrides whatever the Bidi
 * implementation would resolve the character to.
 * @see UBiDiLevel
 * @stable ICU 2.0
 */
#define UBIDI_LEVEL_OVERRIDE 0x80

/**
 * Special value which can be returned by the mapping functions when a logical
 * index has no corresponding visual index or vice-versa. This may happen
 * for the logical-to-visual mapping of a Bidi control when option
 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen
 * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
 * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
 * @see ubidi_getVisualIndex
 * @see ubidi_getVisualMap
 * @see ubidi_getLogicalIndex
 * @see ubidi_getLogicalMap
 * @stable ICU 3.6
 */
#define UBIDI_MAP_NOWHERE (-1)

/**
 * <code>UBiDiDirection</code> values indicate the text direction.
 * @stable ICU 2.0
 */
enum UBiDiDirection {
    /** All left-to-right text. This is a 0 value. @stable ICU 2.0 */
    UBIDI_LTR,
    /** All right-to-left text. This is a 1 value. @stable ICU 2.0 */
    UBIDI_RTL,
    /** Mixed-directional text. This is a 2 value. @stable ICU 2.0 */
    UBIDI_MIXED
};

/** @stable ICU 2.0 */
typedef enum UBiDiDirection UBiDiDirection;

/**
 * Forward declaration of the <code>UBiDi</code> structure for the declaration of
 * the API functions. Its fields are implementation-specific.<p>
 * This structure holds information about a paragraph (or multiple paragraphs)
 * of text with Bidi-algorithm-related details, or about one line of
 * such a paragraph.<p>
 * Reordering can be done on a line, or on one or more paragraphs which are
 * then interpreted each as one single line.
 * @stable ICU 2.0
 */
struct UBiDi;

/** @stable ICU 2.0 */
typedef struct UBiDi UBiDi;

/**
 * Allocate a <code>UBiDi</code> structure.
 * Such an object is initially empty. It is assigned
 * the Bidi properties of a piece of text containing one or more paragraphs
 * by <code>ubidi_setPara()</code>
 * or the Bidi properties of a line within a paragraph by
 * <code>ubidi_setLine()</code>.<p>
 * This object can be reused for as long as it is not deallocated
 * by calling <code>ubidi_close()</code>.<p>
 * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
 * additional memory for internal structures as necessary.
 *
 * @return An empty <code>UBiDi</code> object.
 * @stable ICU 2.0
 */
U_STABLE UBiDi * U_EXPORT2
ubidi_open(void);

/**
 * Allocate a <code>UBiDi</code> structure with preallocated memory
 * for internal structures.
 * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
 * with no arguments, but it also preallocates memory for internal structures
 * according to the sizings supplied by the caller.<p>
 * Subsequent functions will not allocate any more memory, and are thus
 * guaranteed not to fail because of lack of memory.<p>
 * The preallocation can be limited to some of the internal memory
 * by setting some values to 0 here. That means that if, e.g.,
 * <code>maxRunCount</code> cannot be reasonably predetermined and should not
 * be set to <code>maxLength</code> (the only failproof value) to avoid
 * wasting memory, then <code>maxRunCount</code> could be set to 0 here
 * and the internal structures that are associated with it will be allocated
 * on demand, just like with <code>ubidi_open()</code>.
 *
 * @param maxLength is the maximum text or line length that internal memory
 *        will be preallocated for. An attempt to associate this object with a
 *        longer text will fail, unless this value is 0, which leaves the allocation
 *        up to the implementation.
 *
 * @param maxRunCount is the maximum anticipated number of same-level runs
 *        that internal memory will be preallocated for. An attempt to access
 *        visual runs on an object that was not preallocated for as many runs
 *        as the text was actually resolved to will fail,
 *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
 *        The number of runs depends on the actual text and may be anywhere between
 *        1 and <code>maxLength</code>. It is typically small.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @return An empty <code>UBiDi</code> object with preallocated memory.
 * @stable ICU 2.0
 */
U_STABLE UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);

/**
 * <code>ubidi_close()</code> must be called to free the memory
 * associated with a UBiDi object.<p>
 *
 * <strong>Important: </strong>
 * A parent <code>UBiDi</code> object must not be destroyed or reused if
 * it still has children.
 * If a <code>UBiDi</code> object has become the <i>child</i>
 * of another one (its <i>parent</i>) by calling
 * <code>ubidi_setLine()</code>, then the child object must
 * be destroyed (closed) or reused (by calling
 * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
 * before the parent object.
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 *
 * @see ubidi_setPara
 * @see ubidi_setLine
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
ubidi_close(UBiDi *pBiDi);

/**
 * Modify the operation of the Bidi algorithm such that it
 * approximates an "inverse Bidi" algorithm. This function
 * must be called before <code>ubidi_setPara()</code>.
 *
 * <p>The normal operation of the Bidi algorithm as described
 * in the Unicode Standard Annex #9 is to take text stored in logical
 * (keyboard, typing) order and to determine the reordering of it for visual
 * rendering.
 * Some legacy systems store text in visual order, and for operations
 * with standard, Unicode-based algorithms, the text needs to be transformed
 * to logical order. This is effectively the inverse algorithm of the
 * described Bidi algorithm.
 * Note that there is no standard algorithm for
 * this "inverse Bidi" and that the current implementation provides only an
 * approximation of "inverse Bidi".</p>
 *
 * <p>With <code>isInverse</code> set to <code>TRUE</code>,
 * this function changes the behavior of some of the subsequent functions
 * in a way that they can be used for the inverse Bidi algorithm.
 * Specifically, runs of text with numeric characters will be treated in a
 * special way and may need to be surrounded with LRM characters when they are
 * written in reordered sequence.</p>
 *
 * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>.
 * Since the actual input for "inverse Bidi" is visually ordered text and
 * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually
 * the runs of the logically ordered output.</p>
 *
 * <p>Calling this function with argument <code>isInverse</code> set to
 * <code>TRUE</code> is equivalent to calling
 * <code>ubidi_setReorderingMode</code> with argument
 * <code>reorderingMode</code>
 * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
 * Calling this function with argument <code>isInverse</code> set to
 * <code>FALSE</code> is equivalent to calling
 * <code>ubidi_setReorderingMode</code> with argument
 * <code>reorderingMode</code>
 * set to <code>#UBIDI_REORDER_DEFAULT</code>.</p>
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 *
 * @param isInverse specifies "forward" or "inverse" Bidi operation:
 *        <code>TRUE</code> selects the "inverse" operation,
 *        <code>FALSE</code> the regular "forward" operation.
 *
 * @see ubidi_setPara
 * @see ubidi_writeReordered
 * @see ubidi_setReorderingMode
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);

/**
 * Is this Bidi object set to perform the inverse Bidi algorithm?
 * <p>Note: calling this function after setting the reordering mode with
 * <code>ubidi_setReorderingMode</code> will return <code>TRUE</code> if the
 * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>,
 * <code>FALSE</code> for all other values.</p>
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 * @return TRUE if the Bidi object is set to perform the inverse Bidi algorithm
 *         by handling numbers as L.
 *
 * @see ubidi_setInverse
 * @see ubidi_setReorderingMode
 * @stable ICU 2.0
 */
U_STABLE UBool U_EXPORT2
ubidi_isInverse(UBiDi *pBiDi);

/**
 * Specify whether block separators (paragraph separators, Bidi class B)
 * must be allocated level zero,
 * so that successive paragraphs will progress from left to right.
 * This function must be called before <code>ubidi_setPara()</code>.
 * Paragraph separators (B) may appear in the text. Setting them to level zero
 * means that all paragraph separators (including one possibly appearing
 * in the last text position) are kept in the reordered text after the text
 * that they follow in the source text.
 * When this feature is not enabled, a paragraph separator at the last
 * position of the text before reordering will go to the first position
 * of the reordered text when the paragraph level is odd.
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 *
 * @param orderParagraphsLTR specifies whether paragraph separators (B) must
 * receive level 0, so that successive paragraphs progress from left to right.
 *
 * @see ubidi_setPara
 * @stable ICU 3.4
 */
U_STABLE void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);

/**
 * Is this Bidi object set to allocate level 0 to block separators so that
 * successive paragraphs progress from left to right?
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 * @return TRUE if the Bidi object is set to allocate level 0 to block
 *         separators.
 *
 * @see ubidi_orderParagraphsLTR
 * @stable ICU 3.4
 */
U_STABLE UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);

/**
 * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi
 * algorithm to use.
 *
 * @see ubidi_setReorderingMode
 * @stable ICU 3.6
 */
typedef enum UBiDiReorderingMode {
    /** Regular Logical to Visual Bidi algorithm according to Unicode.
      * This is a 0 value.
      * @stable ICU 3.6 */
    UBIDI_REORDER_DEFAULT = 0,
    /** Logical to Visual algorithm which handles numbers in a way which
      * mimics the behavior of Windows XP.
      * @stable ICU 3.6 */
    UBIDI_REORDER_NUMBERS_SPECIAL,
    /** Logical to Visual algorithm grouping numbers with adjacent R characters
      * (reversible algorithm).
      * @stable ICU 3.6 */
    UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
    /** Reorder runs only to transform a Logical LTR string to the Logical RTL
      * string with the same display, or vice-versa.<br>
      * If this mode is set together with option
      * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source
      * text may be removed and other controls may be added to produce the
      * minimum combination which has the required display.
      * @stable ICU 3.6 */
    UBIDI_REORDER_RUNS_ONLY,
    /** Visual to Logical algorithm which handles numbers like L
      * (same algorithm as selected by <code>ubidi_setInverse(TRUE)</code>).
      * @see ubidi_setInverse
      * @stable ICU 3.6 */
    UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
    /** Visual to Logical algorithm equivalent to the regular Logical to Visual
      * algorithm.
      * @stable ICU 3.6 */
    UBIDI_REORDER_INVERSE_LIKE_DIRECT,
    /** Inverse Bidi (Visual to Logical) algorithm for the
      * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
      * @stable ICU 3.6 */
    UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
    /** Number of values for reordering mode.
      * @stable ICU 3.6 */
    UBIDI_REORDER_COUNT
} UBiDiReorderingMode;

/**
 * Modify the operation of the Bidi algorithm such that it implements some
 * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
 * algorithm, depending on different values of the "reordering mode".
 * This function must be called before <code>ubidi_setPara()</code>, and stays
 * in effect until called again with a different argument.
 *
 * <p>The normal operation of the Bidi algorithm as described
 * in the Unicode Standard Annex #9 is to take text stored in logical
 * (keyboard, typing) order and to determine how to reorder it for visual
 * rendering.</p>
 *
 * <p>With the reordering mode set to a value other than
 * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of
 * some of the subsequent functions in a way such that they implement an
 * inverse Bidi algorithm or some other algorithm variants.</p>
 *
 * <p>Some legacy systems store text in visual order, and for operations
 * with standard, Unicode-based algorithms, the text needs to be transformed
 * into logical order. This is effectively the inverse algorithm of the
 * described Bidi algorithm. Note that there is no standard algorithm for
 * this "inverse Bidi", so a number of variants are implemented here.</p>
 *
 * <p>In other cases, it may be desirable to emulate some variant of the
 * Logical to Visual algorithm (e.g.
 * one used in MS Windows), or perform a
 * Logical to Logical transformation.</p>
 *
 * <ul>
 * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>,
 * the standard Bidi Logical to Visual algorithm is applied.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>,
 * the algorithm used to perform Bidi transformations when calling
 * <code>ubidi_setPara</code> should approximate the algorithm used in
 * Microsoft Windows XP rather than strictly conform to the Unicode Bidi
 * algorithm.
 * <br>
 * The differences between the basic algorithm and the algorithm addressed
 * by this option are as follows:
 * <ul>
 *   <li>Within text at an even embedding level, the sequence "123AB"
 *   (where AB represent R or AL letters) is transformed to "123BA" by the
 *   Unicode algorithm and to "BA123" by the Windows algorithm.</li>
 *   <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
 *   like regular numbers (EN).</li>
 * </ul></li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>,
 * numbers located between LTR text and RTL text are associated with the RTL
 * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
 * upper case letters represent RTL characters) will be transformed to
 * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
 * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
 * This makes the algorithm reversible and makes it useful when round trip
 * (from visual to logical and back to visual) must be achieved without
 * adding LRM characters. However, this is a variation from the standard
 * Unicode Bidi algorithm.<br>
 * The source text should not contain Bidi control characters other than LRM
 * or RLM.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_RUNS_ONLY</code>,
 * a "Logical to Logical" transformation must be performed:
 * <ul>
 * <li>If the default text level of the source text (argument <code>paraLevel</code>
 * in <code>ubidi_setPara</code>) is even, the source text will be handled as
 * LTR logical text and will be transformed to the RTL logical text which has
 * the same LTR visual display.</li>
 * <li>If the default level of the source text is odd, the source text
 * will be handled as RTL logical text and will be transformed to the
 * LTR logical text which has the same LTR visual display.</li>
 * </ul>
 * This mode may be needed when logical text which is basically Arabic or
 * Hebrew, with possible included numbers or phrases in English, has to be
 * displayed as if it had an even embedding level (this can happen if the
 * displaying application treats all text as if it was basically LTR).
 * <br>
 * This mode may also be needed in the reverse case, when logical text which is
 * basically English, with possible included phrases in Arabic or Hebrew, has to
 * be displayed as if it had an odd embedding level.
 * <br>
 * Both cases could be handled by adding LRE or RLE at the head of the text,
 * if the display subsystem supports these formatting controls. If it does not,
 * the problem may be handled by transforming the source text in this mode
 * before displaying it, so that it will be displayed properly.<br>
 * The source text should not contain Bidi control characters other than LRM
 * or RLM.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm
 * is applied.
 * Runs of text with numeric characters will be treated like LTR letters and
 * may need to be surrounded with LRM characters when they are written in
 * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can
 * be used with function <code>ubidi_writeReordered</code> to this end). This
 * mode is equivalent to calling <code>ubidi_setInverse()</code> with
 * argument <code>isInverse</code> set to <code>TRUE</code>.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual
 * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
 * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>
 * but is closer to the regular Bidi algorithm.
 * <br>
 * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
 * upper case represents RTL characters) will be transformed to
 * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
 * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
 * When used in conjunction with option
 * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally
 * adds Bidi marks to the output significantly more sparingly than mode
 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option
 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to
 * <code>ubidi_writeReordered</code>.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
 * Bidi algorithm used in Windows XP is used as an approximation of an
 * "inverse Bidi" algorithm.
 * <br>
 * For example, an LTR paragraph with the content "abc FED123" (where
 * upper case represents RTL characters) will be transformed to
 * "abc 123DEF".</li>
 * </ul>
 *
 * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
 * (i.e. those with a name starting with <code>UBIDI_REORDER_INVERSE</code>),
 * output runs should be retrieved using
 * <code>ubidi_getVisualRun()</code>, and the output text with
 * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in
 * "inverse Bidi" modes the input is actually visually ordered text and
 * reordered output returned by <code>ubidi_getVisualRun()</code> or
 * <code>ubidi_writeReordered()</code> are actually runs or character string
 * of logically ordered output.<br>
 * For all the "inverse Bidi" modes, the source text should not contain
 * Bidi control characters other than LRM or RLM.</p>
 *
 * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of
 * <code>ubidi_writeReordered</code> has no useful meaning and should not be
 * used in conjunction with any value of the reordering mode specifying
 * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>.</p>
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 * @param reorderingMode specifies the required variant of the Bidi algorithm.
 *
 * @see UBiDiReorderingMode
 * @see ubidi_setInverse
 * @see ubidi_setPara
 * @see ubidi_writeReordered
 * @stable ICU 3.6
 */
U_STABLE void U_EXPORT2
ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);

/**
 * What is the requested reordering mode for a given Bidi object?
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 * @return the current reordering mode of the Bidi object
 * @see ubidi_setReorderingMode
 * @stable ICU 3.6
 */
U_STABLE UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi *pBiDi);

/**
 * <code>UBiDiReorderingOption</code> values indicate which options are
 * specified to affect the Bidi algorithm.
845 * 846 * @see ubidi_setReorderingOptions 847 * @stable ICU 3.6 848 */ 849 typedef enum UBiDiReorderingOption { 850 /** 851 * option value for <code>ubidi_setReorderingOptions</code>: 852 * disable all the options which can be set with this function 853 * @see ubidi_setReorderingOptions 854 * @stable ICU 3.6 855 */ 856 UBIDI_OPTION_DEFAULT = 0, 857 858 /** 859 * option bit for <code>ubidi_setReorderingOptions</code>: 860 * insert Bidi marks (LRM or RLM) when needed to ensure correct result of 861 * a reordering to a Logical order 862 * 863 * <p>This option must be set or reset before calling 864 * <code>ubidi_setPara</code>.</p> 865 * 866 * <p>This option is significant only with reordering modes which generate 867 * a result with Logical order, specifically:</p> 868 * <ul> 869 * <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li> 870 * <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li> 871 * <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li> 872 * <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li> 873 * </ul> 874 * 875 * <p>If this option is set in conjunction with reordering mode 876 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling 877 * <code>ubidi_setInverse(TRUE)</code>, it implies 878 * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> 879 * in calls to function <code>ubidi_writeReordered()</code>.</p> 880 * 881 * <p>For other reordering modes, a minimum number of LRM or RLM characters 882 * will be added to the source text after reordering it so as to ensure 883 * round trip, i.e. when applying the inverse reordering mode on the 884 * resulting logical text with removal of Bidi marks 885 * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling 886 * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> 887 * in <code>ubidi_writeReordered</code>), the result will be identical to the 888 * source text in the first transformation.</p>
889 * 890 * <p>This option will be ignored if specified together with option 891 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option 892 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function 893 * <code>ubidi_writeReordered()</code> and it implies option 894 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function 895 * <code>ubidi_writeReordered()</code> if the reordering mode is 896 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p> 897 * 898 * @see ubidi_setReorderingMode 899 * @see ubidi_setReorderingOptions 900 * @stable ICU 3.6 901 */ 902 UBIDI_OPTION_INSERT_MARKS = 1, 903 904 /** 905 * option bit for <code>ubidi_setReorderingOptions</code>: 906 * remove Bidi control characters 907 * 908 * <p>This option must be set or reset before calling 909 * <code>ubidi_setPara</code>.</p> 910 * 911 * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 912 * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls 913 * to function <code>ubidi_writeReordered()</code> and it implies option 914 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p> 915 * 916 * @see ubidi_setReorderingMode 917 * @see ubidi_setReorderingOptions 918 * @stable ICU 3.6 919 */ 920 UBIDI_OPTION_REMOVE_CONTROLS = 2, 921 922 /** 923 * option bit for <code>ubidi_setReorderingOptions</code>: 924 * process the output as part of a stream to be continued 925 * 926 * <p>This option must be set or reset before calling 927 * <code>ubidi_setPara</code>.</p> 928 * 929 * <p>This option specifies that the caller is interested in processing a large 930 * text object in parts. 931 * The results of the successive calls are expected to be concatenated by the 932 * caller. Only the call for the last part will have this option bit off.</p> 933 * 934 * <p>When this option bit is on, <code>ubidi_setPara()</code> may process 935 * less than the full source text in order to truncate the text at a meaningful 936 * boundary.
The caller should call <code>ubidi_getProcessedLength()</code> 937 * immediately after calling <code>ubidi_setPara()</code> in order to 938 * determine how much of the source text has been processed. 939 * Source text beyond that length should be resubmitted in following calls to 940 * <code>ubidi_setPara</code>. The processed length may be less than 941 * the length of the source text if a character preceding the last character of 942 * the source text constitutes a reasonable boundary (like a block separator) 943 * for text to be continued.<br> 944 * If the last character of the source text constitutes a reasonable 945 * boundary, the whole text will be processed at once.<br> 946 * If nowhere in the source text there exists 947 * such a reasonable boundary, the processed length will be zero.<br> 948 * The caller should check for such an occurrence and do one of the following: 949 * <ul><li>submit a larger amount of text with a better chance to include 950 * a reasonable boundary.</li> 951 * <li>resubmit the same text after turning off option 952 * <code>UBIDI_OPTION_STREAMING</code>.</li></ul> 953 * In all cases, this option should be turned off before processing the last 954 * part of the text.</p> 955 * 956 * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used, 957 * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with 958 * argument <code>orderParagraphsLTR</code> set to <code>TRUE</code> before 959 * calling <code>ubidi_setPara</code> so that later paragraphs may be 960 * concatenated to previous paragraphs on the right.</p> 961 * 962 * @see ubidi_setReorderingMode 963 * @see ubidi_setReorderingOptions 964 * @see ubidi_getProcessedLength 965 * @see ubidi_orderParagraphsLTR 966 * @stable ICU 3.6 967 */ 968 UBIDI_OPTION_STREAMING = 4 969 } UBiDiReorderingOption; 970 971 /** 972 * Specify which of the reordering options 973 * should be applied during Bidi transformations. 974 * 975 * @param pBiDi is a <code>UBiDi</code> object.
976 * @param reorderingOptions is a combination of zero or more of the following 977 * options: 978 * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>, 979 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>. 980 * 981 * @see ubidi_getReorderingOptions 982 * @stable ICU 3.6 983 */ 984 U_STABLE void U_EXPORT2 985 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions); 986 987 /** 988 * What are the reordering options applied to a given Bidi object? 989 * 990 * @param pBiDi is a <code>UBiDi</code> object. 991 * @return the current reordering options of the Bidi object 992 * @see ubidi_setReorderingOptions 993 * @stable ICU 3.6 994 */ 995 U_STABLE uint32_t U_EXPORT2 996 ubidi_getReorderingOptions(UBiDi *pBiDi); 997 998 /** 999 * Perform the Unicode Bidi algorithm. It is defined in the 1000 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 1001 * version 13, 1002 * also described in The Unicode Standard, Version 4.0 .<p> 1003 * 1004 * This function takes a piece of plain text containing one or more paragraphs, 1005 * with or without externally specified embedding levels from <i>styled</i> 1006 * text and computes the left-right-directionality of each character.<p> 1007 * 1008 * If the entire text is all of the same directionality, then 1009 * the function may not perform all the steps described by the algorithm, 1010 * i.e., some levels may not be the same as if all steps were performed. 1011 * This is not relevant for unidirectional text.<br> 1012 * For example, in pure LTR text with numbers the numbers would get 1013 * a resolved level of 2 higher than the surrounding text according to 1014 * the algorithm. This implementation may set all resolved levels to 1015 * the same value in such a case.<p> 1016 * 1017 * The text can be composed of multiple paragraphs.
Occurrence of a block 1018 * separator in the text terminates a paragraph, and whatever comes next starts 1019 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 1020 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 1021 * in that case, the pair of characters is considered as terminating the 1022 * preceding paragraph, and a new paragraph will be started by a character 1023 * coming after the LF. 1024 * 1025 * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code> 1026 * which will be set to contain the reordering information, 1027 * especially the resolved levels for all the characters in <code>text</code>. 1028 * 1029 * @param text is a pointer to the text that the Bidi algorithm will be performed on. 1030 * This pointer is stored in the UBiDi object and can be retrieved 1031 * with <code>ubidi_getText()</code>.<br> 1032 * <strong>Note:</strong> the text must be (at least) <code>length</code> long. 1033 * 1034 * @param length is the length of the text; if <code>length==-1</code> then 1035 * the text must be zero-terminated. 1036 * 1037 * @param paraLevel specifies the default level for the text; 1038 * it is typically 0 (LTR) or 1 (RTL). 1039 * If the function shall determine the paragraph level from the text, 1040 * then <code>paraLevel</code> can be set to 1041 * either <code>#UBIDI_DEFAULT_LTR</code> 1042 * or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple 1043 * paragraphs, the paragraph level shall be determined separately for 1044 * each paragraph; if a paragraph does not include any strongly typed 1045 * character, then the desired default is used (0 for LTR or 1 for RTL). 1046 * Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code> 1047 * is also valid, with odd levels indicating RTL. 1048 * 1049 * @param embeddingLevels (in) may be used to preset the embedding and override levels, 1050 * ignoring characters like LRE and PDF in the text. 
1051 * A level overrides the directional property of its corresponding 1052 * (same index) character if the level has the 1053 * <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br> 1054 * Except for that bit, it must be 1055 * <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>, 1056 * with one exception: a level of zero may be specified for a paragraph 1057 * separator even if <code>paraLevel>0</code> when multiple paragraphs 1058 * are submitted in the same call to <code>ubidi_setPara()</code>.<br><br> 1059 * <strong>Caution: </strong>A copy of this pointer, not of the levels, 1060 * will be stored in the <code>UBiDi</code> object; 1061 * the <code>embeddingLevels</code> array must not be 1062 * deallocated before the <code>UBiDi</code> structure is destroyed or reused, 1063 * and the <code>embeddingLevels</code> 1064 * should not be modified to avoid unexpected results on subsequent Bidi operations. 1065 * However, the <code>ubidi_setPara()</code> and 1066 * <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br> 1067 * After the <code>UBiDi</code> object is reused or destroyed, the caller 1068 * must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br> 1069 * <strong>Note:</strong> the <code>embeddingLevels</code> array must be 1070 * at least <code>length</code> long. 1071 * This pointer can be <code>NULL</code> if this 1072 * value is not necessary. 1073 * 1074 * @param pErrorCode must be a valid pointer to an error code value. 1075 * @stable ICU 2.0 1076 */ 1077 U_STABLE void U_EXPORT2 1078 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, 1079 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, 1080 UErrorCode *pErrorCode); 1081 1082 /** 1083 * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to 1084 * contain the reordering information, especially the resolved levels, 1085 * for all the characters in a line of text. 
This line of text is 1086 * specified by referring to a <code>UBiDi</code> object representing 1087 * this information for a piece of text containing one or more paragraphs, 1088 * and by specifying a range of indexes in this text.<p> 1089 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p> 1090 * 1091 * This is used after calling <code>ubidi_setPara()</code> 1092 * for a piece of text, and after line-breaking on that text. 1093 * It is not necessary if each paragraph is treated as a single line.<p> 1094 * 1095 * After line-breaking, rules (L1) and (L2) for the treatment of 1096 * trailing WS and for reordering are performed on 1097 * a <code>UBiDi</code> object that represents a line.<p> 1098 * 1099 * <strong>Important: </strong><code>pLineBiDi</code> shares data with 1100 * <code>pParaBiDi</code>. 1101 * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>. 1102 * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line 1103 * before the object for its parent paragraph.<p> 1104 * 1105 * The text pointer that was stored in <code>pParaBiDi</code> is also copied, 1106 * and <code>start</code> is added to it so that it points to the beginning of the 1107 * line for this object. 1108 * 1109 * @param pParaBiDi is the parent paragraph object. It must have been set 1110 * by a successful call to ubidi_setPara. 1111 * 1112 * @param start is the line's first index into the text. 1113 * 1114 * @param limit is just behind the line's last index into the text 1115 * (its last index +1).<br> 1116 * It must be <code>0<=start<limit<=</code>containing paragraph limit. 1117 * If the specified line crosses a paragraph boundary, the function 1118 * will terminate with error code U_ILLEGAL_ARGUMENT_ERROR. 1119 * 1120 * @param pLineBiDi is the object that will now represent a line of the text. 1121 * 1122 * @param pErrorCode must be a valid pointer to an error code value. 
1123 * 1124 * @see ubidi_setPara 1125 * @see ubidi_getProcessedLength 1126 * @stable ICU 2.0 1127 */ 1128 U_STABLE void U_EXPORT2 1129 ubidi_setLine(const UBiDi *pParaBiDi, 1130 int32_t start, int32_t limit, 1131 UBiDi *pLineBiDi, 1132 UErrorCode *pErrorCode); 1133 1134 /** 1135 * Get the directionality of the text. 1136 * 1137 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1138 * 1139 * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code> 1140 * or <code>UBIDI_MIXED</code> 1141 * that indicates if the entire text 1142 * represented by this object is unidirectional, 1143 * and which direction, or if it is mixed-directional. 1144 * 1145 * @see UBiDiDirection 1146 * @stable ICU 2.0 1147 */ 1148 U_STABLE UBiDiDirection U_EXPORT2 1149 ubidi_getDirection(const UBiDi *pBiDi); 1150 1151 /** 1152 * Get the pointer to the text. 1153 * 1154 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1155 * 1156 * @return The pointer to the text that the UBiDi object was created for. 1157 * 1158 * @see ubidi_setPara 1159 * @see ubidi_setLine 1160 * @stable ICU 2.0 1161 */ 1162 U_STABLE const UChar * U_EXPORT2 1163 ubidi_getText(const UBiDi *pBiDi); 1164 1165 /** 1166 * Get the length of the text. 1167 * 1168 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1169 * 1170 * @return The length of the text that the UBiDi object was created for. 1171 * @stable ICU 2.0 1172 */ 1173 U_STABLE int32_t U_EXPORT2 1174 ubidi_getLength(const UBiDi *pBiDi); 1175 1176 /** 1177 * Get the paragraph level of the text. 1178 * 1179 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1180 * 1181 * @return The paragraph level. If there are multiple paragraphs, their 1182 * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or 1183 * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph 1184 * is returned. 
1185 * 1186 * @see UBiDiLevel 1187 * @see ubidi_getParagraph 1188 * @see ubidi_getParagraphByIndex 1189 * @stable ICU 2.0 1190 */ 1191 U_STABLE UBiDiLevel U_EXPORT2 1192 ubidi_getParaLevel(const UBiDi *pBiDi); 1193 1194 /** 1195 * Get the number of paragraphs. 1196 * 1197 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1198 * 1199 * @return The number of paragraphs. 1200 * @stable ICU 3.4 1201 */ 1202 U_STABLE int32_t U_EXPORT2 1203 ubidi_countParagraphs(UBiDi *pBiDi); 1204 1205 /** 1206 * Get a paragraph, given a position within the text. 1207 * This function returns information about a paragraph.<br> 1208 * Note: if the paragraph index is known, it is more efficient to 1209 * retrieve the paragraph information using ubidi_getParagraphByIndex().<p> 1210 * 1211 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1212 * 1213 * @param charIndex is the index of a character within the text, in the 1214 * range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>. 1215 * 1216 * @param pParaStart will receive the index of the first character of the 1217 * paragraph in the text. 1218 * This pointer can be <code>NULL</code> if this 1219 * value is not necessary. 1220 * 1221 * @param pParaLimit will receive the limit of the paragraph. 1222 * The l-value that you point to here may be the 1223 * same expression (variable) as the one for 1224 * <code>charIndex</code>. 1225 * This pointer can be <code>NULL</code> if this 1226 * value is not necessary. 1227 * 1228 * @param pParaLevel will receive the level of the paragraph. 1229 * This pointer can be <code>NULL</code> if this 1230 * value is not necessary. 1231 * 1232 * @param pErrorCode must be a valid pointer to an error code value. 1233 * 1234 * @return The index of the paragraph containing the specified position. 
1235 * 1236 * @see ubidi_getProcessedLength 1237 * @stable ICU 3.4 1238 */ 1239 U_STABLE int32_t U_EXPORT2 1240 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart, 1241 int32_t *pParaLimit, UBiDiLevel *pParaLevel, 1242 UErrorCode *pErrorCode); 1243 1244 /** 1245 * Get a paragraph, given the index of this paragraph. 1246 * 1247 * This function returns information about a paragraph.<p> 1248 * 1249 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1250 * 1251 * @param paraIndex is the number of the paragraph, in the 1252 * range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>. 1253 * 1254 * @param pParaStart will receive the index of the first character of the 1255 * paragraph in the text. 1256 * This pointer can be <code>NULL</code> if this 1257 * value is not necessary. 1258 * 1259 * @param pParaLimit will receive the limit of the paragraph. 1260 * This pointer can be <code>NULL</code> if this 1261 * value is not necessary. 1262 * 1263 * @param pParaLevel will receive the level of the paragraph. 1264 * This pointer can be <code>NULL</code> if this 1265 * value is not necessary. 1266 * 1267 * @param pErrorCode must be a valid pointer to an error code value. 1268 * 1269 * @stable ICU 3.4 1270 */ 1271 U_STABLE void U_EXPORT2 1272 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, 1273 int32_t *pParaStart, int32_t *pParaLimit, 1274 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode); 1275 1276 /** 1277 * Get the level for one character. 1278 * 1279 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1280 * 1281 * @param charIndex the index of a character. It must be in the range 1282 * <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>. 1283 * 1284 * @return The level for the character at charIndex (0 if charIndex is not 1285 * in the valid range).
1286 * 1287 * @see UBiDiLevel 1288 * @see ubidi_getProcessedLength 1289 * @stable ICU 2.0 1290 */ 1291 U_STABLE UBiDiLevel U_EXPORT2 1292 ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex); 1293 1294 /** 1295 * Get an array of levels for each character.<p> 1296 * 1297 * Note that this function may allocate memory under some 1298 * circumstances, unlike <code>ubidi_getLevelAt()</code>. 1299 * 1300 * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose 1301 * text length must be strictly positive. 1302 * 1303 * @param pErrorCode must be a valid pointer to an error code value. 1304 * 1305 * @return The levels array for the text, 1306 * or <code>NULL</code> if an error occurs. 1307 * 1308 * @see UBiDiLevel 1309 * @see ubidi_getProcessedLength 1310 * @stable ICU 2.0 1311 */ 1312 U_STABLE const UBiDiLevel * U_EXPORT2 1313 ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode); 1314 1315 /** 1316 * Get a logical run. 1317 * This function returns information about a run and is used 1318 * to retrieve runs in logical order.<p> 1319 * This is especially useful for line-breaking on a paragraph. 1320 * 1321 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1322 * 1323 * @param logicalPosition is a logical position within the source text. 1324 * 1325 * @param pLogicalLimit will receive the limit of the corresponding run. 1326 * The l-value that you point to here may be the 1327 * same expression (variable) as the one for 1328 * <code>logicalPosition</code>. 1329 * This pointer can be <code>NULL</code> if this 1330 * value is not necessary. 1331 * 1332 * @param pLevel will receive the level of the corresponding run. 1333 * This pointer can be <code>NULL</code> if this 1334 * value is not necessary. 
1335 * 1336 * @see ubidi_getProcessedLength 1337 * @stable ICU 2.0 1338 */ 1339 U_STABLE void U_EXPORT2 1340 ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, 1341 int32_t *pLogicalLimit, UBiDiLevel *pLevel); 1342 1343 /** 1344 * Get the number of runs. 1345 * This function may invoke the actual reordering on the 1346 * <code>UBiDi</code> object, after <code>ubidi_setPara()</code> 1347 * may have resolved only the levels of the text. Therefore, 1348 * <code>ubidi_countRuns()</code> may have to allocate memory, 1349 * and may fail doing so. 1350 * 1351 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1352 * 1353 * @param pErrorCode must be a valid pointer to an error code value. 1354 * 1355 * @return The number of runs. 1356 * @stable ICU 2.0 1357 */ 1358 U_STABLE int32_t U_EXPORT2 1359 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); 1360 1361 /** 1362 * Get one run's logical start, length, and directionality, 1363 * which can be 0 for LTR or 1 for RTL. 1364 * In an RTL run, the character at the logical start is 1365 * visually on the right of the displayed run. 1366 * The length is the number of characters in the run.<p> 1367 * <code>ubidi_countRuns()</code> should be called 1368 * before the runs are retrieved. 1369 * 1370 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1371 * 1372 * @param runIndex is the number of the run in visual order, in the 1373 * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>. 1374 * 1375 * @param pLogicalStart is the first logical character index in the text. 1376 * The pointer may be <code>NULL</code> if this index is not needed. 1377 * 1378 * @param pLength is the number of characters (at least one) in the run. 1379 * The pointer may be <code>NULL</code> if this is not needed. 1380 * 1381 * @return the directionality of the run, 1382 * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>, 1383 * never <code>UBIDI_MIXED</code>. 
1384 * 1385 * @see ubidi_countRuns 1386 * 1387 * Example: 1388 * <pre> 1389 * \code 1390 * int32_t i, count=ubidi_countRuns(pBiDi, pErrorCode), 1391 * logicalStart, visualIndex=0, length; 1392 * for(i=0; i<count; ++i) { 1393 * if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) { 1394 * do { // LTR 1395 * show_char(text[logicalStart++], visualIndex++); 1396 * } while(--length>0); 1397 * } else { 1398 * logicalStart+=length; // logicalLimit 1399 * do { // RTL 1400 * show_char(text[--logicalStart], visualIndex++); 1401 * } while(--length>0); 1402 * } 1403 * } 1404 *\endcode 1405 * </pre> 1406 * 1407 * Note that in right-to-left runs, code like this places 1408 * second surrogates before first ones (which is generally a bad idea) 1409 * and combining characters before base characters. 1410 * <p> 1411 * Use of <code>ubidi_writeReordered()</code>, optionally with the 1412 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order 1413 * to avoid these issues. 1414 * @stable ICU 2.0 1415 */ 1416 U_STABLE UBiDiDirection U_EXPORT2 1417 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, 1418 int32_t *pLogicalStart, int32_t *pLength); 1419 1420 /** 1421 * Get the visual position from a logical text position. 1422 * If such a mapping is used many times on the same 1423 * <code>UBiDi</code> object, then calling 1424 * <code>ubidi_getLogicalMap()</code> is more efficient.<p> 1425 * 1426 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no 1427 * visual position because the corresponding text character is a Bidi control 1428 * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1429 * <p> 1430 * When the visual output is altered by using options of 1431 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1432 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1433 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not 1434 * be correct. It is advised to use, when possible, reordering options 1435 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1436 * <p> 1437 * Note that in right-to-left runs, this mapping places 1438 * second surrogates before first ones (which is generally a bad idea) 1439 * and combining characters before base characters. 1440 * Use of <code>ubidi_writeReordered()</code>, optionally with the 1441 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead 1442 * of using the mapping, in order to avoid these issues. 1443 * 1444 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1445 * 1446 * @param logicalIndex is the index of a character in the text. 1447 * 1448 * @param pErrorCode must be a valid pointer to an error code value. 1449 * 1450 * @return The visual position of this character. 1451 * 1452 * @see ubidi_getLogicalMap 1453 * @see ubidi_getLogicalIndex 1454 * @see ubidi_getProcessedLength 1455 * @stable ICU 2.0 1456 */ 1457 U_STABLE int32_t U_EXPORT2 1458 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode); 1459 1460 /** 1461 * Get the logical text position from a visual position. 1462 * If such a mapping is used many times on the same 1463 * <code>UBiDi</code> object, then calling 1464 * <code>ubidi_getVisualMap()</code> is more efficient.<p> 1465 * 1466 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no 1467 * logical position because the corresponding text character is a Bidi mark 1468 * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 
1469 * <p> 1470 * This is the inverse function to <code>ubidi_getVisualIndex()</code>. 1471 * <p> 1472 * When the visual output is altered by using options of 1473 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1474 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1475 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not 1476 * be correct. It is advised to use, when possible, reordering options 1477 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1478 * 1479 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1480 * 1481 * @param visualIndex is the visual position of a character. 1482 * 1483 * @param pErrorCode must be a valid pointer to an error code value. 1484 * 1485 * @return The index of this character in the text. 1486 * 1487 * @see ubidi_getVisualMap 1488 * @see ubidi_getVisualIndex 1489 * @see ubidi_getResultLength 1490 * @stable ICU 2.0 1491 */ 1492 U_STABLE int32_t U_EXPORT2 1493 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode); 1494 1495 /** 1496 * Get a logical-to-visual index map (array) for the characters in the UBiDi 1497 * (paragraph or line) object. 1498 * <p> 1499 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the 1500 * corresponding text characters are Bidi controls removed from the visual 1501 * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. 1502 * <p> 1503 * When the visual output is altered by using options of 1504 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1505 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1506 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not 1507 * be correct. 
It is advised to use, when possible, reordering options 1508 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1509 * <p> 1510 * Note that in right-to-left runs, this mapping places 1511 * second surrogates before first ones (which is generally a bad idea) 1512 * and combining characters before base characters. 1513 * Use of <code>ubidi_writeReordered()</code>, optionally with the 1514 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead 1515 * of using the mapping, in order to avoid these issues. 1516 * 1517 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1518 * 1519 * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code> 1520 * indexes which will reflect the reordering of the characters. 1521 * If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number 1522 * of elements allocated in <code>indexMap</code> must be no less than 1523 * <code>ubidi_getResultLength()</code>. 1524 * The array does not need to be initialized.<br><br> 1525 * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. 1526 * 1527 * @param pErrorCode must be a valid pointer to an error code value. 1528 * 1529 * @see ubidi_getVisualMap 1530 * @see ubidi_getVisualIndex 1531 * @see ubidi_getProcessedLength 1532 * @see ubidi_getResultLength 1533 * @stable ICU 2.0 1534 */ 1535 U_STABLE void U_EXPORT2 1536 ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); 1537 1538 /** 1539 * Get a visual-to-logical index map (array) for the characters in the UBiDi 1540 * (paragraph or line) object. 1541 * <p> 1542 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the 1543 * corresponding text characters are Bidi marks inserted in the visual output 1544 * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 
1545 * <p> 1546 * When the visual output is altered by using options of 1547 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1548 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1549 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not 1550 * be correct. It is advised to use, when possible, reordering options 1551 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1552 * 1553 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1554 * 1555 * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code> 1556 * indexes which will reflect the reordering of the characters. 1557 * If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number 1558 * of elements allocated in <code>indexMap</code> must be no less than 1559 * <code>ubidi_getProcessedLength()</code>. 1560 * The array does not need to be initialized.<br><br> 1561 * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. 1562 * 1563 * @param pErrorCode must be a valid pointer to an error code value. 1564 * 1565 * @see ubidi_getLogicalMap 1566 * @see ubidi_getLogicalIndex 1567 * @see ubidi_getProcessedLength 1568 * @see ubidi_getResultLength 1569 * @stable ICU 2.0 1570 */ 1571 U_STABLE void U_EXPORT2 1572 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); 1573 1574 /** 1575 * This is a convenience function that does not use a UBiDi object. 1576 * It is intended to be used when an application has determined the levels 1577 * of objects (character sequences) and just needs to have them reordered (L2). 1578 * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a 1579 * <code>UBiDi</code> object. 1580 * 1581 * @param levels is an array with <code>length</code> levels that have been determined by 1582 * the application. 
1583 * 1584 * @param length is the number of levels in the array, or, semantically, 1585 * the number of objects to be reordered. 1586 * It must be <code>length>0</code>. 1587 * 1588 * @param indexMap is a pointer to an array of <code>length</code> 1589 * indexes which will reflect the reordering of the characters. 1590 * The array does not need to be initialized.<p> 1591 * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. 1592 * @stable ICU 2.0 1593 */ 1594 U_STABLE void U_EXPORT2 1595 ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); 1596 1597 /** 1598 * This is a convenience function that does not use a UBiDi object. 1599 * It is intended to be used when an application has determined the levels 1600 * of objects (character sequences) and just needs to have them reordered (L2). 1601 * This is equivalent to using <code>ubidi_getVisualMap()</code> on a 1602 * <code>UBiDi</code> object. 1603 * 1604 * @param levels is an array with <code>length</code> levels that have been determined by 1605 * the application. 1606 * 1607 * @param length is the number of levels in the array, or, semantically, 1608 * the number of objects to be reordered. 1609 * It must be <code>length>0</code>. 1610 * 1611 * @param indexMap is a pointer to an array of <code>length</code> 1612 * indexes which will reflect the reordering of the characters. 1613 * The array does not need to be initialized.<p> 1614 * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. 1615 * @stable ICU 2.0 1616 */ 1617 U_STABLE void U_EXPORT2 1618 ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); 1619 1620 /** 1621 * Invert an index map. 1622 * The index mapping of the first map is inverted and written to 1623 * the second one. 
1624 * 1625 * @param srcMap is an array with <code>length</code> elements 1626 * which defines the original mapping from a source array containing 1627 * <code>length</code> elements to a destination array. 1628 * Some elements of the source array may have no mapping in the 1629 * destination array. In that case, their value will be 1630 * the special value <code>UBIDI_MAP_NOWHERE</code>. 1631 * All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>. 1632 * Some elements may have a value >= <code>length</code>, if the 1633 * destination array has more elements than the source array. 1634 * There must be no duplicate indexes (two or more elements with the 1635 * same value except <code>UBIDI_MAP_NOWHERE</code>). 1636 * 1637 * @param destMap is an array with a number of elements equal to 1 + the highest 1638 * value in <code>srcMap</code>. 1639 * <code>destMap</code> will be filled with the inverse mapping. 1640 * If element with index i in <code>srcMap</code> has a value k different 1641 * from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of 1642 * the source array maps to element k in the destination array. 1643 * The inverse map will have value i in its k-th element. 1644 * For all elements of the destination array which do not map to 1645 * an element in the source array, the corresponding element in the 1646 * inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>. 1647 * 1648 * @param length is the number of elements in <code>srcMap</code>; the size of <code>destMap</code> is determined by the highest value in <code>srcMap</code>, not by <code>length</code>. 
1649 * @see UBIDI_MAP_NOWHERE 1650 * @stable ICU 2.0 1651 */ 1652 U_STABLE void U_EXPORT2 1653 ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length); 1654 1655 /** option flags for ubidi_writeReordered() */ 1656 1657 /** 1658 * option bit for ubidi_writeReordered(): 1659 * keep combining characters after their base characters in RTL runs 1660 * 1661 * @see ubidi_writeReordered 1662 * @stable ICU 2.0 1663 */ 1664 #define UBIDI_KEEP_BASE_COMBINING 1 1665 1666 /** 1667 * option bit for ubidi_writeReordered(): 1668 * replace characters with the "mirrored" property in RTL runs 1669 * by their mirror-image mappings 1670 * 1671 * @see ubidi_writeReordered 1672 * @stable ICU 2.0 1673 */ 1674 #define UBIDI_DO_MIRRORING 2 1675 1676 /** 1677 * option bit for ubidi_writeReordered(): 1678 * surround the run with LRMs if necessary; 1679 * this is part of the approximate "inverse Bidi" algorithm 1680 * 1681 * <p>This option does not imply corresponding adjustment of the index 1682 * mappings.</p> 1683 * 1684 * @see ubidi_setInverse 1685 * @see ubidi_writeReordered 1686 * @stable ICU 2.0 1687 */ 1688 #define UBIDI_INSERT_LRM_FOR_NUMERIC 4 1689 1690 /** 1691 * option bit for ubidi_writeReordered(): 1692 * remove Bidi control characters 1693 * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC) 1694 * 1695 * <p>This option does not imply corresponding adjustment of the index 1696 * mappings.</p> 1697 * 1698 * @see ubidi_writeReordered 1699 * @stable ICU 2.0 1700 */ 1701 #define UBIDI_REMOVE_BIDI_CONTROLS 8 1702 1703 /** 1704 * option bit for ubidi_writeReordered(): 1705 * write the output in reverse order 1706 * 1707 * <p>This has the same effect as calling <code>ubidi_writeReordered()</code> 1708 * first without this option, and then calling 1709 * <code>ubidi_writeReverse()</code> without mirroring. 1710 * Doing this in the same step is faster and avoids a temporary buffer. 
1711 * An example for using this option is output to a character terminal that 1712 * is designed for RTL scripts and stores text in reverse order.</p> 1713 * 1714 * @see ubidi_writeReordered 1715 * @stable ICU 2.0 1716 */ 1717 #define UBIDI_OUTPUT_REVERSE 16 1718 1719 /** 1720 * Get the length of the source text processed by the last call to 1721 * <code>ubidi_setPara()</code>. This length may be different from the length 1722 * of the source text if option <code>#UBIDI_OPTION_STREAMING</code> 1723 * has been set. 1724 * <br> 1725 * Note that whenever the length of the text affects the execution or the 1726 * result of a function, it is the processed length which must be considered, 1727 * except for <code>ubidi_setPara</code> (which receives unprocessed source 1728 * text) and <code>ubidi_getLength</code> (which returns the original length 1729 * of the source text).<br> 1730 * In particular, the processed length is the one to consider in the following 1731 * cases: 1732 * <ul> 1733 * <li>maximum value of the <code>limit</code> argument of 1734 * <code>ubidi_setLine</code></li> 1735 * <li>maximum value of the <code>charIndex</code> argument of 1736 * <code>ubidi_getParagraph</code></li> 1737 * <li>maximum value of the <code>charIndex</code> argument of 1738 * <code>ubidi_getLevelAt</code></li> 1739 * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li> 1740 * <li>maximum value of the <code>logicalStart</code> argument of 1741 * <code>ubidi_getLogicalRun</code></li> 1742 * <li>maximum value of the <code>logicalIndex</code> argument of 1743 * <code>ubidi_getVisualIndex</code></li> 1744 * <li>number of elements filled in the <code>*indexMap</code> argument of 1745 * <code>ubidi_getLogicalMap</code></li> 1746 * <li>length of text processed by <code>ubidi_writeReordered</code></li> 1747 * </ul> 1748 * 1749 * @param pBiDi is the paragraph <code>UBiDi</code> object. 
1750 * 1751 * @return The length of the part of the source text processed by 1752 * the last call to <code>ubidi_setPara</code>. 1753 * @see ubidi_setPara 1754 * @see UBIDI_OPTION_STREAMING 1755 * @stable ICU 3.6 1756 */ 1757 U_STABLE int32_t U_EXPORT2 1758 ubidi_getProcessedLength(const UBiDi *pBiDi); 1759 1760 /** 1761 * Get the length of the reordered text resulting from the last call to 1762 * <code>ubidi_setPara()</code>. This length may be different from the length 1763 * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code> 1764 * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set. 1765 * <br> 1766 * This resulting length is the one to consider in the following cases: 1767 * <ul> 1768 * <li>maximum value of the <code>visualIndex</code> argument of 1769 * <code>ubidi_getLogicalIndex</code></li> 1770 * <li>number of elements of the <code>*indexMap</code> argument of 1771 * <code>ubidi_getVisualMap</code></li> 1772 * </ul> 1773 * Note that this length stays identical to the source text length if 1774 * Bidi marks are inserted or removed using option bits of 1775 * <code>ubidi_writeReordered</code>, or if option 1776 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set. 1777 * 1778 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1779 * 1780 * @return The length of the reordered text resulting from 1781 * the last call to <code>ubidi_setPara</code>. 1782 * @see ubidi_setPara 1783 * @see UBIDI_OPTION_INSERT_MARKS 1784 * @see UBIDI_OPTION_REMOVE_CONTROLS 1785 * @stable ICU 3.6 1786 */ 1787 U_STABLE int32_t U_EXPORT2 1788 ubidi_getResultLength(const UBiDi *pBiDi); 1789 1790 U_CDECL_BEGIN 1791 /** 1792 * value returned by <code>UBiDiClassCallback</code> callbacks when 1793 * there is no need to override the standard Bidi class for a given code point. 
1794 * @see UBiDiClassCallback 1795 * @stable ICU 3.6 1796 */ 1797 #define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT 1798 1799 /** 1800 * Callback type declaration for overriding default Bidi class values with 1801 * custom ones. 1802 * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code> 1803 * object by calling the <code>ubidi_setClassCallback()</code> function; 1804 * then the callback will be invoked by the UBA implementation any time the 1805 * class of a character is to be determined.</p> 1806 * 1807 * @param context is a pointer to the callback private data. 1808 * 1809 * @param c is the code point to get a Bidi class for. 1810 * 1811 * @return The directional property / Bidi class for the given code point 1812 * <code>c</code> if the default class has been overridden, or 1813 * <code>#U_BIDI_CLASS_DEFAULT</code> if the standard Bidi class value 1814 * for <code>c</code> is to be used. 1815 * @see ubidi_setClassCallback 1816 * @see ubidi_getClassCallback 1817 * @stable ICU 3.6 1818 */ 1819 typedef UCharDirection U_CALLCONV 1820 UBiDiClassCallback(const void *context, UChar32 c); 1821 1822 U_CDECL_END 1823 1824 /** 1825 * Retrieve the Bidi class for a given code point. 1826 * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a 1827 * value other than <code>#U_BIDI_CLASS_DEFAULT</code>, that value is used; 1828 * otherwise the default class determination mechanism is invoked.</p> 1829 * 1830 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1831 * 1832 * @param c is the code point whose Bidi class must be retrieved. 1833 * 1834 * @return The Bidi class for character <code>c</code> based 1835 * on the given <code>pBiDi</code> instance. 
1836 * @see UBiDiClassCallback 1837 * @stable ICU 3.6 1838 */ 1839 U_STABLE UCharDirection U_EXPORT2 1840 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c); 1841 1842 /** 1843 * Set the callback function and callback data used by the UBA 1844 * implementation for Bidi class determination. 1845 * <p>This may be useful for assigning Bidi classes to PUA characters, or 1846 * for special application needs. For instance, an application may want to 1847 * handle all spaces like L or R characters (according to the base direction) 1848 * when creating the visual ordering of logical lines which are part of a report 1849 * organized in columns: there should not be interaction between adjacent 1850 * cells.</p> 1851 * 1852 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1853 * 1854 * @param newFn is the new callback function pointer. 1855 * 1856 * @param newContext is the new callback context pointer. This can be NULL. 1857 * 1858 * @param oldFn fillin: Returns the old callback function pointer. This can be 1859 * NULL. 1860 * 1861 * @param oldContext fillin: Returns the old callback's context. This can be 1862 * NULL. 1863 * 1864 * @param pErrorCode must be a valid pointer to an error code value. 1865 * 1866 * @see ubidi_getClassCallback 1867 * @stable ICU 3.6 1868 */ 1869 U_STABLE void U_EXPORT2 1870 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, 1871 const void *newContext, UBiDiClassCallback **oldFn, 1872 const void **oldContext, UErrorCode *pErrorCode); 1873 1874 /** 1875 * Get the current callback function used for Bidi class determination. 1876 * 1877 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1878 * 1879 * @param fn fillin: Returns the callback function pointer. 1880 * 1881 * @param context fillin: Returns the callback's private context. 
1882 * 1883 * @see ubidi_setClassCallback 1884 * @stable ICU 3.6 1885 */ 1886 U_STABLE void U_EXPORT2 1887 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context); 1888 1889 /** 1890 * Take a <code>UBiDi</code> object containing the reordering 1891 * information for a piece of text (one or more paragraphs) set by 1892 * <code>ubidi_setPara()</code> or for a line of text set by 1893 * <code>ubidi_setLine()</code> and write a reordered string to the 1894 * destination buffer. 1895 * 1896 * This function preserves the integrity of characters with multiple 1897 * code units and (optionally) combining characters. 1898 * Characters in RTL runs can be replaced by mirror-image characters 1899 * in the destination buffer. Note that "real" mirroring has 1900 * to be done in a rendering engine by glyph selection 1901 * and that for many "mirrored" characters there are no 1902 * Unicode characters as mirror-image equivalents. 1903 * There are also options to insert or remove Bidi control 1904 * characters; see the description of the <code>destSize</code> 1905 * and <code>options</code> parameters and of the option bit flags. 1906 * 1907 * @param pBiDi A pointer to a <code>UBiDi</code> object that 1908 * is set by <code>ubidi_setPara()</code> or 1909 * <code>ubidi_setLine()</code> and contains the reordering 1910 * information for the text that it was defined for, 1911 * as well as a pointer to that text.<br><br> 1912 * The text was aliased (only the pointer was stored 1913 * without copying the contents) and must not have been modified 1914 * since the <code>ubidi_setPara()</code> call. 1915 * 1916 * @param dest A pointer to where the reordered text is to be copied. 1917 * The source text and <code>dest[destSize]</code> 1918 * must not overlap. 1919 * 1920 * @param destSize The size of the <code>dest</code> buffer, 1921 * in number of UChars. 
1922 * If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code> 1923 * option is set, then the destination length could be 1924 * as large as 1925 * <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>. 1926 * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option 1927 * is set, then the destination length may be less than 1928 * <code>ubidi_getLength(pBiDi)</code>. 1929 * If none of these options is set, then the destination length 1930 * will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>. 1931 * 1932 * @param options A bit set of options for the reordering that control 1933 * how the reordered text is written. 1934 * The options include mirroring the characters on a code 1935 * point basis and inserting LRM characters, which is used 1936 * especially for transforming visually stored text 1937 * to logically stored text (although this is still an 1938 * imperfect implementation of an "inverse Bidi" algorithm 1939 * because it uses the "forward Bidi" algorithm at its core). 1940 * The available options are: 1941 * <code>#UBIDI_DO_MIRRORING</code>, 1942 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1943 * <code>#UBIDI_KEEP_BASE_COMBINING</code>, 1944 * <code>#UBIDI_OUTPUT_REVERSE</code>, 1945 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> 1946 * 1947 * @param pErrorCode must be a valid pointer to an error code value. 1948 * 1949 * @return The length of the output string. 1950 * 1951 * @see ubidi_getProcessedLength 1952 * @stable ICU 2.0 1953 */ 1954 U_STABLE int32_t U_EXPORT2 1955 ubidi_writeReordered(UBiDi *pBiDi, 1956 UChar *dest, int32_t destSize, 1957 uint16_t options, 1958 UErrorCode *pErrorCode); 1959 1960 /** 1961 * Reverse a Right-To-Left run of Unicode text. 1962 * 1963 * This function preserves the integrity of characters with multiple 1964 * code units and (optionally) combining characters. 1965 * Characters can be replaced by mirror-image characters 1966 * in the destination buffer. 
Note that "real" mirroring has 1967 * to be done in a rendering engine by glyph selection 1968 * and that for many "mirrored" characters there are no 1969 * Unicode characters as mirror-image equivalents. 1970 * There is also an option to remove Bidi control 1971 * characters. 1972 * 1973 * This function is the implementation for reversing RTL runs as part 1974 * of <code>ubidi_writeReordered()</code>. For detailed descriptions 1975 * of the parameters, see there. 1976 * Since no Bidi controls are inserted here, the output string length 1977 * will never exceed <code>srcLength</code>. 1978 * 1979 * @see ubidi_writeReordered 1980 * 1981 * @param src A pointer to the RTL run text. 1982 * 1983 * @param srcLength The length of the RTL run. 1984 * 1985 * @param dest A pointer to where the reordered text is to be copied. 1986 * <code>src[srcLength]</code> and <code>dest[destSize]</code> 1987 * must not overlap. 1988 * 1989 * @param destSize The size of the <code>dest</code> buffer, 1990 * in number of UChars. 1991 * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option 1992 * is set, then the destination length may be less than 1993 * <code>srcLength</code>. 1994 * If this option is not set, then the destination length 1995 * will be exactly <code>srcLength</code>. 1996 * 1997 * @param options A bit set of options for the reordering that control 1998 * how the reordered text is written. 1999 * See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>. 2000 * 2001 * @param pErrorCode must be a valid pointer to an error code value. 2002 * 2003 * @return The length of the output string. 2004 * @stable ICU 2.0 2005 */ 2006 U_STABLE int32_t U_EXPORT2 2007 ubidi_writeReverse(const UChar *src, int32_t srcLength, 2008 UChar *dest, int32_t destSize, 2009 uint16_t options, 2010 UErrorCode *pErrorCode); 2011 2012 /*#define BIDI_SAMPLE_CODE*/ 2013 /*@}*/ 2014 2015 #endif 2016