1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2004-2008, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: utext.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2004oct06 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __UTEXT_H__ 18 #define __UTEXT_H__ 19 20 /** 21 * \file 22 * \brief C API: Abstract Unicode Text API 23 * 24 * The Text Access API provides a means to allow text that is stored in alternative 25 * formats to work with ICU services. ICU normally operates on text that is 26 * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type 27 * UnicodeString for C++ APIs. 28 * 29 * ICU Text Access allows other formats, such as UTF-8 or non-contiguous 30 * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. 31 * 32 * There are three general classes of usage for UText: 33 * 34 * Application Level Use. This is the simplest usage - applications would 35 * use one of the utext_open() functions on their input text, and pass 36 * the resulting UText to the desired ICU service. 37 * 38 * Second is usage in ICU Services, such as break iteration, that will need to 39 * operate on input presented to them as a UText. These implementations 40 * will need to use the iteration and related UText functions to gain 41 * access to the actual text. 42 * 43 * The third class of UText users are "text providers." These are the 44 * UText implementations for the various text storage formats. An application 45 * or system with a unique text storage format can implement a set of 46 * UText provider functions for that format, which will then allow 47 * ICU services to operate on that format. 
48 * 49 * 50 * <em>Iterating over text</em> 51 * 52 * Here is sample code for a forward iteration over the contents of a UText 53 * 54 * \code 55 * UChar32 c; 56 * UText *ut = whatever(); 57 * 58 * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { 59 * // do whatever with the codepoint c here. 60 * } 61 * \endcode 62 * 63 * And here is similar code to iterate in the reverse direction, from the end 64 * of the text towards the beginning. 65 * 66 * \code 67 * UChar32 c; 68 * UText *ut = whatever(); 69 * int64_t textLength = utext_nativeLength(ut); 70 * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { 71 * // do whatever with the codepoint c here. 72 * } 73 * \endcode 74 * 75 * <em>Characters and Indexing</em> 76 * 77 * Indexing into text by UText functions is nearly always in terms of the native 78 * indexing of the underlying text storage. The storage format could be UTF-8 79 * or UTF-32, for example. When coding to the UText access API, no assumptions 80 * can be made regarding the size of characters, or how far an index 81 * may move when iterating between characters. 82 * 83 * All indices supplied to UText functions are pinned to the length of the 84 * text. An out-of-bounds index is not considered to be an error, but is 85 * adjusted to be in the range 0 <= index <= length of input text. 86 * 87 * 88 * When an index position is returned from a UText function, it will be 89 * a native index to the underlying text. In the case of multi-unit characters, 90 * it will always refer to the first position of the character, 91 * never to the interior. This is essentially the same thing as saying that 92 * a returned index will always point to a boundary between characters. 93 * 94 * When a native index is supplied to a UText function, all indices that 95 * refer to any part of a multi-unit character representation are considered 96 * to be equivalent. 
In the case of multi-unit characters, an incoming index 97 * will be logically normalized to refer to the start of the character. 98 * 99 * It is possible to test whether a native index is on a code point boundary 100 * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). 101 * If the index is returned unchanged, it was on a code point boundary. If 102 * an adjusted index is returned, the original index referred to the 103 * interior of a character. 104 * 105 * <em>Conventions for calling UText functions</em> 106 * 107 * Most UText access functions have as their first parameter a (UText *) pointer, 108 * which specifies the UText to be used. Unless otherwise noted, the 109 * pointer must refer to a valid, open UText. Attempting to 110 * use a closed UText or passing a NULL pointer is a programming error and 111 * will produce undefined results or NULL pointer exceptions. 112 * 113 * The utext_open family of functions can either open an existing (closed) 114 * UText, or heap allocate a new UText. Here is sample code for creating 115 * a stack-allocated UText. 116 * 117 * \code 118 * char *s = whatever(); // A utf-8 string 119 * UErrorCode status = U_ZERO_ERROR; 120 * UText ut = UTEXT_INITIALIZER; 121 * utext_openUTF8(&ut, s, -1, &status); 122 * if (U_FAILURE(status)) { 123 * // error handling 124 * } else { 125 * // work with the UText 126 * } 127 * \endcode 128 * 129 * Any existing UText passed to an open function _must_ have been initialized, 130 * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated 131 * by an open function. Passing NULL will cause the open function to 132 * heap-allocate and fully initialize a new UText. 133 * 134 */ 135 136 137 138 #include "unicode/utypes.h" 139 #ifdef XP_CPLUSPLUS 140 #include "unicode/rep.h" 141 #include "unicode/unistr.h" 142 #include "unicode/chariter.h" 143 #endif 144 145 146 U_CDECL_BEGIN 147 148 struct UText; 149 typedef struct UText UText; /**< C typedef for struct UText. 
@stable ICU 3.6 */ 150 151 152 /*************************************************************************************** 153 * 154 * C Functions for creating UText wrappers around various kinds of text strings. 155 * 156 ****************************************************************************************/ 157 158 159 /** 160 * Close function for UText instances. 161 * Cleans up, releases any resources being held by an open UText. 162 * <p> 163 * If the UText was originally allocated by one of the utext_open functions, 164 * the storage associated with the UText will also be freed. 165 * If the UText storage originated with the application, as it would with 166 * a local or static instance, the storage will not be deleted. 167 * 168 * An open UText can be reset to refer to a new string by using one of the utext_open() 169 * functions without first closing the UText. 170 * 171 * @param ut The UText to be closed. 172 * @return NULL if the UText struct was deleted by the close. If the UText struct 173 * was originally provided by the caller to the open function, it is 174 * returned by this function, and may be safely used again in 175 * a subsequent utext_open. 176 * 177 * @stable ICU 3.4 178 */ 179 U_STABLE UText * U_EXPORT2 180 utext_close(UText *ut); 181 182 183 /** 184 * Open a read-only UText implementation for UTF-8 strings. 185 * 186 * \htmlonly 187 * Any invalid UTF-8 in the input will be handled in this way: 188 * a sequence of bytes that has the form of a truncated, but otherwise valid, 189 * UTF-8 sequence will be replaced by a single Unicode replacement character, \uFFFD. 190 * Any other illegal bytes will each be replaced by a \uFFFD. 191 * \endhtmlonly 192 * 193 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 194 * If non-NULL, must refer to an initialized UText struct, which will then 195 * be reset to reference the specified UTF-8 string. 196 * @param s A UTF-8 string. Must not be NULL. 
197 * @param length The length of the UTF-8 string in bytes, or -1 if the string is 198 * zero terminated. 199 * @param status Errors are returned here. 200 * @return A pointer to the UText. If a pre-allocated UText was provided, it 201 * will always be used and returned. 202 * @stable ICU 3.4 203 */ 204 U_STABLE UText * U_EXPORT2 205 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); 206 207 208 /** 209 * Open a read-only UText for a UChar * string. 210 * 211 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 212 * If non-NULL, must refer to an initialized UText struct, which will then 213 * be reset to reference the specified UChar string. 214 * @param s A UChar (UTF-16) string. 215 * @param length The number of UChars in the input string, or -1 if the string is 216 * zero terminated. 217 * @param status Errors are returned here. 218 * @return A pointer to the UText. If a pre-allocated UText was provided, it 219 * will always be used and returned. 220 * @stable ICU 3.4 221 */ 222 U_STABLE UText * U_EXPORT2 223 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); 224 225 226 #ifdef XP_CPLUSPLUS 227 /** 228 * Open a writable UText for a non-const UnicodeString. 229 * 230 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 231 * If non-NULL, must refer to an initialized UText struct, which will then 232 * be reset to reference the specified input string. 233 * @param s A UnicodeString. 234 * @param status Errors are returned here. 235 * @return Pointer to the UText. If a UText was supplied as input, this 236 * will always be used and returned. 237 * @stable ICU 3.4 238 */ 239 U_STABLE UText * U_EXPORT2 240 utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status); 241 242 243 /** 244 * Open a UText for a const UnicodeString. The resulting UText will not be writable. 245 * 246 * @param ut Pointer to a UText struct. 
If NULL, a new UText will be created. 247 * If non-NULL, must refer to an initialized UText struct, which will then 248 * be reset to reference the specified input string. 249 * @param s A const UnicodeString to be wrapped. 250 * @param status Errors are returned here. 251 * @return Pointer to the UText. If a UText was supplied as input, this 252 * will always be used and returned. 253 * @stable ICU 3.4 254 */ 255 U_STABLE UText * U_EXPORT2 256 utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status); 257 258 259 /** 260 * Open a writable UText implementation for an ICU Replaceable object. 261 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 262 * If non-NULL, must refer to an already existing UText, which will then 263 * be reset to reference the specified replaceable text. 264 * @param rep A Replaceable text object. 265 * @param status Errors are returned here. 266 * @return Pointer to the UText. If a UText was supplied as input, this 267 * will always be used and returned. 268 * @see Replaceable 269 * @stable ICU 3.4 270 */ 271 U_STABLE UText * U_EXPORT2 272 utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status); 273 274 /** 275 * Open a UText implementation over an ICU CharacterIterator. 276 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 277 * If non-NULL, must refer to an already existing UText, which will then 278 * be reset to reference the specified character iterator. 279 * @param ic A Character Iterator. 280 * @param status Errors are returned here. 281 * @return Pointer to the UText. If a UText was supplied as input, this 282 * will always be used and returned. 283 * @see CharacterIterator 284 * @stable ICU 3.4 285 */ 286 U_STABLE UText * U_EXPORT2 287 utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status); 288 289 #endif 290 291 292 /** 293 * Clone a UText. 
This is much like opening a UText where the source text is itself 294 * another UText. 295 * 296 * A deep clone will copy both the UText data structures and the underlying text. 297 * The original and cloned UText will operate completely independently; modifications 298 * made to the text in one will not affect the other. Text providers are not 299 * required to support deep clones. The user of utext_clone() must check the status return 300 * and be prepared to handle failures. 301 * 302 * The standard UText implementations for UTF-8, UChar *, UnicodeString and 303 * Replaceable all support deep cloning. 304 * 305 * The UText returned from a deep clone will be writable, assuming that the text 306 * provider is able to support writing, even if the source UText had been made 307 * non-writable by means of utext_freeze(). 308 * 309 * A shallow clone replicates only the UText data structures; it does not make 310 * a copy of the underlying text. Shallow clones can be used as an efficient way to 311 * have multiple iterators active in a single text string that is not being 312 * modified. 313 * 314 * A shallow clone operation will not fail, barring truly exceptional conditions such 315 * as memory allocation failures. 316 * 317 * Shallow UText clones should be avoided if the UText functions that modify the 318 * text are expected to be used, either on the original or the cloned UText. 319 * Any such modifications can cause unpredictable behavior. Read Only 320 * shallow clones provide some protection against errors of this type by 321 * disabling text modification via the cloned UText. 322 * 323 * A shallow clone made with the readOnly parameter == FALSE will preserve the 324 * utext_isWritable() state of the source object. Note, however, that 325 * write operations must be avoided while more than one UText exists that refers 326 * to the same underlying text. 327 * 328 * A UText and its clone may be safely concurrently accessed by separate threads. 
329 * This is true for read access only with shallow clones, and for both read and 330 * write access with deep clones. 331 * It is the responsibility of the Text Provider to ensure that this thread safety 332 * constraint is met. 333 * 334 * @param dest A UText struct to be filled in with the result of the clone operation, 335 * or NULL if the clone function should heap-allocate a new UText struct. 336 * If non-NULL, must refer to an already existing UText, which will then 337 * be reset to become the clone. 338 * @param src The UText to be cloned. 339 * @param deep TRUE to request a deep clone, FALSE for a shallow clone. 340 * @param readOnly TRUE to request that the cloned UText have read only access to the 341 * underlying text. 342 343 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR 344 * will be returned if the text provider is unable to clone the 345 * original text. 346 * @return The newly created clone, or NULL if the clone operation failed. 347 * @stable ICU 3.4 348 */ 349 U_STABLE UText * U_EXPORT2 350 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); 351 352 353 /** 354 * Compare two UText objects for equality. 355 * UTexts are equal if they are iterating over the same text, and 356 * have the same iteration position within the text. 357 * If either or both of the parameters are NULL, the comparison is FALSE. 358 * 359 * @param a The first of the two UTexts to compare. 360 * @param b The other UText to be compared. 361 * @return TRUE if the two UTexts are equal. 362 * @stable ICU 3.6 363 */ 364 U_STABLE UBool U_EXPORT2 365 utext_equals(const UText *a, const UText *b); 366 367 368 /***************************************************************************** 369 * 370 * Functions to work with the text represented by a UText wrapper 371 * 372 *****************************************************************************/ 373 374 /** 375 * Get the length of the text. 
Depending on the characteristics 376 * of the underlying text representation, this may be expensive. 377 * @see utext_isLengthExpensive() 378 * 379 * 380 * @param ut the text to be accessed. 381 * @return the length of the text, expressed in native units. 382 * 383 * @stable ICU 3.4 384 */ 385 U_STABLE int64_t U_EXPORT2 386 utext_nativeLength(UText *ut); 387 388 /** 389 * Return TRUE if calculating the length of the text could be expensive. 390 * Finding the length of NUL terminated strings is considered to be expensive. 391 * 392 * Note that the value of this function may change 393 * as the result of other operations on a UText. 394 * Once the length of a string has been discovered, it will no longer 395 * be expensive to report it. 396 * 397 * @param ut the text to be accessed. 398 * @return TRUE if determining the length of the text could be time consuming. 399 * @stable ICU 3.4 400 */ 401 U_STABLE UBool U_EXPORT2 402 utext_isLengthExpensive(const UText *ut); 403 404 /** 405 * Returns the code point at the requested index, 406 * or U_SENTINEL (-1) if it is out of bounds. 407 * 408 * If the specified index points to the interior of a multi-unit 409 * character - one of the trail bytes of a UTF-8 sequence, for example - 410 * the complete code point will be returned. 411 * 412 * The iteration position will be set to the start of the returned code point. 413 * 414 * This function is roughly equivalent to the sequence 415 * utext_setNativeIndex(ut, index); 416 * utext_current32(ut); 417 * (There is a subtle difference if the index is out of bounds by being less than zero - 418 * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() 419 * will return the char at zero. utext_char32At(negative index), on the other hand, will 420 * return the U_SENTINEL value of -1.) 421 * 422 * @param ut the text to be accessed. 423 * @param nativeIndex the native index of the character to be accessed. 
If the index points 424 * to other than the first unit of a multi-unit character, it will be adjusted 425 * to the start of the character. 426 * @return the code point at the specified index. 427 * @stable ICU 3.4 428 */ 429 U_STABLE UChar32 U_EXPORT2 430 utext_char32At(UText *ut, int64_t nativeIndex); 431 432 433 /** 434 * 435 * Get the code point at the current iteration position, 436 * or U_SENTINEL (-1) if the iteration has reached the end of 437 * the input text. 438 * 439 * @param ut the text to be accessed. 440 * @return the Unicode code point at the current iterator position, or U_SENTINEL (-1) if the iteration has reached the end of the input text. 441 * @stable ICU 3.4 442 */ 443 U_STABLE UChar32 U_EXPORT2 444 utext_current32(UText *ut); 445 446 447 /** 448 * Get the code point at the current iteration position of the UText, and 449 * advance the position to the first index following the character. 450 * 451 * If the position is at the end of the text (the index following 452 * the last character, which is also the length of the text), 453 * return U_SENTINEL (-1) and do not advance the index. 454 * 455 * This is a post-increment operation. 456 * 457 * An inline macro version of this function, UTEXT_NEXT32(), 458 * is available for performance-critical use. 459 * 460 * @param ut the text to be accessed. 461 * @return the Unicode code point at the iteration position, or U_SENTINEL (-1) if the position is at the end of the text. 462 * @see UTEXT_NEXT32 463 * @stable ICU 3.4 464 */ 465 U_STABLE UChar32 U_EXPORT2 466 utext_next32(UText *ut); 467 468 469 /** 470 * Move the iterator position to the character (code point) whose 471 * index precedes the current position, and return that character. 472 * This is a pre-decrement operation. 
473 * 474 * If the initial position is at the start of the text (index of 0) 475 * return U_SENTINEL (-1), and leave the position unchanged. 476 * 477 * An inline macro version of this function, UTEXT_PREVIOUS32(), 478 * is available for performance-critical use. 479 * 480 * @param ut the text to be accessed. 
481 * @return the previous UChar32 code point, or U_SENTINEL (-1) 482 * if the iteration has reached the start of the text. 483 * @see UTEXT_PREVIOUS32 484 * @stable ICU 3.4 485 */ 486 U_STABLE UChar32 U_EXPORT2 487 utext_previous32(UText *ut); 488 489 490 /** 491 * Set the iteration index and return the code point at that index. 492 * Leave the iteration index at the start of the following code point. 493 * 494 * This function is the most efficient and convenient way to 495 * begin a forward iteration. The results are identical to those 496 * from the sequence 497 * \code 498 * utext_setNativeIndex(); 499 * utext_next32(); 500 * \endcode 501 * 502 * @param ut the text to be accessed. 503 * @param nativeIndex Iteration index, in the native units of the text provider. 504 * @return Code point which starts at or before index, 505 * or U_SENTINEL (-1) if it is out of bounds. 506 * @stable ICU 3.4 507 */ 508 U_STABLE UChar32 U_EXPORT2 509 utext_next32From(UText *ut, int64_t nativeIndex); 510 511 512 513 /** 514 * Set the iteration index, and return the code point preceding the 515 * one specified by the initial index. Leave the iteration position 516 * at the start of the returned code point. 517 * 518 * This function is the most efficient and convenient way to 519 * begin a backwards iteration. 520 * 521 * @param ut the text to be accessed. 522 * @param nativeIndex Iteration index in the native units of the text provider. 
523 * @return Code point preceding the one at the initial index, 524 * or U_SENTINEL (-1) if it is out of bounds. 525 * 526 * @stable ICU 3.4 527 */ 528 U_STABLE UChar32 U_EXPORT2 529 utext_previous32From(UText *ut, int64_t nativeIndex); 530 531 /** 532 * Get the current iterator position, which can range from 0 to 533 * the length of the text. 534 * The position is a native index into the input text, in whatever format it 535 * may have (possibly UTF-8 for example), and may not always be the same as 536 * the corresponding UChar (UTF-16) index. 
537 * The returned position will always be aligned to a code point boundary. 538 * 539 * @param ut the text to be accessed. 540 * @return the current index position, in the native units of the text provider. 541 * @stable ICU 3.4 542 */ 543 U_STABLE int64_t U_EXPORT2 544 utext_getNativeIndex(const UText *ut); 545 546 /** 547 * Set the current iteration position to the nearest code point 548 * boundary at or preceding the specified index. 549 * The index is in the native units of the original input text. 550 * If the index is out of range, it will be pinned to be within 551 * the range of the input text. 552 * <p> 553 * It will usually be more efficient to begin an iteration 554 * using the functions utext_next32From() or utext_previous32From() 555 * rather than utext_setNativeIndex(). 556 * <p> 557 * Moving the index position to an adjacent character is best done 558 * with utext_next32(), utext_previous32() or utext_moveIndex32(). 559 * Attempting to do direct arithmetic on the index position is 560 * complicated by the fact that the size (in native units) of a 561 * character depends on the underlying representation of the character 562 * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not 563 * easily knowable. 564 * 565 * @param ut the text to be accessed. 566 * @param nativeIndex the native unit index of the new iteration position. 567 * @stable ICU 3.4 568 */ 569 U_STABLE void U_EXPORT2 570 utext_setNativeIndex(UText *ut, int64_t nativeIndex); 571 572 /** 573 * Move the iterator position by delta code points. The number of code points 574 * is a signed number; a negative delta will move the iterator backwards, 575 * towards the start of the text. 576 * <p> 577 * The index is moved by <code>delta</code> code points 578 * forward or backward, but no further backward than to 0 and 579 * no further forward than to utext_nativeLength(). 580 * The resulting index value will be in between 0 and length, inclusive. 581 * 582 * @param ut the text to be accessed. 
583 * @param delta the signed number of code points to move the iteration position. 584 * @return TRUE if the position could be moved the requested number of positions while 585 * staying within the range [0 - text length]. 586 * @stable ICU 3.4 587 */ 588 U_STABLE UBool U_EXPORT2 589 utext_moveIndex32(UText *ut, int32_t delta); 590 591 /** 592 * Get the native index of the character preceding the current position. 593 * If the iteration position is already at the start of the text, zero 594 * is returned. 595 * The value returned is the same as that obtained from the following sequence, 596 * but without the side effect of changing the iteration position. 597 * 598 * \code 599 * UText *ut = whatever; 600 * ... 601 * utext_previous32(ut); 602 * utext_getNativeIndex(ut); 603 * \endcode 604 * 605 * This function is most useful during forwards iteration, where it will get the 606 * native index of the character most recently returned from utext_next32(). 607 * 608 * @param ut the text to be accessed. 609 * @return the native index of the character preceding the current index position, 610 * or zero if the current position is at the start of the text. 611 * @stable ICU 3.6 612 */ 613 U_STABLE int64_t U_EXPORT2 614 utext_getPreviousNativeIndex(UText *ut); 615 616 617 /** 618 * 619 * Extract text from a UText into a UChar buffer. The range of text to be extracted 620 * is specified in the native indices of the UText provider. These may not necessarily 621 * be UTF-16 indices. 622 * <p> 623 * The size (number of 16 bit UChars) of the data to be extracted is returned. The 624 * full number of UChars is returned, even when the extracted text is truncated 625 * because the specified buffer size is too small. 626 * <p> 627 * The extracted string will (if you are a user) / must (if you are a text provider) 628 * be NUL-terminated if there is sufficient space in the destination buffer. This 629 * terminating NUL is not included in the returned length. 
630 * <p> 631 * The iteration index is left at the position following the last extracted character. 632 * 633 * @param ut the UText from which to extract data. 634 * @param nativeStart the native index of the first character to extract. 635 * If the specified index is out of range, 636 * it will be pinned to be within 0 <= index <= textLength. 637 * @param nativeLimit the native string index of the position following the last 638 * character to extract. If the specified index is out of range, 639 * it will be pinned to be within 0 <= index <= textLength. 640 * nativeLimit must be >= nativeStart. 641 * @param dest the UChar (UTF-16) buffer into which the extracted text is placed. 642 * @param destCapacity The size, in UChars, of the destination buffer. May be zero 643 * for precomputing the required size. 644 * @param status receives any error status. 645 * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the 646 * buffer was too small. Returns number of UChars for preflighting. 647 * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. 648 * 649 * @stable ICU 3.4 650 */ 651 U_STABLE int32_t U_EXPORT2 652 utext_extract(UText *ut, 653 int64_t nativeStart, int64_t nativeLimit, 654 UChar *dest, int32_t destCapacity, 655 UErrorCode *status); 656 657 658 /************************************************************************************ 659 * 660 * #define inline versions of selected performance-critical text access functions 661 * Caution: do not use auto increment++ or decrement-- expressions 662 * as parameters to these macros. 663 * 664 * For most use, where there is no extreme performance constraint, the 665 * normal, non-inline functions are a better choice. The resulting code 666 * will be smaller, and, if the need ever arises, easier to debug. 667 * 668 * These are implemented as #defines rather than real functions 669 * because there is no fully portable way to do inline functions in plain C. 
670 * 671 ************************************************************************************/ 672 673 /** 674 * inline version of utext_next32(), for performance-critical situations. 675 * 676 * Get the code point at the current iteration position of the UText, and 677 * advance the position to the first index following the character. 678 * This is a post-increment operation. 679 * Returns U_SENTINEL (-1) if the position is at the end of the 680 * text. 681 * The macro returns the unit at chunkOffset directly from the current chunk when chunkOffset is less than chunkLength and that unit is below 0xd800; in every other case it calls utext_next32(). The ut argument is evaluated more than once. * 682 * @stable ICU 3.4 683 */ 684 #define UTEXT_NEXT32(ut) \ 685 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ 686 ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) 687 688 /** 689 * inline version of utext_previous32(), for performance-critical situations. 690 * 691 * Move the iterator position to the character (code point) whose 692 * index precedes the current position, and return that character. 693 * This is a pre-decrement operation. 694 * Returns U_SENTINEL (-1) if the position is at the start of the text. 695 * The macro returns the unit at chunkOffset-1 directly from the current chunk when chunkOffset is greater than 0 and that unit is below 0xd800; in every other case it calls utext_previous32(). The ut argument is evaluated more than once. * 696 * @stable ICU 3.4 697 */ 698 #define UTEXT_PREVIOUS32(ut) \ 699 ((ut)->chunkOffset > 0 && \ 700 (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ 701 (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) 702 703 /** 704 * inline version of utext_getNativeIndex(), for performance-critical situations. 705 * 706 * Get the current iterator position, which can range from 0 to 707 * the length of the text. 
708 * The position is a native index into the input text, in whatever format it 709 * may have (possibly UTF-8 for example), and may not always be the same as 710 * the corresponding UChar (UTF-16) index. 711 * The returned position will always be aligned to a code point boundary. 712 * The macro yields chunkNativeStart+chunkOffset when chunkOffset <= nativeIndexingLimit, and otherwise calls the mapOffsetToNative function reached through pFuncs. The ut argument is evaluated more than once. * 713 * @stable ICU 3.6 714 */ 715 #define UTEXT_GETNATIVEINDEX(ut) \ 716 ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? 
\ 717 (ut)->chunkNativeStart+(ut)->chunkOffset : \ 718 (ut)->pFuncs->mapOffsetToNative(ut)) 719 720 /** 721 * inline version of utext_setNativeIndex(), for performance-critical situations. 722 * 723 * Set the current iteration position to the nearest code point 724 * boundary at or preceding the specified index. 725 * The index is in the native units of the original input text. 726 * If the index is out of range, it will be pinned to be within 727 * the range of the input text. 728 * The macro expands to a brace-enclosed statement block, not an expression, so it cannot be used where a value is required. When (ix - chunkNativeStart) is between 0 and nativeIndexingLimit, inclusive, it stores that offset in chunkOffset; otherwise it calls utext_setNativeIndex(). Both arguments may be evaluated more than once. * 729 * @stable ICU 3.8 730 */ 731 #define UTEXT_SETNATIVEINDEX(ut, ix) \ 732 { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ 733 if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \ 734 (ut)->chunkOffset=(int32_t)__offset; \ 735 } else { \ 736 utext_setNativeIndex((ut), (ix)); } } 737 738 739 740 /************************************************************************************ 741 * 742 * Functions related to writing or modifying the text. 743 * These will work only with modifiable UTexts. Attempting to 744 * modify a read-only UText will return an error status. 745 * 746 ************************************************************************************/ 747 748 749 /** 750 * Return TRUE if the text can be written (modified) with utext_replace() or 751 * utext_copy(). For the text to be writable, the text provider must 752 * be of a type that supports writing and the UText must not be frozen. 753 * 754 * Attempting to modify text when utext_isWritable() is FALSE will fail - 755 * the text will not be modified, and an error will be returned from the function 756 * that attempted the modification. 757 * 758 * @param ut the UText to be tested. 
759 * @return TRUE if the text is modifiable. 760 * 761 * @see utext_freeze() 762 * @see utext_replace() 763 * @see utext_copy() 764 * @stable ICU 3.4 765 * 766 */ 767 U_STABLE UBool U_EXPORT2 768 utext_isWritable(const UText *ut); 769 770 771 /** 772 * Test whether there is meta data associated with the text. 
773 * @see Replaceable::hasMetaData() 774 * 775 * @param ut The UText to be tested 776 * @return TRUE if the underlying text includes meta data. 777 * @stable ICU 3.4 778 */ 779 U_STABLE UBool U_EXPORT2 780 utext_hasMetaData(const UText *ut); 781 782 783 /** 784 * Replace a range of the original text with a replacement text. 785 * 786 * Leaves the current iteration position at the position following the 787 * newly inserted replacement text. 788 * 789 * This function is only available on UText types that support writing, 790 * that is, ones where utext_isWritable() returns TRUE. 791 * 792 * When using this function, there should be only a single UText opened onto the 793 * underlying native text string. Behavior after a replace operation 794 * on a UText is undefined for any other additional UTexts that refer to the 795 * modified string. 796 * 797 * @param ut the UText representing the text to be operated on. 798 * @param nativeStart the native index of the start of the region to be replaced. 799 * @param nativeLimit the native index of the character following the region to be replaced. 800 * @param replacementText pointer to the replacement text, a UChar string. 801 * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated. 802 * @param status receives any error status. Possible errors include 803 * U_NO_WRITE_PERMISSION. 804 * 805 * @return The signed number of (native) storage units by which 806 * the length of the text expanded or contracted. 807 * 808 * @stable ICU 3.4 809 */ 810 U_STABLE int32_t U_EXPORT2 811 utext_replace(UText *ut, 812 int64_t nativeStart, int64_t nativeLimit, 813 const UChar *replacementText, int32_t replacementLength, 814 UErrorCode *status); 815 816 817 818 /** 819 * 820 * Copy or move a substring from one position to another within the text, 821 * while retaining any metadata associated with the text. 822 * This function is used to duplicate or reorder substrings. 
823 * The destination index must not overlap the source range. 824 * 825 * The text to be copied or moved is inserted at destIndex; 826 * it does not replace or overwrite any existing text. 827 * 828 * The iteration position is left following the newly inserted text 829 * at the destination position. 830 * 831 * This function is only available on UText types that support writing, 832 * that is, ones where utext_isWritable() returns TRUE. 833 * 834 * When using this function, there should be only a single UText opened onto the 835 * underlying native text string. Behavior after a copy operation 836 * on a UText is undefined in any other additional UTexts that refer to the 837 * modified string. 838 * 839 * @param ut The UText representing the text to be operated on. 840 * @param nativeStart The native index of the start of the region to be copied or moved 841 * @param nativeLimit The native index of the character position following the region 842 * to be copied. 843 * @param destIndex The native destination index to which the source substring is 844 * copied or moved. 845 * @param move If TRUE, then the substring is moved, not copied/duplicated. 846 * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION 847 * 848 * @stable ICU 3.4 849 */ 850 U_STABLE void U_EXPORT2 851 utext_copy(UText *ut, 852 int64_t nativeStart, int64_t nativeLimit, 853 int64_t destIndex, 854 UBool move, 855 UErrorCode *status); 856 857 858 /** 859 * <p> 860 * Freeze a UText. This prevents any modification to the underlying text itself 861 * by means of functions operating on this UText. 862 * </p> 863 * <p> 864 * Once frozen, a UText can not be unfrozen. The intent is to ensure 865 * that a the text underlying a frozen UText wrapper cannot be modified via that UText. 
866 * </p> 867 * <p> 868 * Caution: freezing a UText will disable changes made via the specific 869 * frozen UText wrapper only; it will not have any effect on the ability to 870 * directly modify the text by bypassing the UText. Any such backdoor modifications 871 * are always an error while UText access is occuring because the underlying 872 * text can get out of sync with UText's buffering. 873 * </p> 874 * 875 * @param ut The UText to be frozen. 876 * @see utext_isWritable() 877 * @stable ICU 3.6 878 */ 879 U_STABLE void U_EXPORT2 880 utext_freeze(UText *ut); 881 882 883 /** 884 * UText provider properties (bit field indexes). 885 * 886 * @see UText 887 * @stable ICU 3.4 888 */ 889 enum { 890 /** 891 * It is potentially time consuming for the provider to determine the length of the text. 892 * @stable ICU 3.4 893 */ 894 UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, 895 /** 896 * Text chunks remain valid and usable until the text object is modified or 897 * deleted, not just until the next time the access() function is called 898 * (which is the default). 899 * @stable ICU 3.4 900 */ 901 UTEXT_PROVIDER_STABLE_CHUNKS = 2, 902 /** 903 * The provider supports modifying the text via the replace() and copy() 904 * functions. 905 * @see Replaceable 906 * @stable ICU 3.4 907 */ 908 UTEXT_PROVIDER_WRITABLE = 3, 909 /** 910 * There is meta data associated with the text. 911 * @see Replaceable::hasMetaData() 912 * @stable ICU 3.4 913 */ 914 UTEXT_PROVIDER_HAS_META_DATA = 4, 915 /** 916 * Text provider owns the text storage. 917 * Generally occurs as the result of a deep clone of the UText. 918 * When closing the UText, the associated text must 919 * also be closed/deleted/freed/ whatever is appropriate. 920 * @stable ICU 3.6 921 */ 922 UTEXT_PROVIDER_OWNS_TEXT = 5 923 }; 924 925 /** 926 * Function type declaration for UText.clone(). 927 * 928 * clone a UText. Much like opening a UText where the source text is itself 929 * another UText. 
930 * 931 * A deep clone will copy both the UText data structures and the underlying text. 932 * The original and cloned UText will operate completely independently; modifications 933 * made to the text in one will not effect the other. Text providers are not 934 * required to support deep clones. The user of clone() must check the status return 935 * and be prepared to handle failures. 936 * 937 * A shallow clone replicates only the UText data structures; it does not make 938 * a copy of the underlying text. Shallow clones can be used as an efficient way to 939 * have multiple iterators active in a single text string that is not being 940 * modified. 941 * 942 * A shallow clone operation must not fail except for truly exceptional conditions such 943 * as memory allocation failures. 944 * 945 * A UText and its clone may be safely concurrently accessed by separate threads. 946 * This is true for both shallow and deep clones. 947 * It is the responsibility of the Text Provider to ensure that this thread safety 948 * constraint is met. 949 950 * 951 * @param dest A UText struct to be filled in with the result of the clone operation, 952 * or NULL if the clone function should heap-allocate a new UText struct. 953 * @param src The UText to be cloned. 954 * @param deep TRUE to request a deep clone, FALSE for a shallow clone. 955 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR 956 * should be returned if the text provider is unable to clone the 957 * original text. 958 * @return The newly created clone, or NULL if the clone operation failed. 959 * 960 * @stable ICU 3.4 961 */ 962 typedef UText * U_CALLCONV 963 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); 964 965 966 /** 967 * Function type declaration for UText.nativeLength(). 968 * 969 * @param ut the UText to get the length of. 970 * @return the length, in the native units of the original text string. 
971 * @see UText 972 * @stable ICU 3.4 973 */ 974 typedef int64_t U_CALLCONV 975 UTextNativeLength(UText *ut); 976 977 /** 978 * Function type declaration for UText.access(). Get the description of the text chunk 979 * containing the text at a requested native index. The UText's iteration 980 * position will be left at the requested index. If the index is out 981 * of bounds, the iteration position will be left at the start or end 982 * of the string, as appropriate. 983 * 984 * Chunks must begin and end on code point boundaries. A single code point 985 * comprised of multiple storage units must never span a chunk boundary. 986 * 987 * 988 * @param ut the UText being accessed. 989 * @param nativeIndex Requested index of the text to be accessed. 990 * @param forward If TRUE, then the returned chunk must contain text 991 * starting from the index, so that start<=index<limit. 992 * If FALSE, then the returned chunk must contain text 993 * before the index, so that start<index<=limit. 994 * @return True if the requested index could be accessed. The chunk 995 * will contain the requested text. 996 * False value if a chunk cannot be accessed 997 * (the requested index is out of bounds). 998 * 999 * @see UText 1000 * @stable ICU 3.4 1001 */ 1002 typedef UBool U_CALLCONV 1003 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); 1004 1005 /** 1006 * Function type declaration for UText.extract(). 1007 * 1008 * Extract text from a UText into a UChar buffer. The range of text to be extracted 1009 * is specified in the native indices of the UText provider. These may not necessarily 1010 * be UTF-16 indices. 1011 * <p> 1012 * The size (number of 16 bit UChars) in the data to be extracted is returned. The 1013 * full amount is returned, even when the specified buffer size is smaller. 1014 * <p> 1015 * The extracted string will (if you are a user) / must (if you are a text provider) 1016 * be NUL-terminated if there is sufficient space in the destination buffer. 
1017 * 1018 * @param ut the UText from which to extract data. 1019 * @param nativeStart the native index of the first characer to extract. 1020 * @param nativeLimit the native string index of the position following the last 1021 * character to extract. 1022 * @param dest the UChar (UTF-16) buffer into which the extracted text is placed 1023 * @param destCapacity The size, in UChars, of the destination buffer. May be zero 1024 * for precomputing the required size. 1025 * @param status receives any error status. 1026 * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for 1027 * preflighting. 1028 * @return Number of UChars in the data. Does not include a trailing NUL. 1029 * 1030 * @stable ICU 3.4 1031 */ 1032 typedef int32_t U_CALLCONV 1033 UTextExtract(UText *ut, 1034 int64_t nativeStart, int64_t nativeLimit, 1035 UChar *dest, int32_t destCapacity, 1036 UErrorCode *status); 1037 1038 /** 1039 * Function type declaration for UText.replace(). 1040 * 1041 * Replace a range of the original text with a replacement text. 1042 * 1043 * Leaves the current iteration position at the position following the 1044 * newly inserted replacement text. 1045 * 1046 * This function need only be implemented on UText types that support writing. 1047 * 1048 * When using this function, there should be only a single UText opened onto the 1049 * underlying native text string. The function is responsible for updating the 1050 * text chunk within the UText to reflect the updated iteration position, 1051 * taking into account any changes to the underlying string's structure caused 1052 * by the replace operation. 1053 * 1054 * @param ut the UText representing the text to be operated on. 1055 * @param nativeStart the index of the start of the region to be replaced 1056 * @param nativeLimit the index of the character following the region to be replaced. 
1057 * @param replacementText pointer to the replacement text 1058 * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. 1059 * @param status receives any error status. Possible errors include 1060 * U_NO_WRITE_PERMISSION 1061 * 1062 * @return The signed number of (native) storage units by which 1063 * the length of the text expanded or contracted. 1064 * 1065 * @stable ICU 3.4 1066 */ 1067 typedef int32_t U_CALLCONV 1068 UTextReplace(UText *ut, 1069 int64_t nativeStart, int64_t nativeLimit, 1070 const UChar *replacementText, int32_t replacmentLength, 1071 UErrorCode *status); 1072 1073 /** 1074 * Function type declaration for UText.copy(). 1075 * 1076 * Copy or move a substring from one position to another within the text, 1077 * while retaining any metadata associated with the text. 1078 * This function is used to duplicate or reorder substrings. 1079 * The destination index must not overlap the source range. 1080 * 1081 * The text to be copied or moved is inserted at destIndex; 1082 * it does not replace or overwrite any existing text. 1083 * 1084 * This function need only be implemented for UText types that support writing. 1085 * 1086 * When using this function, there should be only a single UText opened onto the 1087 * underlying native text string. The function is responsible for updating the 1088 * text chunk within the UText to reflect the updated iteration position, 1089 * taking into account any changes to the underlying string's structure caused 1090 * by the replace operation. 1091 * 1092 * @param ut The UText representing the text to be operated on. 1093 * @param nativeStart The index of the start of the region to be copied or moved 1094 * @param nativeLimit The index of the character following the region to be replaced. 1095 * @param nativeDest The destination index to which the source substring is copied or moved. 1096 * @param move If TRUE, then the substring is moved, not copied/duplicated. 
1097 * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION 1098 * 1099 * @stable ICU 3.4 1100 */ 1101 typedef void U_CALLCONV 1102 UTextCopy(UText *ut, 1103 int64_t nativeStart, int64_t nativeLimit, 1104 int64_t nativeDest, 1105 UBool move, 1106 UErrorCode *status); 1107 1108 /** 1109 * Function type declaration for UText.mapOffsetToNative(). 1110 * Map from the current UChar offset within the current text chunk to 1111 * the corresponding native index in the original source text. 1112 * 1113 * This is required only for text providers that do not use native UTF-16 indexes. 1114 * 1115 * @param ut the UText. 1116 * @return Absolute (native) index corresponding to chunkOffset in the current chunk. 1117 * The returned native index should always be to a code point boundary. 1118 * 1119 * @stable ICU 3.4 1120 */ 1121 typedef int64_t U_CALLCONV 1122 UTextMapOffsetToNative(const UText *ut); 1123 1124 /** 1125 * Function type declaration for UText.mapIndexToUTF16(). 1126 * Map from a native index to a UChar offset within a text chunk. 1127 * Behavior is undefined if the native index does not fall within the 1128 * current chunk. 1129 * 1130 * This function is required only for text providers that do not use native UTF-16 indexes. 1131 * 1132 * @param ut The UText containing the text chunk. 1133 * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit. 1134 * @return Chunk-relative UTF-16 offset corresponding to the specified native 1135 * index. 1136 * 1137 * @stable ICU 3.4 1138 */ 1139 typedef int32_t U_CALLCONV 1140 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); 1141 1142 1143 /** 1144 * Function type declaration for UText.utextClose(). 1145 * 1146 * A Text Provider close function is only required for provider types that make 1147 * allocations in their open function (or other functions) that must be 1148 * cleaned when the UText is closed. 
1149 * 1150 * The allocation of the UText struct itself and any "extra" storage 1151 * associated with the UText is handled by the common UText implementation 1152 * and does not require provider specific cleanup in a close function. 1153 * 1154 * Most UText provider implementations do not need to implement this function. 1155 * 1156 * @param ut A UText object to be closed. 1157 * 1158 * @stable ICU 3.4 1159 */ 1160 typedef void U_CALLCONV 1161 UTextClose(UText *ut); 1162 1163 1164 /** 1165 * (public) Function dispatch table for UText. 1166 * Conceptually very much like a C++ Virtual Function Table. 1167 * This struct defines the organization of the table. 1168 * Each text provider implementation must provide an 1169 * actual table that is initialized with the appropriate functions 1170 * for the type of text being handled. 1171 * @stable ICU 3.6 1172 */ 1173 struct UTextFuncs { 1174 /** 1175 * (public) Function table size, sizeof(UTextFuncs) 1176 * Intended for use should the table grow to accomodate added 1177 * functions in the future, to allow tests for older format 1178 * function tables that do not contain the extensions. 1179 * 1180 * Fields are placed for optimal alignment on 1181 * 32/64/128-bit-pointer machines, by normally grouping together 1182 * 4 32-bit fields, 1183 * 4 pointers, 1184 * 2 64-bit fields 1185 * in sequence. 1186 * @stable ICU 3.6 1187 */ 1188 int32_t tableSize; 1189 1190 /** 1191 * (private) Alignment padding. 1192 * Do not use, reserved for use by the UText framework only. 1193 * @internal 1194 */ 1195 int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; 1196 1197 1198 /** 1199 * (public) Function pointer for UTextClone 1200 * 1201 * @see UTextClone 1202 * @stable ICU 3.6 1203 */ 1204 UTextClone *clone; 1205 1206 /** 1207 * (public) function pointer for UTextLength 1208 * May be expensive to compute! 
1209 * 1210 * @see UTextLength 1211 * @stable ICU 3.6 1212 */ 1213 UTextNativeLength *nativeLength; 1214 1215 /** 1216 * (public) Function pointer for UTextAccess. 1217 * 1218 * @see UTextAccess 1219 * @stable ICU 3.6 1220 */ 1221 UTextAccess *access; 1222 1223 /** 1224 * (public) Function pointer for UTextExtract. 1225 * 1226 * @see UTextExtract 1227 * @stable ICU 3.6 1228 */ 1229 UTextExtract *extract; 1230 1231 /** 1232 * (public) Function pointer for UTextReplace. 1233 * 1234 * @see UTextReplace 1235 * @stable ICU 3.6 1236 */ 1237 UTextReplace *replace; 1238 1239 /** 1240 * (public) Function pointer for UTextCopy. 1241 * 1242 * @see UTextCopy 1243 * @stable ICU 3.6 1244 */ 1245 UTextCopy *copy; 1246 1247 /** 1248 * (public) Function pointer for UTextMapOffsetToNative. 1249 * 1250 * @see UTextMapOffsetToNative 1251 * @stable ICU 3.6 1252 */ 1253 UTextMapOffsetToNative *mapOffsetToNative; 1254 1255 /** 1256 * (public) Function pointer for UTextMapNativeIndexToUTF16. 1257 * 1258 * @see UTextMapNativeIndexToUTF16 1259 * @stable ICU 3.6 1260 */ 1261 UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; 1262 1263 /** 1264 * (public) Function pointer for UTextClose. 1265 * 1266 * @see UTextClose 1267 * @stable ICU 3.6 1268 */ 1269 UTextClose *close; 1270 1271 /** 1272 * (private) Spare function pointer 1273 * @internal 1274 */ 1275 1276 UTextClose *spare1; 1277 /** 1278 * (private) Spare function pointer 1279 * @internal 1280 */ 1281 UTextClose *spare2; 1282 1283 /** 1284 * (private) Spare function pointer 1285 * @internal 1286 */ 1287 UTextClose *spare3; 1288 1289 }; 1290 /** 1291 * Function dispatch table for UText 1292 * @see UTextFuncs 1293 */ 1294 typedef struct UTextFuncs UTextFuncs; 1295 1296 /** 1297 * UText struct. Provides the interface between the generic UText access code 1298 * and the UText provider code that works on specific kinds of 1299 * text (UTF-8, noncontiguous UTF-16, whatever.) 
1300 * 1301 * Applications that are using predefined types of text providers 1302 * to pass text data to ICU services will have no need to view the 1303 * internals of the UText structs that they open. 1304 * 1305 * @stable ICU 3.6 1306 */ 1307 struct UText { 1308 /** 1309 * (private) Magic. Used to help detect when UText functions are handed 1310 * invalid or unitialized UText structs. 1311 * utext_openXYZ() functions take an initialized, 1312 * but not necessarily open, UText struct as an 1313 * optional fill-in parameter. This magic field 1314 * is used to check for that initialization. 1315 * Text provider close functions must NOT clear 1316 * the magic field because that would prevent 1317 * reuse of the UText struct. 1318 * @internal 1319 */ 1320 uint32_t magic; 1321 1322 1323 /** 1324 * (private) Flags for managing the allocation and freeing of 1325 * memory associated with this UText. 1326 * @internal 1327 */ 1328 int32_t flags; 1329 1330 1331 /** 1332 * Text provider properties. This set of flags is maintainted by the 1333 * text provider implementation. 1334 * @stable ICU 3.4 1335 */ 1336 int32_t providerProperties; 1337 1338 /** 1339 * (public) sizeOfStruct=sizeof(UText) 1340 * Allows possible backward compatible extension. 1341 * 1342 * @stable ICU 3.4 1343 */ 1344 int32_t sizeOfStruct; 1345 1346 /* ------ 16 byte alignment boundary ----------- */ 1347 1348 1349 /** 1350 * (protected) Native index of the first character position following 1351 * the current chunk. 1352 * @stable ICU 3.6 1353 */ 1354 int64_t chunkNativeLimit; 1355 1356 /** 1357 * (protected) Size in bytes of the extra space (pExtra). 1358 * @stable ICU 3.4 1359 */ 1360 int32_t extraSize; 1361 1362 /** 1363 * (protected) The highest chunk offset where native indexing and 1364 * chunk (UTF-16) indexing correspond. For UTF-16 sources, value 1365 * will be equal to chunkLength. 
1366 * 1367 * @stable ICU 3.6 1368 */ 1369 int32_t nativeIndexingLimit; 1370 1371 /* ---- 16 byte alignment boundary------ */ 1372 1373 /** 1374 * (protected) Native index of the first character in the text chunk. 1375 * @stable ICU 3.6 1376 */ 1377 int64_t chunkNativeStart; 1378 1379 /** 1380 * (protected) Current iteration position within the text chunk (UTF-16 buffer). 1381 * This is the index to the character that will be returned by utext_next32(). 1382 * @stable ICU 3.6 1383 */ 1384 int32_t chunkOffset; 1385 1386 /** 1387 * (protected) Length the text chunk (UTF-16 buffer), in UChars. 1388 * @stable ICU 3.6 1389 */ 1390 int32_t chunkLength; 1391 1392 /* ---- 16 byte alignment boundary-- */ 1393 1394 1395 /** 1396 * (protected) pointer to a chunk of text in UTF-16 format. 1397 * May refer either to original storage of the source of the text, or 1398 * if conversion was required, to a buffer owned by the UText. 1399 * @stable ICU 3.6 1400 */ 1401 const UChar *chunkContents; 1402 1403 /** 1404 * (public) Pointer to Dispatch table for accessing functions for this UText. 1405 * @stable ICU 3.6 1406 */ 1407 const UTextFuncs *pFuncs; 1408 1409 /** 1410 * (protected) Pointer to additional space requested by the 1411 * text provider during the utext_open operation. 1412 * @stable ICU 3.4 1413 */ 1414 void *pExtra; 1415 1416 /** 1417 * (protected) Pointer to string or text-containin object or similar. 1418 * This is the source of the text that this UText is wrapping, in a format 1419 * that is known to the text provider functions. 1420 * @stable ICU 3.4 1421 */ 1422 const void *context; 1423 1424 /* --- 16 byte alignment boundary--- */ 1425 1426 /** 1427 * (protected) Pointer fields available for use by the text provider. 1428 * Not used by UText common code. 1429 * @stable ICU 3.6 1430 */ 1431 const void *p; 1432 /** 1433 * (protected) Pointer fields available for use by the text provider. 1434 * Not used by UText common code. 
1435 * @stable ICU 3.6 1436 */ 1437 const void *q; 1438 /** 1439 * (protected) Pointer fields available for use by the text provider. 1440 * Not used by UText common code. 1441 * @stable ICU 3.6 1442 */ 1443 const void *r; 1444 1445 /** 1446 * Private field reserved for future use by the UText framework 1447 * itself. This is not to be touched by the text providers. 1448 * @internal ICU 3.4 1449 */ 1450 void *privP; 1451 1452 1453 /* --- 16 byte alignment boundary--- */ 1454 1455 1456 /** 1457 * (protected) Integer field reserved for use by the text provider. 1458 * Not used by the UText framework, or by the client (user) of the UText. 1459 * @stable ICU 3.4 1460 */ 1461 int64_t a; 1462 1463 /** 1464 * (protected) Integer field reserved for use by the text provider. 1465 * Not used by the UText framework, or by the client (user) of the UText. 1466 * @stable ICU 3.4 1467 */ 1468 int32_t b; 1469 1470 /** 1471 * (protected) Integer field reserved for use by the text provider. 1472 * Not used by the UText framework, or by the client (user) of the UText. 1473 * @stable ICU 3.4 1474 */ 1475 int32_t c; 1476 1477 /* ---- 16 byte alignment boundary---- */ 1478 1479 1480 /** 1481 * Private field reserved for future use by the UText framework 1482 * itself. This is not to be touched by the text providers. 1483 * @internal ICU 3.4 1484 */ 1485 int64_t privA; 1486 /** 1487 * Private field reserved for future use by the UText framework 1488 * itself. This is not to be touched by the text providers. 1489 * @internal ICU 3.4 1490 */ 1491 int32_t privB; 1492 /** 1493 * Private field reserved for future use by the UText framework 1494 * itself. This is not to be touched by the text providers. 1495 * @internal ICU 3.4 1496 */ 1497 int32_t privC; 1498 }; 1499 1500 1501 /** 1502 * Common function for use by Text Provider implementations to allocate and/or initialize 1503 * a new UText struct. To be called in the implementation of utext_open() functions. 
1504 * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. 1505 * If the supplied UText is already open, the provider's close function will be called 1506 * so that the struct can be reused by the open that is in progress. 1507 * 1508 * @param ut pointer to a UText struct to be re-used, or null if a new UText 1509 * should be allocated. 1510 * @param extraSpace The amount of additional space to be allocated as part 1511 * of this UText, for use by types of providers that require 1512 * additional storage. 1513 * @param status Errors are returned here. 1514 * @return pointer to the UText, allocated if necessary, with extra space set up if requested. 1515 * @stable ICU 3.4 1516 */ 1517 U_STABLE UText * U_EXPORT2 1518 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); 1519 1520 /** 1521 * @internal 1522 * Value used to help identify correctly initialized UText structs. 1523 * Note: must be publicly visible so that UTEXT_INITIALIZER can access it. 1524 */ 1525 enum { 1526 UTEXT_MAGIC = 0x345ad82c 1527 }; 1528 1529 /** 1530 * initializer to be used with local (stack) instances of a UText 1531 * struct. UText structs must be initialized before passing 1532 * them to one of the utext_open functions. 1533 * 1534 * @stable ICU 3.6 1535 */ 1536 #define UTEXT_INITIALIZER { \ 1537 UTEXT_MAGIC, /* magic */ \ 1538 0, /* flags */ \ 1539 0, /* providerProps */ \ 1540 sizeof(UText), /* sizeOfStruct */ \ 1541 0, /* chunkNativeLimit */ \ 1542 0, /* extraSize */ \ 1543 0, /* nativeIndexingLimit */ \ 1544 0, /* chunkNativeStart */ \ 1545 0, /* chunkOffset */ \ 1546 0, /* chunkLength */ \ 1547 NULL, /* chunkContents */ \ 1548 NULL, /* pFuncs */ \ 1549 NULL, /* pExtra */ \ 1550 NULL, /* context */ \ 1551 NULL, NULL, NULL, /* p, q, r */ \ 1552 NULL, /* privP */ \ 1553 0, 0, 0, /* a, b, c */ \ 1554 0, 0, 0 /* privA,B,C, */ \ 1555 } 1556 1557 1558 U_CDECL_END 1559 1560 1561 1562 #endif 1563