1 /* 2 ******************************************************************** 3 * COPYRIGHT: 4 * Copyright (c) 1996-2015, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ******************************************************************** 7 */ 8 9 #ifndef NORMLZR_H 10 #define NORMLZR_H 11 12 #include "unicode/utypes.h" 13 14 /** 15 * \file 16 * \brief C++ API: Unicode Normalization 17 */ 18 19 #if !UCONFIG_NO_NORMALIZATION 20 21 #include "unicode/chariter.h" 22 #include "unicode/normalizer2.h" 23 #include "unicode/unistr.h" 24 #include "unicode/unorm.h" 25 #include "unicode/uobject.h" 26 27 U_NAMESPACE_BEGIN 28 /** 29 * Old Unicode normalization API. 30 * 31 * This API has been replaced by the Normalizer2 class and is only available 32 * for backward compatibility. This class simply delegates to the Normalizer2 class. 33 * There is one exception: The new API does not provide a replacement for Normalizer::compare(). 34 * 35 * The Normalizer class supports the standard normalization forms described in 36 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode"> 37 * Unicode Standard Annex #15: Unicode Normalization Forms</a>. 38 * 39 * The Normalizer class consists of two parts: 40 * - static functions that normalize strings or test if strings are normalized 41 * - a Normalizer object is an iterator that takes any kind of text and 42 * provides iteration over its normalized form 43 * 44 * The Normalizer class is not suitable for subclassing. 45 * 46 * For basic information about normalization forms and details about the C API 47 * please see the documentation in unorm.h. 48 * 49 * The iterator API with the Normalizer constructors and the non-static functions 50 * use a CharacterIterator as input. It is possible to pass a string which 51 * is then internally wrapped in a CharacterIterator. 52 * The input text is not normalized all at once, but incrementally where needed 53 * (providing efficient random access). 54 * This allows to pass in a large text but spend only a small amount of time 55 * normalizing a small part of that text. 56 * However, if the entire text is normalized, then the iterator will be 57 * slower than normalizing the entire text at once and iterating over the result. 58 * A possible use of the Normalizer iterator is also to report an index into the 59 * original text that is close to where the normalized characters come from. 60 * 61 * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0. 62 * The earlier implementation reported the getIndex() inconsistently, 63 * and previous() could not be used after setIndex(), next(), first(), and current(). 64 * 65 * Normalizer allows to start normalizing from anywhere in the input text by 66 * calling setIndexOnly(), first(), or last(). 67 * Without calling any of these, the iterator will start at the beginning of the text. 68 * 69 * At any time, next() returns the next normalized code point (UChar32), 70 * with post-increment semantics (like CharacterIterator::next32PostInc()). 71 * previous() returns the previous normalized code point (UChar32), 72 * with pre-decrement semantics (like CharacterIterator::previous32()). 73 * 74 * current() returns the current code point 75 * (respectively the one at the newly set index) without moving 76 * the getIndex(). Note that if the text at the current position 77 * needs to be normalized, then these functions will do that. 78 * (This is why current() is not const.) 79 * It is more efficient to call setIndexOnly() instead, which does not 80 * normalize. 81 * 82 * getIndex() always refers to the position in the input text where the normalized 83 * code points are returned from. It does not always change with each returned 84 * code point. 85 * The code point that is returned from any of the functions 86 * corresponds to text at or after getIndex(), according to the 87 * function's iteration semantics (post-increment or pre-decrement). 88 * 89 * next() returns a code point from at or after the getIndex() 90 * from before the next() call. After the next() call, the getIndex() 91 * might have moved to where the next code point will be returned from 92 * (from a next() or current() call). 93 * This is semantically equivalent to array access with array[index++] 94 * (post-increment semantics). 95 * 96 * previous() returns a code point from at or after the getIndex() 97 * from after the previous() call. 98 * This is semantically equivalent to array access with array[--index] 99 * (pre-decrement semantics). 100 * 101 * Internally, the Normalizer iterator normalizes a small piece of text 102 * starting at the getIndex() and ending at a following "safe" index. 103 * The normalized results is stored in an internal string buffer, and 104 * the code points are iterated from there. 105 * With multiple iteration calls, this is repeated until the next piece 106 * of text needs to be normalized, and the getIndex() needs to be moved. 107 * 108 * The following "safe" index, the internal buffer, and the secondary 109 * iteration index into that buffer are not exposed on the API. 110 * This also means that it is currently not practical to return to 111 * a particular, arbitrary position in the text because one would need to 112 * know, and be able to set, in addition to the getIndex(), at least also the 113 * current index into the internal buffer. 114 * It is currently only possible to observe when getIndex() changes 115 * (with careful consideration of the iteration semantics), 116 * at which time the internal index will be 0. 117 * For example, if getIndex() is different after next() than before it, 118 * then the internal index is 0 and one can return to this getIndex() 119 * later with setIndexOnly(). 120 * 121 * Note: While the setIndex() and getIndex() refer to indices in the 122 * underlying Unicode input text, the next() and previous() methods 123 * iterate through characters in the normalized output. 124 * This means that there is not necessarily a one-to-one correspondence 125 * between characters returned by next() and previous() and the indices 126 * passed to and returned from setIndex() and getIndex(). 127 * It is for this reason that Normalizer does not implement the CharacterIterator interface. 128 * 129 * @author Laura Werner, Mark Davis, Markus Scherer 130 * @stable ICU 2.0 131 */ 132 class U_COMMON_API Normalizer : public UObject { 133 public: 134 #ifndef U_HIDE_DEPRECATED_API 135 /** 136 * If DONE is returned from an iteration function that returns a code point, 137 * then there are no more normalization results available. 138 * @deprecated ICU 56 Use Normalizer2 instead. 139 */ 140 enum { 141 DONE=0xffff 142 }; 143 144 // Constructors 145 146 /** 147 * Creates a new <code>Normalizer</code> object for iterating over the 148 * normalized form of a given string. 149 * <p> 150 * @param str The string to be normalized. The normalization 151 * will start at the beginning of the string. 152 * 153 * @param mode The normalization mode. 154 * @deprecated ICU 56 Use Normalizer2 instead. 155 */ 156 Normalizer(const UnicodeString& str, UNormalizationMode mode); 157 158 /** 159 * Creates a new <code>Normalizer</code> object for iterating over the 160 * normalized form of a given string. 161 * <p> 162 * @param str The string to be normalized. The normalization 163 * will start at the beginning of the string. 164 * 165 * @param length Length of the string, or -1 if NUL-terminated. 166 * @param mode The normalization mode. 167 * @deprecated ICU 56 Use Normalizer2 instead. 168 */ 169 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode); 170 171 /** 172 * Creates a new <code>Normalizer</code> object for iterating over the 173 * normalized form of the given text. 174 * <p> 175 * @param iter The input text to be normalized. The normalization 176 * will start at the beginning of the string. 177 * 178 * @param mode The normalization mode. 179 * @deprecated ICU 56 Use Normalizer2 instead. 180 */ 181 Normalizer(const CharacterIterator& iter, UNormalizationMode mode); 182 183 /** 184 * Copy constructor. 185 * @param copy The object to be copied. 186 * @deprecated ICU 56 Use Normalizer2 instead. 187 */ 188 Normalizer(const Normalizer& copy); 189 #endif /* U_HIDE_DEPRECATED_API */ 190 191 /** 192 * Destructor 193 * @deprecated ICU 56 Use Normalizer2 instead. 194 */ 195 virtual ~Normalizer(); 196 197 198 //------------------------------------------------------------------------- 199 // Static utility methods 200 //------------------------------------------------------------------------- 201 202 #ifndef U_HIDE_DEPRECATED_API 203 /** 204 * Normalizes a <code>UnicodeString</code> according to the specified normalization mode. 205 * This is a wrapper for unorm_normalize(), using UnicodeString's. 206 * 207 * The <code>options</code> parameter specifies which optional 208 * <code>Normalizer</code> features are to be enabled for this operation. 209 * 210 * @param source the input string to be normalized. 211 * @param mode the normalization mode 212 * @param options the optional features to be enabled (0 for no options) 213 * @param result The normalized string (on output). 214 * @param status The error code. 215 * @deprecated ICU 56 Use Normalizer2 instead. 216 */ 217 static void U_EXPORT2 normalize(const UnicodeString& source, 218 UNormalizationMode mode, int32_t options, 219 UnicodeString& result, 220 UErrorCode &status); 221 222 /** 223 * Compose a <code>UnicodeString</code>. 224 * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC. 225 * This is a wrapper for unorm_normalize(), using UnicodeString's. 226 * 227 * The <code>options</code> parameter specifies which optional 228 * <code>Normalizer</code> features are to be enabled for this operation. 229 * 230 * @param source the string to be composed. 231 * @param compat Perform compatibility decomposition before composition. 232 * If this argument is <code>FALSE</code>, only canonical 233 * decomposition will be performed. 234 * @param options the optional features to be enabled (0 for no options) 235 * @param result The composed string (on output). 236 * @param status The error code. 237 * @deprecated ICU 56 Use Normalizer2 instead. 238 */ 239 static void U_EXPORT2 compose(const UnicodeString& source, 240 UBool compat, int32_t options, 241 UnicodeString& result, 242 UErrorCode &status); 243 244 /** 245 * Static method to decompose a <code>UnicodeString</code>. 246 * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD. 247 * This is a wrapper for unorm_normalize(), using UnicodeString's. 248 * 249 * The <code>options</code> parameter specifies which optional 250 * <code>Normalizer</code> features are to be enabled for this operation. 251 * 252 * @param source the string to be decomposed. 253 * @param compat Perform compatibility decomposition. 254 * If this argument is <code>FALSE</code>, only canonical 255 * decomposition will be performed. 256 * @param options the optional features to be enabled (0 for no options) 257 * @param result The decomposed string (on output). 258 * @param status The error code. 259 * @deprecated ICU 56 Use Normalizer2 instead. 260 */ 261 static void U_EXPORT2 decompose(const UnicodeString& source, 262 UBool compat, int32_t options, 263 UnicodeString& result, 264 UErrorCode &status); 265 266 /** 267 * Performing quick check on a string, to quickly determine if the string is 268 * in a particular normalization format. 269 * This is a wrapper for unorm_quickCheck(), using a UnicodeString. 270 * 271 * Three types of result can be returned UNORM_YES, UNORM_NO or 272 * UNORM_MAYBE. Result UNORM_YES indicates that the argument 273 * string is in the desired normalized format, UNORM_NO determines that 274 * argument string is not in the desired normalized format. A 275 * UNORM_MAYBE result indicates that a more thorough check is required, 276 * the user may have to put the string in its normalized form and compare the 277 * results. 278 * @param source string for determining if it is in a normalized format 279 * @param mode normalization format 280 * @param status A reference to a UErrorCode to receive any errors 281 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE 282 * 283 * @see isNormalized 284 * @deprecated ICU 56 Use Normalizer2 instead. 285 */ 286 static inline UNormalizationCheckResult 287 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status); 288 289 /** 290 * Performing quick check on a string; same as the other version of quickCheck 291 * but takes an extra options parameter like most normalization functions. 292 * 293 * @param source string for determining if it is in a normalized format 294 * @param mode normalization format 295 * @param options the optional features to be enabled (0 for no options) 296 * @param status A reference to a UErrorCode to receive any errors 297 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE 298 * 299 * @see isNormalized 300 * @deprecated ICU 56 Use Normalizer2 instead. 301 */ 302 static UNormalizationCheckResult 303 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status); 304 305 /** 306 * Test if a string is in a given normalization form. 307 * This is semantically equivalent to source.equals(normalize(source, mode)) . 308 * 309 * Unlike unorm_quickCheck(), this function returns a definitive result, 310 * never a "maybe". 311 * For NFD, NFKD, and FCD, both functions work exactly the same. 312 * For NFC and NFKC where quickCheck may return "maybe", this function will 313 * perform further tests to arrive at a TRUE/FALSE result. 314 * 315 * @param src String that is to be tested if it is in a normalization format. 316 * @param mode Which normalization form to test for. 317 * @param errorCode ICU error code in/out parameter. 318 * Must fulfill U_SUCCESS before the function call. 319 * @return Boolean value indicating whether the source string is in the 320 * "mode" normalization form. 321 * 322 * @see quickCheck 323 * @deprecated ICU 56 Use Normalizer2 instead. 324 */ 325 static inline UBool 326 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode); 327 328 /** 329 * Test if a string is in a given normalization form; same as the other version of isNormalized 330 * but takes an extra options parameter like most normalization functions. 331 * 332 * @param src String that is to be tested if it is in a normalization format. 333 * @param mode Which normalization form to test for. 334 * @param options the optional features to be enabled (0 for no options) 335 * @param errorCode ICU error code in/out parameter. 336 * Must fulfill U_SUCCESS before the function call. 337 * @return Boolean value indicating whether the source string is in the 338 * "mode" normalization form. 339 * 340 * @see quickCheck 341 * @deprecated ICU 56 Use Normalizer2 instead. 342 */ 343 static UBool 344 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode); 345 346 /** 347 * Concatenate normalized strings, making sure that the result is normalized as well. 348 * 349 * If both the left and the right strings are in 350 * the normalization form according to "mode/options", 351 * then the result will be 352 * 353 * \code 354 * dest=normalize(left+right, mode, options) 355 * \endcode 356 * 357 * For details see unorm_concatenate in unorm.h. 358 * 359 * @param left Left source string. 360 * @param right Right source string. 361 * @param result The output string. 362 * @param mode The normalization mode. 363 * @param options A bit set of normalization options. 364 * @param errorCode ICU error code in/out parameter. 365 * Must fulfill U_SUCCESS before the function call. 366 * @return result 367 * 368 * @see unorm_concatenate 369 * @see normalize 370 * @see unorm_next 371 * @see unorm_previous 372 * 373 * @deprecated ICU 56 Use Normalizer2 instead. 374 */ 375 static UnicodeString & 376 U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right, 377 UnicodeString &result, 378 UNormalizationMode mode, int32_t options, 379 UErrorCode &errorCode); 380 #endif /* U_HIDE_DEPRECATED_API */ 381 382 /** 383 * Compare two strings for canonical equivalence. 384 * Further options include case-insensitive comparison and 385 * code point order (as opposed to code unit order). 386 * 387 * Canonical equivalence between two strings is defined as their normalized 388 * forms (NFD or NFC) being identical. 389 * This function compares strings incrementally instead of normalizing 390 * (and optionally case-folding) both strings entirely, 391 * improving performance significantly. 392 * 393 * Bulk normalization is only necessary if the strings do not fulfill the FCD 394 * conditions. Only in this case, and only if the strings are relatively long, 395 * is memory allocated temporarily. 396 * For FCD strings and short non-FCD strings there is no memory allocation. 397 * 398 * Semantically, this is equivalent to 399 * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2))) 400 * where code point order and foldCase are all optional. 401 * 402 * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match 403 * the case folding must be performed first, then the normalization. 404 * 405 * @param s1 First source string. 406 * @param s2 Second source string. 407 * 408 * @param options A bit set of options: 409 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 410 * Case-sensitive comparison in code unit order, and the input strings 411 * are quick-checked for FCD. 412 * 413 * - UNORM_INPUT_IS_FCD 414 * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. 415 * If not set, the function will quickCheck for FCD 416 * and normalize if necessary. 417 * 418 * - U_COMPARE_CODE_POINT_ORDER 419 * Set to choose code point order instead of code unit order 420 * (see u_strCompare for details). 421 * 422 * - U_COMPARE_IGNORE_CASE 423 * Set to compare strings case-insensitively using case folding, 424 * instead of case-sensitively. 425 * If set, then the following case folding options are used. 426 * 427 * - Options as used with case-insensitive comparisons, currently: 428 * 429 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 430 * (see u_strCaseCompare for details) 431 * 432 * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT 433 * 434 * @param errorCode ICU error code in/out parameter. 435 * Must fulfill U_SUCCESS before the function call. 436 * @return <0 or 0 or >0 as usual for string comparisons 437 * 438 * @see unorm_compare 439 * @see normalize 440 * @see UNORM_FCD 441 * @see u_strCompare 442 * @see u_strCaseCompare 443 * 444 * @stable ICU 2.2 445 */ 446 static inline int32_t 447 compare(const UnicodeString &s1, const UnicodeString &s2, 448 uint32_t options, 449 UErrorCode &errorCode); 450 451 #ifndef U_HIDE_DEPRECATED_API 452 //------------------------------------------------------------------------- 453 // Iteration API 454 //------------------------------------------------------------------------- 455 456 /** 457 * Return the current character in the normalized text. 458 * current() may need to normalize some text at getIndex(). 459 * The getIndex() is not changed. 460 * 461 * @return the current normalized code point 462 * @deprecated ICU 56 Use Normalizer2 instead. 463 */ 464 UChar32 current(void); 465 466 /** 467 * Return the first character in the normalized text. 468 * This is equivalent to setIndexOnly(startIndex()) followed by next(). 469 * (Post-increment semantics.) 470 * 471 * @return the first normalized code point 472 * @deprecated ICU 56 Use Normalizer2 instead. 473 */ 474 UChar32 first(void); 475 476 /** 477 * Return the last character in the normalized text. 478 * This is equivalent to setIndexOnly(endIndex()) followed by previous(). 479 * (Pre-decrement semantics.) 480 * 481 * @return the last normalized code point 482 * @deprecated ICU 56 Use Normalizer2 instead. 483 */ 484 UChar32 last(void); 485 486 /** 487 * Return the next character in the normalized text. 488 * (Post-increment semantics.) 489 * If the end of the text has already been reached, DONE is returned. 490 * The DONE value could be confused with a U+FFFF non-character code point 491 * in the text. If this is possible, you can test getIndex()<endIndex() 492 * before calling next(), or (getIndex()<endIndex() || last()!=DONE) 493 * after calling next(). (Calling last() will change the iterator state!) 494 * 495 * The C API unorm_next() is more efficient and does not have this ambiguity. 496 * 497 * @return the next normalized code point 498 * @deprecated ICU 56 Use Normalizer2 instead. 499 */ 500 UChar32 next(void); 501 502 /** 503 * Return the previous character in the normalized text and decrement. 504 * (Pre-decrement semantics.) 505 * If the beginning of the text has already been reached, DONE is returned. 506 * The DONE value could be confused with a U+FFFF non-character code point 507 * in the text. If this is possible, you can test 508 * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change 509 * the iterator state!) 510 * 511 * The C API unorm_previous() is more efficient and does not have this ambiguity. 512 * 513 * @return the previous normalized code point 514 * @deprecated ICU 56 Use Normalizer2 instead. 515 */ 516 UChar32 previous(void); 517 518 /** 519 * Set the iteration position in the input text that is being normalized, 520 * without any immediate normalization. 521 * After setIndexOnly(), getIndex() will return the same index that is 522 * specified here. 523 * 524 * @param index the desired index in the input text. 525 * @deprecated ICU 56 Use Normalizer2 instead. 526 */ 527 void setIndexOnly(int32_t index); 528 529 /** 530 * Reset the index to the beginning of the text. 531 * This is equivalent to setIndexOnly(startIndex)). 532 * @deprecated ICU 56 Use Normalizer2 instead. 533 */ 534 void reset(void); 535 536 /** 537 * Retrieve the current iteration position in the input text that is 538 * being normalized. 539 * 540 * A following call to next() will return a normalized code point from 541 * the input text at or after this index. 542 * 543 * After a call to previous(), getIndex() will point at or before the 544 * position in the input text where the normalized code point 545 * was returned from with previous(). 546 * 547 * @return the current index in the input text 548 * @deprecated ICU 56 Use Normalizer2 instead. 549 */ 550 int32_t getIndex(void) const; 551 552 /** 553 * Retrieve the index of the start of the input text. This is the begin index 554 * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string 555 * over which this <code>Normalizer</code> is iterating. 556 * 557 * @return the smallest index in the input text where the Normalizer operates 558 * @deprecated ICU 56 Use Normalizer2 instead. 559 */ 560 int32_t startIndex(void) const; 561 562 /** 563 * Retrieve the index of the end of the input text. This is the end index 564 * of the <code>CharacterIterator</code> or the length of the string 565 * over which this <code>Normalizer</code> is iterating. 566 * This end index is exclusive, i.e., the Normalizer operates only on characters 567 * before this index. 568 * 569 * @return the first index in the input text where the Normalizer does not operate 570 * @deprecated ICU 56 Use Normalizer2 instead. 571 */ 572 int32_t endIndex(void) const; 573 574 /** 575 * Returns TRUE when both iterators refer to the same character in the same 576 * input text. 577 * 578 * @param that a Normalizer object to compare this one to 579 * @return comparison result 580 * @deprecated ICU 56 Use Normalizer2 instead. 581 */ 582 UBool operator==(const Normalizer& that) const; 583 584 /** 585 * Returns FALSE when both iterators refer to the same character in the same 586 * input text. 587 * 588 * @param that a Normalizer object to compare this one to 589 * @return comparison result 590 * @deprecated ICU 56 Use Normalizer2 instead. 591 */ 592 inline UBool operator!=(const Normalizer& that) const; 593 594 /** 595 * Returns a pointer to a new Normalizer that is a clone of this one. 596 * The caller is responsible for deleting the new clone. 597 * @return a pointer to a new Normalizer 598 * @deprecated ICU 56 Use Normalizer2 instead. 599 */ 600 Normalizer* clone(void) const; 601 602 /** 603 * Generates a hash code for this iterator. 604 * 605 * @return the hash code 606 * @deprecated ICU 56 Use Normalizer2 instead. 607 */ 608 int32_t hashCode(void) const; 609 610 //------------------------------------------------------------------------- 611 // Property access methods 612 //------------------------------------------------------------------------- 613 614 /** 615 * Set the normalization mode for this object. 616 * <p> 617 * <b>Note:</b>If the normalization mode is changed while iterating 618 * over a string, calls to {@link #next() } and {@link #previous() } may 619 * return previously buffers characters in the old normalization mode 620 * until the iteration is able to re-sync at the next base character. 621 * It is safest to call {@link #setIndexOnly }, {@link #reset() }, 622 * {@link #setText }, {@link #first() }, 623 * {@link #last() }, etc. after calling <code>setMode</code>. 624 * <p> 625 * @param newMode the new mode for this <code>Normalizer</code>. 626 * @see #getUMode 627 * @deprecated ICU 56 Use Normalizer2 instead. 628 */ 629 void setMode(UNormalizationMode newMode); 630 631 /** 632 * Return the normalization mode for this object. 633 * 634 * This is an unusual name because there used to be a getMode() that 635 * returned a different type. 636 * 637 * @return the mode for this <code>Normalizer</code> 638 * @see #setMode 639 * @deprecated ICU 56 Use Normalizer2 instead. 640 */ 641 UNormalizationMode getUMode(void) const; 642 643 /** 644 * Set options that affect this <code>Normalizer</code>'s operation. 645 * Options do not change the basic composition or decomposition operation 646 * that is being performed, but they control whether 647 * certain optional portions of the operation are done. 648 * Currently the only available option is obsolete. 649 * 650 * It is possible to specify multiple options that are all turned on or off. 651 * 652 * @param option the option(s) whose value is/are to be set. 653 * @param value the new setting for the option. Use <code>TRUE</code> to 654 * turn the option(s) on and <code>FALSE</code> to turn it/them off. 655 * 656 * @see #getOption 657 * @deprecated ICU 56 Use Normalizer2 instead. 658 */ 659 void setOption(int32_t option, 660 UBool value); 661 662 /** 663 * Determine whether an option is turned on or off. 664 * If multiple options are specified, then the result is TRUE if any 665 * of them are set. 666 * <p> 667 * @param option the option(s) that are to be checked 668 * @return TRUE if any of the option(s) are set 669 * @see #setOption 670 * @deprecated ICU 56 Use Normalizer2 instead. 671 */ 672 UBool getOption(int32_t option) const; 673 674 /** 675 * Set the input text over which this <code>Normalizer</code> will iterate. 676 * The iteration position is set to the beginning. 677 * 678 * @param newText a string that replaces the current input text 679 * @param status a UErrorCode 680 * @deprecated ICU 56 Use Normalizer2 instead. 681 */ 682 void setText(const UnicodeString& newText, 683 UErrorCode &status); 684 685 /** 686 * Set the input text over which this <code>Normalizer</code> will iterate. 687 * The iteration position is set to the beginning. 688 * 689 * @param newText a CharacterIterator object that replaces the current input text 690 * @param status a UErrorCode 691 * @deprecated ICU 56 Use Normalizer2 instead. 692 */ 693 void setText(const CharacterIterator& newText, 694 UErrorCode &status); 695 696 /** 697 * Set the input text over which this <code>Normalizer</code> will iterate. 698 * The iteration position is set to the beginning. 699 * 700 * @param newText a string that replaces the current input text 701 * @param length the length of the string, or -1 if NUL-terminated 702 * @param status a UErrorCode 703 * @deprecated ICU 56 Use Normalizer2 instead. 704 */ 705 void setText(const UChar* newText, 706 int32_t length, 707 UErrorCode &status); 708 /** 709 * Copies the input text into the UnicodeString argument. 710 * 711 * @param result Receives a copy of the text under iteration. 712 * @deprecated ICU 56 Use Normalizer2 instead. 713 */ 714 void getText(UnicodeString& result); 715 716 /** 717 * ICU "poor man's RTTI", returns a UClassID for this class. 718 * @returns a UClassID for this class. 719 * @deprecated ICU 56 Use Normalizer2 instead. 720 */ 721 static UClassID U_EXPORT2 getStaticClassID(); 722 #endif /* U_HIDE_DEPRECATED_API */ 723 724 /** 725 * ICU "poor man's RTTI", returns a UClassID for the actual class. 726 * @return a UClassID for the actual class. 727 * @deprecated ICU 56 Use Normalizer2 instead. 728 */ 729 virtual UClassID getDynamicClassID() const; 730 731 private: 732 //------------------------------------------------------------------------- 733 // Private functions 734 //------------------------------------------------------------------------- 735 736 Normalizer(); // default constructor not implemented 737 Normalizer &operator=(const Normalizer &that); // assignment operator not implemented 738 739 // Private utility methods for iteration 740 // For documentation, see the source code 741 UBool nextNormalize(); 742 UBool previousNormalize(); 743 744 void init(); 745 void clearBuffer(void); 746 747 //------------------------------------------------------------------------- 748 // Private data 749 //------------------------------------------------------------------------- 750 751 FilteredNormalizer2*fFilteredNorm2; // owned if not NULL 752 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2 753 #ifndef U_HIDE_DEPRECATED_API 754 UNormalizationMode fUMode; 755 #endif /* U_HIDE_DEPRECATED_API */ 756 int32_t fOptions; 757 758 // The input text and our position in it 759 CharacterIterator *text; 760 761 // The normalization buffer is the result of normalization 762 // of the source in [currentIndex..nextIndex[ . 763 int32_t currentIndex, nextIndex; 764 765 // A buffer for holding intermediate results 766 UnicodeString buffer; 767 int32_t bufferPos; 768 }; 769 770 //------------------------------------------------------------------------- 771 // Inline implementations 772 //------------------------------------------------------------------------- 773 774 #ifndef U_HIDE_DEPRECATED_API 775 inline UBool 776 Normalizer::operator!= (const Normalizer& other) const 777 { return ! operator==(other); } 778 779 inline UNormalizationCheckResult 780 Normalizer::quickCheck(const UnicodeString& source, 781 UNormalizationMode mode, 782 UErrorCode &status) { 783 return quickCheck(source, mode, 0, status); 784 } 785 786 inline UBool 787 Normalizer::isNormalized(const UnicodeString& source, 788 UNormalizationMode mode, 789 UErrorCode &status) { 790 return isNormalized(source, mode, 0, status); 791 } 792 #endif /* U_HIDE_DEPRECATED_API */ 793 794 inline int32_t 795 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, 796 uint32_t options, 797 UErrorCode &errorCode) { 798 // all argument checking is done in unorm_compare 799 return unorm_compare(s1.getBuffer(), s1.length(), 800 s2.getBuffer(), s2.length(), 801 options, 802 &errorCode); 803 } 804 805 U_NAMESPACE_END 806 807 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 808 809 #endif // NORMLZR_H 810