1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************** 5 * COPYRIGHT: 6 * Copyright (c) 1996-2015, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************** 9 */ 10 11 #ifndef NORMLZR_H 12 #define NORMLZR_H 13 14 #include "unicode/utypes.h" 15 16 /** 17 * \file 18 * \brief C++ API: Unicode Normalization 19 */ 20 21 #if !UCONFIG_NO_NORMALIZATION 22 23 #include "unicode/chariter.h" 24 #include "unicode/normalizer2.h" 25 #include "unicode/unistr.h" 26 #include "unicode/unorm.h" 27 #include "unicode/uobject.h" 28 29 U_NAMESPACE_BEGIN 30 /** 31 * Old Unicode normalization API. 32 * 33 * This API has been replaced by the Normalizer2 class and is only available 34 * for backward compatibility. This class simply delegates to the Normalizer2 class. 35 * There is one exception: The new API does not provide a replacement for Normalizer::compare(). 36 * 37 * The Normalizer class supports the standard normalization forms described in 38 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode"> 39 * Unicode Standard Annex #15: Unicode Normalization Forms</a>. 40 * 41 * The Normalizer class consists of two parts: 42 * - static functions that normalize strings or test if strings are normalized 43 * - a Normalizer object is an iterator that takes any kind of text and 44 * provides iteration over its normalized form 45 * 46 * The Normalizer class is not suitable for subclassing. 47 * 48 * For basic information about normalization forms and details about the C API 49 * please see the documentation in unorm.h. 50 * 51 * The iterator API with the Normalizer constructors and the non-static functions 52 * use a CharacterIterator as input. It is possible to pass a string which 53 * is then internally wrapped in a CharacterIterator. 54 * The input text is not normalized all at once, but incrementally where needed 55 * (providing efficient random access). 56 * This allows to pass in a large text but spend only a small amount of time 57 * normalizing a small part of that text. 58 * However, if the entire text is normalized, then the iterator will be 59 * slower than normalizing the entire text at once and iterating over the result. 60 * A possible use of the Normalizer iterator is also to report an index into the 61 * original text that is close to where the normalized characters come from. 62 * 63 * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0. 64 * The earlier implementation reported the getIndex() inconsistently, 65 * and previous() could not be used after setIndex(), next(), first(), and current(). 66 * 67 * Normalizer allows to start normalizing from anywhere in the input text by 68 * calling setIndexOnly(), first(), or last(). 69 * Without calling any of these, the iterator will start at the beginning of the text. 70 * 71 * At any time, next() returns the next normalized code point (UChar32), 72 * with post-increment semantics (like CharacterIterator::next32PostInc()). 73 * previous() returns the previous normalized code point (UChar32), 74 * with pre-decrement semantics (like CharacterIterator::previous32()). 75 * 76 * current() returns the current code point 77 * (respectively the one at the newly set index) without moving 78 * the getIndex(). Note that if the text at the current position 79 * needs to be normalized, then these functions will do that. 80 * (This is why current() is not const.) 81 * It is more efficient to call setIndexOnly() instead, which does not 82 * normalize. 83 * 84 * getIndex() always refers to the position in the input text where the normalized 85 * code points are returned from. It does not always change with each returned 86 * code point. 87 * The code point that is returned from any of the functions 88 * corresponds to text at or after getIndex(), according to the 89 * function's iteration semantics (post-increment or pre-decrement). 90 * 91 * next() returns a code point from at or after the getIndex() 92 * from before the next() call. After the next() call, the getIndex() 93 * might have moved to where the next code point will be returned from 94 * (from a next() or current() call). 95 * This is semantically equivalent to array access with array[index++] 96 * (post-increment semantics). 97 * 98 * previous() returns a code point from at or after the getIndex() 99 * from after the previous() call. 100 * This is semantically equivalent to array access with array[--index] 101 * (pre-decrement semantics). 102 * 103 * Internally, the Normalizer iterator normalizes a small piece of text 104 * starting at the getIndex() and ending at a following "safe" index. 105 * The normalized results is stored in an internal string buffer, and 106 * the code points are iterated from there. 107 * With multiple iteration calls, this is repeated until the next piece 108 * of text needs to be normalized, and the getIndex() needs to be moved. 109 * 110 * The following "safe" index, the internal buffer, and the secondary 111 * iteration index into that buffer are not exposed on the API. 112 * This also means that it is currently not practical to return to 113 * a particular, arbitrary position in the text because one would need to 114 * know, and be able to set, in addition to the getIndex(), at least also the 115 * current index into the internal buffer. 116 * It is currently only possible to observe when getIndex() changes 117 * (with careful consideration of the iteration semantics), 118 * at which time the internal index will be 0. 119 * For example, if getIndex() is different after next() than before it, 120 * then the internal index is 0 and one can return to this getIndex() 121 * later with setIndexOnly(). 122 * 123 * Note: While the setIndex() and getIndex() refer to indices in the 124 * underlying Unicode input text, the next() and previous() methods 125 * iterate through characters in the normalized output. 126 * This means that there is not necessarily a one-to-one correspondence 127 * between characters returned by next() and previous() and the indices 128 * passed to and returned from setIndex() and getIndex(). 129 * It is for this reason that Normalizer does not implement the CharacterIterator interface. 130 * 131 * @author Laura Werner, Mark Davis, Markus Scherer 132 * @stable ICU 2.0 133 */ 134 class U_COMMON_API Normalizer : public UObject { 135 public: 136 #ifndef U_HIDE_DEPRECATED_API 137 /** 138 * If DONE is returned from an iteration function that returns a code point, 139 * then there are no more normalization results available. 140 * @deprecated ICU 56 Use Normalizer2 instead. 141 */ 142 enum { 143 DONE=0xffff 144 }; 145 146 // Constructors 147 148 /** 149 * Creates a new <code>Normalizer</code> object for iterating over the 150 * normalized form of a given string. 151 * <p> 152 * @param str The string to be normalized. The normalization 153 * will start at the beginning of the string. 154 * 155 * @param mode The normalization mode. 156 * @deprecated ICU 56 Use Normalizer2 instead. 157 */ 158 Normalizer(const UnicodeString& str, UNormalizationMode mode); 159 160 /** 161 * Creates a new <code>Normalizer</code> object for iterating over the 162 * normalized form of a given string. 163 * <p> 164 * @param str The string to be normalized. The normalization 165 * will start at the beginning of the string. 166 * 167 * @param length Length of the string, or -1 if NUL-terminated. 168 * @param mode The normalization mode. 169 * @deprecated ICU 56 Use Normalizer2 instead. 170 */ 171 Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode); 172 173 /** 174 * Creates a new <code>Normalizer</code> object for iterating over the 175 * normalized form of the given text. 176 * <p> 177 * @param iter The input text to be normalized. The normalization 178 * will start at the beginning of the string. 179 * 180 * @param mode The normalization mode. 181 * @deprecated ICU 56 Use Normalizer2 instead. 182 */ 183 Normalizer(const CharacterIterator& iter, UNormalizationMode mode); 184 #endif /* U_HIDE_DEPRECATED_API */ 185 186 /** 187 * Copy constructor. 188 * @param copy The object to be copied. 189 * @deprecated ICU 56 Use Normalizer2 instead. 190 */ 191 Normalizer(const Normalizer& copy); 192 193 /** 194 * Destructor 195 * @deprecated ICU 56 Use Normalizer2 instead. 196 */ 197 virtual ~Normalizer(); 198 199 200 //------------------------------------------------------------------------- 201 // Static utility methods 202 //------------------------------------------------------------------------- 203 204 #ifndef U_HIDE_DEPRECATED_API 205 /** 206 * Normalizes a <code>UnicodeString</code> according to the specified normalization mode. 207 * This is a wrapper for unorm_normalize(), using UnicodeString's. 208 * 209 * The <code>options</code> parameter specifies which optional 210 * <code>Normalizer</code> features are to be enabled for this operation. 211 * 212 * @param source the input string to be normalized. 213 * @param mode the normalization mode 214 * @param options the optional features to be enabled (0 for no options) 215 * @param result The normalized string (on output). 216 * @param status The error code. 217 * @deprecated ICU 56 Use Normalizer2 instead. 218 */ 219 static void U_EXPORT2 normalize(const UnicodeString& source, 220 UNormalizationMode mode, int32_t options, 221 UnicodeString& result, 222 UErrorCode &status); 223 224 /** 225 * Compose a <code>UnicodeString</code>. 226 * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC. 227 * This is a wrapper for unorm_normalize(), using UnicodeString's. 228 * 229 * The <code>options</code> parameter specifies which optional 230 * <code>Normalizer</code> features are to be enabled for this operation. 231 * 232 * @param source the string to be composed. 233 * @param compat Perform compatibility decomposition before composition. 234 * If this argument is <code>FALSE</code>, only canonical 235 * decomposition will be performed. 236 * @param options the optional features to be enabled (0 for no options) 237 * @param result The composed string (on output). 238 * @param status The error code. 239 * @deprecated ICU 56 Use Normalizer2 instead. 240 */ 241 static void U_EXPORT2 compose(const UnicodeString& source, 242 UBool compat, int32_t options, 243 UnicodeString& result, 244 UErrorCode &status); 245 246 /** 247 * Static method to decompose a <code>UnicodeString</code>. 248 * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD. 249 * This is a wrapper for unorm_normalize(), using UnicodeString's. 250 * 251 * The <code>options</code> parameter specifies which optional 252 * <code>Normalizer</code> features are to be enabled for this operation. 253 * 254 * @param source the string to be decomposed. 255 * @param compat Perform compatibility decomposition. 256 * If this argument is <code>FALSE</code>, only canonical 257 * decomposition will be performed. 258 * @param options the optional features to be enabled (0 for no options) 259 * @param result The decomposed string (on output). 260 * @param status The error code. 261 * @deprecated ICU 56 Use Normalizer2 instead. 262 */ 263 static void U_EXPORT2 decompose(const UnicodeString& source, 264 UBool compat, int32_t options, 265 UnicodeString& result, 266 UErrorCode &status); 267 268 /** 269 * Performing quick check on a string, to quickly determine if the string is 270 * in a particular normalization format. 271 * This is a wrapper for unorm_quickCheck(), using a UnicodeString. 272 * 273 * Three types of result can be returned UNORM_YES, UNORM_NO or 274 * UNORM_MAYBE. Result UNORM_YES indicates that the argument 275 * string is in the desired normalized format, UNORM_NO determines that 276 * argument string is not in the desired normalized format. A 277 * UNORM_MAYBE result indicates that a more thorough check is required, 278 * the user may have to put the string in its normalized form and compare the 279 * results. 280 * @param source string for determining if it is in a normalized format 281 * @param mode normalization format 282 * @param status A reference to a UErrorCode to receive any errors 283 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE 284 * 285 * @see isNormalized 286 * @deprecated ICU 56 Use Normalizer2 instead. 287 */ 288 static inline UNormalizationCheckResult 289 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status); 290 291 /** 292 * Performing quick check on a string; same as the other version of quickCheck 293 * but takes an extra options parameter like most normalization functions. 294 * 295 * @param source string for determining if it is in a normalized format 296 * @param mode normalization format 297 * @param options the optional features to be enabled (0 for no options) 298 * @param status A reference to a UErrorCode to receive any errors 299 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE 300 * 301 * @see isNormalized 302 * @deprecated ICU 56 Use Normalizer2 instead. 303 */ 304 static UNormalizationCheckResult 305 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status); 306 307 /** 308 * Test if a string is in a given normalization form. 309 * This is semantically equivalent to source.equals(normalize(source, mode)) . 310 * 311 * Unlike unorm_quickCheck(), this function returns a definitive result, 312 * never a "maybe". 313 * For NFD, NFKD, and FCD, both functions work exactly the same. 314 * For NFC and NFKC where quickCheck may return "maybe", this function will 315 * perform further tests to arrive at a TRUE/FALSE result. 316 * 317 * @param src String that is to be tested if it is in a normalization format. 318 * @param mode Which normalization form to test for. 319 * @param errorCode ICU error code in/out parameter. 320 * Must fulfill U_SUCCESS before the function call. 321 * @return Boolean value indicating whether the source string is in the 322 * "mode" normalization form. 323 * 324 * @see quickCheck 325 * @deprecated ICU 56 Use Normalizer2 instead. 326 */ 327 static inline UBool 328 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode); 329 330 /** 331 * Test if a string is in a given normalization form; same as the other version of isNormalized 332 * but takes an extra options parameter like most normalization functions. 333 * 334 * @param src String that is to be tested if it is in a normalization format. 335 * @param mode Which normalization form to test for. 336 * @param options the optional features to be enabled (0 for no options) 337 * @param errorCode ICU error code in/out parameter. 338 * Must fulfill U_SUCCESS before the function call. 339 * @return Boolean value indicating whether the source string is in the 340 * "mode" normalization form. 341 * 342 * @see quickCheck 343 * @deprecated ICU 56 Use Normalizer2 instead. 344 */ 345 static UBool 346 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode); 347 348 /** 349 * Concatenate normalized strings, making sure that the result is normalized as well. 350 * 351 * If both the left and the right strings are in 352 * the normalization form according to "mode/options", 353 * then the result will be 354 * 355 * \code 356 * dest=normalize(left+right, mode, options) 357 * \endcode 358 * 359 * For details see unorm_concatenate in unorm.h. 360 * 361 * @param left Left source string. 362 * @param right Right source string. 363 * @param result The output string. 364 * @param mode The normalization mode. 365 * @param options A bit set of normalization options. 366 * @param errorCode ICU error code in/out parameter. 367 * Must fulfill U_SUCCESS before the function call. 368 * @return result 369 * 370 * @see unorm_concatenate 371 * @see normalize 372 * @see unorm_next 373 * @see unorm_previous 374 * 375 * @deprecated ICU 56 Use Normalizer2 instead. 376 */ 377 static UnicodeString & 378 U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right, 379 UnicodeString &result, 380 UNormalizationMode mode, int32_t options, 381 UErrorCode &errorCode); 382 #endif /* U_HIDE_DEPRECATED_API */ 383 384 /** 385 * Compare two strings for canonical equivalence. 386 * Further options include case-insensitive comparison and 387 * code point order (as opposed to code unit order). 388 * 389 * Canonical equivalence between two strings is defined as their normalized 390 * forms (NFD or NFC) being identical. 391 * This function compares strings incrementally instead of normalizing 392 * (and optionally case-folding) both strings entirely, 393 * improving performance significantly. 394 * 395 * Bulk normalization is only necessary if the strings do not fulfill the FCD 396 * conditions. Only in this case, and only if the strings are relatively long, 397 * is memory allocated temporarily. 398 * For FCD strings and short non-FCD strings there is no memory allocation. 399 * 400 * Semantically, this is equivalent to 401 * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2))) 402 * where code point order and foldCase are all optional. 403 * 404 * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match 405 * the case folding must be performed first, then the normalization. 406 * 407 * @param s1 First source string. 408 * @param s2 Second source string. 409 * 410 * @param options A bit set of options: 411 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 412 * Case-sensitive comparison in code unit order, and the input strings 413 * are quick-checked for FCD. 414 * 415 * - UNORM_INPUT_IS_FCD 416 * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. 417 * If not set, the function will quickCheck for FCD 418 * and normalize if necessary. 419 * 420 * - U_COMPARE_CODE_POINT_ORDER 421 * Set to choose code point order instead of code unit order 422 * (see u_strCompare for details). 423 * 424 * - U_COMPARE_IGNORE_CASE 425 * Set to compare strings case-insensitively using case folding, 426 * instead of case-sensitively. 427 * If set, then the following case folding options are used. 428 * 429 * - Options as used with case-insensitive comparisons, currently: 430 * 431 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 432 * (see u_strCaseCompare for details) 433 * 434 * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT 435 * 436 * @param errorCode ICU error code in/out parameter. 437 * Must fulfill U_SUCCESS before the function call. 438 * @return <0 or 0 or >0 as usual for string comparisons 439 * 440 * @see unorm_compare 441 * @see normalize 442 * @see UNORM_FCD 443 * @see u_strCompare 444 * @see u_strCaseCompare 445 * 446 * @stable ICU 2.2 447 */ 448 static inline int32_t 449 compare(const UnicodeString &s1, const UnicodeString &s2, 450 uint32_t options, 451 UErrorCode &errorCode); 452 453 #ifndef U_HIDE_DEPRECATED_API 454 //------------------------------------------------------------------------- 455 // Iteration API 456 //------------------------------------------------------------------------- 457 458 /** 459 * Return the current character in the normalized text. 460 * current() may need to normalize some text at getIndex(). 461 * The getIndex() is not changed. 462 * 463 * @return the current normalized code point 464 * @deprecated ICU 56 Use Normalizer2 instead. 465 */ 466 UChar32 current(void); 467 468 /** 469 * Return the first character in the normalized text. 470 * This is equivalent to setIndexOnly(startIndex()) followed by next(). 471 * (Post-increment semantics.) 472 * 473 * @return the first normalized code point 474 * @deprecated ICU 56 Use Normalizer2 instead. 475 */ 476 UChar32 first(void); 477 478 /** 479 * Return the last character in the normalized text. 480 * This is equivalent to setIndexOnly(endIndex()) followed by previous(). 481 * (Pre-decrement semantics.) 482 * 483 * @return the last normalized code point 484 * @deprecated ICU 56 Use Normalizer2 instead. 485 */ 486 UChar32 last(void); 487 488 /** 489 * Return the next character in the normalized text. 490 * (Post-increment semantics.) 491 * If the end of the text has already been reached, DONE is returned. 492 * The DONE value could be confused with a U+FFFF non-character code point 493 * in the text. If this is possible, you can test getIndex()<endIndex() 494 * before calling next(), or (getIndex()<endIndex() || last()!=DONE) 495 * after calling next(). (Calling last() will change the iterator state!) 496 * 497 * The C API unorm_next() is more efficient and does not have this ambiguity. 498 * 499 * @return the next normalized code point 500 * @deprecated ICU 56 Use Normalizer2 instead. 501 */ 502 UChar32 next(void); 503 504 /** 505 * Return the previous character in the normalized text and decrement. 506 * (Pre-decrement semantics.) 507 * If the beginning of the text has already been reached, DONE is returned. 508 * The DONE value could be confused with a U+FFFF non-character code point 509 * in the text. If this is possible, you can test 510 * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change 511 * the iterator state!) 512 * 513 * The C API unorm_previous() is more efficient and does not have this ambiguity. 514 * 515 * @return the previous normalized code point 516 * @deprecated ICU 56 Use Normalizer2 instead. 517 */ 518 UChar32 previous(void); 519 520 /** 521 * Set the iteration position in the input text that is being normalized, 522 * without any immediate normalization. 523 * After setIndexOnly(), getIndex() will return the same index that is 524 * specified here. 525 * 526 * @param index the desired index in the input text. 527 * @deprecated ICU 56 Use Normalizer2 instead. 528 */ 529 void setIndexOnly(int32_t index); 530 531 /** 532 * Reset the index to the beginning of the text. 533 * This is equivalent to setIndexOnly(startIndex)). 534 * @deprecated ICU 56 Use Normalizer2 instead. 535 */ 536 void reset(void); 537 538 /** 539 * Retrieve the current iteration position in the input text that is 540 * being normalized. 541 * 542 * A following call to next() will return a normalized code point from 543 * the input text at or after this index. 544 * 545 * After a call to previous(), getIndex() will point at or before the 546 * position in the input text where the normalized code point 547 * was returned from with previous(). 548 * 549 * @return the current index in the input text 550 * @deprecated ICU 56 Use Normalizer2 instead. 551 */ 552 int32_t getIndex(void) const; 553 554 /** 555 * Retrieve the index of the start of the input text. This is the begin index 556 * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string 557 * over which this <code>Normalizer</code> is iterating. 558 * 559 * @return the smallest index in the input text where the Normalizer operates 560 * @deprecated ICU 56 Use Normalizer2 instead. 561 */ 562 int32_t startIndex(void) const; 563 564 /** 565 * Retrieve the index of the end of the input text. This is the end index 566 * of the <code>CharacterIterator</code> or the length of the string 567 * over which this <code>Normalizer</code> is iterating. 568 * This end index is exclusive, i.e., the Normalizer operates only on characters 569 * before this index. 570 * 571 * @return the first index in the input text where the Normalizer does not operate 572 * @deprecated ICU 56 Use Normalizer2 instead. 573 */ 574 int32_t endIndex(void) const; 575 576 /** 577 * Returns TRUE when both iterators refer to the same character in the same 578 * input text. 579 * 580 * @param that a Normalizer object to compare this one to 581 * @return comparison result 582 * @deprecated ICU 56 Use Normalizer2 instead. 583 */ 584 UBool operator==(const Normalizer& that) const; 585 586 /** 587 * Returns FALSE when both iterators refer to the same character in the same 588 * input text. 589 * 590 * @param that a Normalizer object to compare this one to 591 * @return comparison result 592 * @deprecated ICU 56 Use Normalizer2 instead. 593 */ 594 inline UBool operator!=(const Normalizer& that) const; 595 596 /** 597 * Returns a pointer to a new Normalizer that is a clone of this one. 598 * The caller is responsible for deleting the new clone. 599 * @return a pointer to a new Normalizer 600 * @deprecated ICU 56 Use Normalizer2 instead. 601 */ 602 Normalizer* clone(void) const; 603 604 /** 605 * Generates a hash code for this iterator. 606 * 607 * @return the hash code 608 * @deprecated ICU 56 Use Normalizer2 instead. 609 */ 610 int32_t hashCode(void) const; 611 612 //------------------------------------------------------------------------- 613 // Property access methods 614 //------------------------------------------------------------------------- 615 616 /** 617 * Set the normalization mode for this object. 618 * <p> 619 * <b>Note:</b>If the normalization mode is changed while iterating 620 * over a string, calls to {@link #next() } and {@link #previous() } may 621 * return previously buffers characters in the old normalization mode 622 * until the iteration is able to re-sync at the next base character. 623 * It is safest to call {@link #setIndexOnly }, {@link #reset() }, 624 * {@link #setText }, {@link #first() }, 625 * {@link #last() }, etc. after calling <code>setMode</code>. 626 * <p> 627 * @param newMode the new mode for this <code>Normalizer</code>. 628 * @see #getUMode 629 * @deprecated ICU 56 Use Normalizer2 instead. 630 */ 631 void setMode(UNormalizationMode newMode); 632 633 /** 634 * Return the normalization mode for this object. 635 * 636 * This is an unusual name because there used to be a getMode() that 637 * returned a different type. 638 * 639 * @return the mode for this <code>Normalizer</code> 640 * @see #setMode 641 * @deprecated ICU 56 Use Normalizer2 instead. 642 */ 643 UNormalizationMode getUMode(void) const; 644 645 /** 646 * Set options that affect this <code>Normalizer</code>'s operation. 647 * Options do not change the basic composition or decomposition operation 648 * that is being performed, but they control whether 649 * certain optional portions of the operation are done. 650 * Currently the only available option is obsolete. 651 * 652 * It is possible to specify multiple options that are all turned on or off. 653 * 654 * @param option the option(s) whose value is/are to be set. 655 * @param value the new setting for the option. Use <code>TRUE</code> to 656 * turn the option(s) on and <code>FALSE</code> to turn it/them off. 657 * 658 * @see #getOption 659 * @deprecated ICU 56 Use Normalizer2 instead. 660 */ 661 void setOption(int32_t option, 662 UBool value); 663 664 /** 665 * Determine whether an option is turned on or off. 666 * If multiple options are specified, then the result is TRUE if any 667 * of them are set. 668 * <p> 669 * @param option the option(s) that are to be checked 670 * @return TRUE if any of the option(s) are set 671 * @see #setOption 672 * @deprecated ICU 56 Use Normalizer2 instead. 673 */ 674 UBool getOption(int32_t option) const; 675 676 /** 677 * Set the input text over which this <code>Normalizer</code> will iterate. 678 * The iteration position is set to the beginning. 679 * 680 * @param newText a string that replaces the current input text 681 * @param status a UErrorCode 682 * @deprecated ICU 56 Use Normalizer2 instead. 683 */ 684 void setText(const UnicodeString& newText, 685 UErrorCode &status); 686 687 /** 688 * Set the input text over which this <code>Normalizer</code> will iterate. 689 * The iteration position is set to the beginning. 690 * 691 * @param newText a CharacterIterator object that replaces the current input text 692 * @param status a UErrorCode 693 * @deprecated ICU 56 Use Normalizer2 instead. 694 */ 695 void setText(const CharacterIterator& newText, 696 UErrorCode &status); 697 698 /** 699 * Set the input text over which this <code>Normalizer</code> will iterate. 700 * The iteration position is set to the beginning. 701 * 702 * @param newText a string that replaces the current input text 703 * @param length the length of the string, or -1 if NUL-terminated 704 * @param status a UErrorCode 705 * @deprecated ICU 56 Use Normalizer2 instead. 706 */ 707 void setText(ConstChar16Ptr newText, 708 int32_t length, 709 UErrorCode &status); 710 /** 711 * Copies the input text into the UnicodeString argument. 712 * 713 * @param result Receives a copy of the text under iteration. 714 * @deprecated ICU 56 Use Normalizer2 instead. 715 */ 716 void getText(UnicodeString& result); 717 718 /** 719 * ICU "poor man's RTTI", returns a UClassID for this class. 720 * @returns a UClassID for this class. 721 * @deprecated ICU 56 Use Normalizer2 instead. 722 */ 723 static UClassID U_EXPORT2 getStaticClassID(); 724 #endif /* U_HIDE_DEPRECATED_API */ 725 726 /** 727 * ICU "poor man's RTTI", returns a UClassID for the actual class. 728 * @return a UClassID for the actual class. 729 * @deprecated ICU 56 Use Normalizer2 instead. 730 */ 731 virtual UClassID getDynamicClassID() const; 732 733 private: 734 //------------------------------------------------------------------------- 735 // Private functions 736 //------------------------------------------------------------------------- 737 738 Normalizer(); // default constructor not implemented 739 Normalizer &operator=(const Normalizer &that); // assignment operator not implemented 740 741 // Private utility methods for iteration 742 // For documentation, see the source code 743 UBool nextNormalize(); 744 UBool previousNormalize(); 745 746 void init(); 747 void clearBuffer(void); 748 749 //------------------------------------------------------------------------- 750 // Private data 751 //------------------------------------------------------------------------- 752 753 FilteredNormalizer2*fFilteredNorm2; // owned if not NULL 754 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2 755 UNormalizationMode fUMode; // deprecated 756 int32_t fOptions; 757 758 // The input text and our position in it 759 CharacterIterator *text; 760 761 // The normalization buffer is the result of normalization 762 // of the source in [currentIndex..nextIndex[ . 763 int32_t currentIndex, nextIndex; 764 765 // A buffer for holding intermediate results 766 UnicodeString buffer; 767 int32_t bufferPos; 768 }; 769 770 //------------------------------------------------------------------------- 771 // Inline implementations 772 //------------------------------------------------------------------------- 773 774 #ifndef U_HIDE_DEPRECATED_API 775 inline UBool 776 Normalizer::operator!= (const Normalizer& other) const 777 { return ! operator==(other); } 778 779 inline UNormalizationCheckResult 780 Normalizer::quickCheck(const UnicodeString& source, 781 UNormalizationMode mode, 782 UErrorCode &status) { 783 return quickCheck(source, mode, 0, status); 784 } 785 786 inline UBool 787 Normalizer::isNormalized(const UnicodeString& source, 788 UNormalizationMode mode, 789 UErrorCode &status) { 790 return isNormalized(source, mode, 0, status); 791 } 792 #endif /* U_HIDE_DEPRECATED_API */ 793 794 inline int32_t 795 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, 796 uint32_t options, 797 UErrorCode &errorCode) { 798 // all argument checking is done in unorm_compare 799 return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(), 800 toUCharPtr(s2.getBuffer()), s2.length(), 801 options, 802 &errorCode); 803 } 804 805 U_NAMESPACE_END 806 807 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 808 809 #endif // NORMLZR_H 810