1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2009-2013, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: normalizer2.h 11 * encoding: US-ASCII 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2009nov22 16 * created by: Markus W. Scherer 17 */ 18 19 #ifndef __NORMALIZER2_H__ 20 #define __NORMALIZER2_H__ 21 22 /** 23 * \file 24 * \brief C++ API: New API for Unicode Normalization. 25 */ 26 27 #include "unicode/utypes.h" 28 29 #if !UCONFIG_NO_NORMALIZATION 30 31 #include "unicode/uniset.h" 32 #include "unicode/unistr.h" 33 #include "unicode/unorm2.h" 34 35 U_NAMESPACE_BEGIN 36 37 /** 38 * Unicode normalization functionality for standard Unicode normalization or 39 * for using custom mapping tables. 40 * All instances of this class are unmodifiable/immutable. 41 * Instances returned by getInstance() are singletons that must not be deleted by the caller. 42 * The Normalizer2 class is not intended for public subclassing. 43 * 44 * The primary functions are to produce a normalized string and to detect whether 45 * a string is already normalized. 46 * The most commonly used normalization forms are those defined in 47 * http://www.unicode.org/unicode/reports/tr15/ 48 * However, this API supports additional normalization forms for specialized purposes. 49 * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE) 50 * and can be used in implementations of UTS #46. 51 * 52 * Not only are the standard compose and decompose modes supplied, 53 * but additional modes are provided as documented in the Mode enum. 54 * 55 * Some of the functions in this class identify normalization boundaries. 56 * At a normalization boundary, the portions of the string 57 * before it and starting from it do not interact and can be handled independently. 58 * 59 * The spanQuickCheckYes() stops at a normalization boundary. 60 * When the goal is a normalized string, then the text before the boundary 61 * can be copied, and the remainder can be processed with normalizeSecondAndAppend(). 62 * 63 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether 64 * a character is guaranteed to be at a normalization boundary, 65 * regardless of context. 66 * This is used for moving from one normalization boundary to the next 67 * or preceding boundary, and for performing iterative normalization. 68 * 69 * Iterative normalization is useful when only a small portion of a 70 * longer string needs to be processed. 71 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator 72 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart() 73 * (to process only the substring for which sort key bytes are computed). 74 * 75 * The set of normalization boundaries returned by these functions may not be 76 * complete: There may be more boundaries that could be returned. 77 * Different functions may return different boundaries. 78 * @stable ICU 4.4 79 */ 80 class U_COMMON_API Normalizer2 : public UObject { 81 public: 82 /** 83 * Destructor. 84 * @stable ICU 4.4 85 */ 86 ~Normalizer2(); 87 88 /** 89 * Returns a Normalizer2 instance for Unicode NFC normalization. 90 * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode). 91 * Returns an unmodifiable singleton instance. Do not delete it. 92 * @param errorCode Standard ICU error code. Its input value must 93 * pass the U_SUCCESS() test, or else the function returns 94 * immediately. Check for U_FAILURE() on output or use with 95 * function chaining. (See User Guide for details.) 96 * @return the requested Normalizer2, if successful 97 * @stable ICU 49 98 */ 99 static const Normalizer2 * 100 getNFCInstance(UErrorCode &errorCode); 101 102 /** 103 * Returns a Normalizer2 instance for Unicode NFD normalization. 104 * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode). 105 * Returns an unmodifiable singleton instance. Do not delete it. 106 * @param errorCode Standard ICU error code. Its input value must 107 * pass the U_SUCCESS() test, or else the function returns 108 * immediately. Check for U_FAILURE() on output or use with 109 * function chaining. (See User Guide for details.) 110 * @return the requested Normalizer2, if successful 111 * @stable ICU 49 112 */ 113 static const Normalizer2 * 114 getNFDInstance(UErrorCode &errorCode); 115 116 /** 117 * Returns a Normalizer2 instance for Unicode NFKC normalization. 118 * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode). 119 * Returns an unmodifiable singleton instance. Do not delete it. 120 * @param errorCode Standard ICU error code. Its input value must 121 * pass the U_SUCCESS() test, or else the function returns 122 * immediately. Check for U_FAILURE() on output or use with 123 * function chaining. (See User Guide for details.) 124 * @return the requested Normalizer2, if successful 125 * @stable ICU 49 126 */ 127 static const Normalizer2 * 128 getNFKCInstance(UErrorCode &errorCode); 129 130 /** 131 * Returns a Normalizer2 instance for Unicode NFKD normalization. 132 * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode). 133 * Returns an unmodifiable singleton instance. Do not delete it. 134 * @param errorCode Standard ICU error code. Its input value must 135 * pass the U_SUCCESS() test, or else the function returns 136 * immediately. Check for U_FAILURE() on output or use with 137 * function chaining. (See User Guide for details.) 138 * @return the requested Normalizer2, if successful 139 * @stable ICU 49 140 */ 141 static const Normalizer2 * 142 getNFKDInstance(UErrorCode &errorCode); 143 144 /** 145 * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization. 146 * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode). 147 * Returns an unmodifiable singleton instance. Do not delete it. 148 * @param errorCode Standard ICU error code. Its input value must 149 * pass the U_SUCCESS() test, or else the function returns 150 * immediately. Check for U_FAILURE() on output or use with 151 * function chaining. (See User Guide for details.) 152 * @return the requested Normalizer2, if successful 153 * @stable ICU 49 154 */ 155 static const Normalizer2 * 156 getNFKCCasefoldInstance(UErrorCode &errorCode); 157 158 /** 159 * Returns a Normalizer2 instance which uses the specified data file 160 * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 161 * and which composes or decomposes text according to the specified mode. 162 * Returns an unmodifiable singleton instance. Do not delete it. 163 * 164 * Use packageName=NULL for data files that are part of ICU's own data. 165 * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 166 * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 167 * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 168 * 169 * @param packageName NULL for ICU built-in data, otherwise application data package name 170 * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file 171 * @param mode normalization mode (compose or decompose etc.) 172 * @param errorCode Standard ICU error code. Its input value must 173 * pass the U_SUCCESS() test, or else the function returns 174 * immediately. Check for U_FAILURE() on output or use with 175 * function chaining. (See User Guide for details.) 176 * @return the requested Normalizer2, if successful 177 * @stable ICU 4.4 178 */ 179 static const Normalizer2 * 180 getInstance(const char *packageName, 181 const char *name, 182 UNormalization2Mode mode, 183 UErrorCode &errorCode); 184 185 /** 186 * Returns the normalized form of the source string. 187 * @param src source string 188 * @param errorCode Standard ICU error code. Its input value must 189 * pass the U_SUCCESS() test, or else the function returns 190 * immediately. Check for U_FAILURE() on output or use with 191 * function chaining. (See User Guide for details.) 192 * @return normalized src 193 * @stable ICU 4.4 194 */ 195 UnicodeString 196 normalize(const UnicodeString &src, UErrorCode &errorCode) const { 197 UnicodeString result; 198 normalize(src, result, errorCode); 199 return result; 200 } 201 /** 202 * Writes the normalized form of the source string to the destination string 203 * (replacing its contents) and returns the destination string. 204 * The source and destination strings must be different objects. 205 * @param src source string 206 * @param dest destination string; its contents is replaced with normalized src 207 * @param errorCode Standard ICU error code. Its input value must 208 * pass the U_SUCCESS() test, or else the function returns 209 * immediately. Check for U_FAILURE() on output or use with 210 * function chaining. (See User Guide for details.) 211 * @return dest 212 * @stable ICU 4.4 213 */ 214 virtual UnicodeString & 215 normalize(const UnicodeString &src, 216 UnicodeString &dest, 217 UErrorCode &errorCode) const = 0; 218 /** 219 * Appends the normalized form of the second string to the first string 220 * (merging them at the boundary) and returns the first string. 221 * The result is normalized if the first string was normalized. 222 * The first and second strings must be different objects. 223 * @param first string, should be normalized 224 * @param second string, will be normalized 225 * @param errorCode Standard ICU error code. Its input value must 226 * pass the U_SUCCESS() test, or else the function returns 227 * immediately. Check for U_FAILURE() on output or use with 228 * function chaining. (See User Guide for details.) 229 * @return first 230 * @stable ICU 4.4 231 */ 232 virtual UnicodeString & 233 normalizeSecondAndAppend(UnicodeString &first, 234 const UnicodeString &second, 235 UErrorCode &errorCode) const = 0; 236 /** 237 * Appends the second string to the first string 238 * (merging them at the boundary) and returns the first string. 239 * The result is normalized if both the strings were normalized. 240 * The first and second strings must be different objects. 241 * @param first string, should be normalized 242 * @param second string, should be normalized 243 * @param errorCode Standard ICU error code. Its input value must 244 * pass the U_SUCCESS() test, or else the function returns 245 * immediately. Check for U_FAILURE() on output or use with 246 * function chaining. (See User Guide for details.) 247 * @return first 248 * @stable ICU 4.4 249 */ 250 virtual UnicodeString & 251 append(UnicodeString &first, 252 const UnicodeString &second, 253 UErrorCode &errorCode) const = 0; 254 255 /** 256 * Gets the decomposition mapping of c. 257 * Roughly equivalent to normalizing the String form of c 258 * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function 259 * returns FALSE and does not write a string 260 * if c does not have a decomposition mapping in this instance's data. 261 * This function is independent of the mode of the Normalizer2. 262 * @param c code point 263 * @param decomposition String object which will be set to c's 264 * decomposition mapping, if there is one. 265 * @return TRUE if c has a decomposition, otherwise FALSE 266 * @stable ICU 4.6 267 */ 268 virtual UBool 269 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; 270 271 /** 272 * Gets the raw decomposition mapping of c. 273 * 274 * This is similar to the getDecomposition() method but returns the 275 * raw decomposition mapping as specified in UnicodeData.txt or 276 * (for custom data) in the mapping files processed by the gennorm2 tool. 277 * By contrast, getDecomposition() returns the processed, 278 * recursively-decomposed version of this mapping. 279 * 280 * When used on a standard NFKC Normalizer2 instance, 281 * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. 282 * 283 * When used on a standard NFC Normalizer2 instance, 284 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); 285 * in this case, the result contains either one or two code points (=1..4 UChars). 286 * 287 * This function is independent of the mode of the Normalizer2. 288 * The default implementation returns FALSE. 289 * @param c code point 290 * @param decomposition String object which will be set to c's 291 * raw decomposition mapping, if there is one. 292 * @return TRUE if c has a decomposition, otherwise FALSE 293 * @stable ICU 49 294 */ 295 virtual UBool 296 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; 297 298 /** 299 * Performs pairwise composition of a & b and returns the composite if there is one. 300 * 301 * Returns a composite code point c only if c has a two-way mapping to a+b. 302 * In standard Unicode normalization, this means that 303 * c has a canonical decomposition to a+b 304 * and c does not have the Full_Composition_Exclusion property. 305 * 306 * This function is independent of the mode of the Normalizer2. 307 * The default implementation returns a negative value. 308 * @param a A (normalization starter) code point. 309 * @param b Another code point. 310 * @return The non-negative composite code point if there is one; otherwise a negative value. 311 * @stable ICU 49 312 */ 313 virtual UChar32 314 composePair(UChar32 a, UChar32 b) const; 315 316 /** 317 * Gets the combining class of c. 318 * The default implementation returns 0 319 * but all standard implementations return the Unicode Canonical_Combining_Class value. 320 * @param c code point 321 * @return c's combining class 322 * @stable ICU 49 323 */ 324 virtual uint8_t 325 getCombiningClass(UChar32 c) const; 326 327 /** 328 * Tests if the string is normalized. 329 * Internally, in cases where the quickCheck() method would return "maybe" 330 * (which is only possible for the two COMPOSE modes) this method 331 * resolves to "yes" or "no" to provide a definitive result, 332 * at the cost of doing more work in those cases. 333 * @param s input string 334 * @param errorCode Standard ICU error code. Its input value must 335 * pass the U_SUCCESS() test, or else the function returns 336 * immediately. Check for U_FAILURE() on output or use with 337 * function chaining. (See User Guide for details.) 338 * @return TRUE if s is normalized 339 * @stable ICU 4.4 340 */ 341 virtual UBool 342 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; 343 344 /** 345 * Tests if the string is normalized. 346 * For the two COMPOSE modes, the result could be "maybe" in cases that 347 * would take a little more work to resolve definitively. 348 * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster 349 * combination of quick check + normalization, to avoid 350 * re-checking the "yes" prefix. 351 * @param s input string 352 * @param errorCode Standard ICU error code. Its input value must 353 * pass the U_SUCCESS() test, or else the function returns 354 * immediately. Check for U_FAILURE() on output or use with 355 * function chaining. (See User Guide for details.) 356 * @return UNormalizationCheckResult 357 * @stable ICU 4.4 358 */ 359 virtual UNormalizationCheckResult 360 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; 361 362 /** 363 * Returns the end of the normalized substring of the input string. 364 * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> 365 * the substring <code>UnicodeString(s, 0, end)</code> 366 * will pass the quick check with a "yes" result. 367 * 368 * The returned end index is usually one or more characters before the 369 * "no" or "maybe" character: The end index is at a normalization boundary. 370 * (See the class documentation for more about normalization boundaries.) 371 * 372 * When the goal is a normalized string and most input strings are expected 373 * to be normalized already, then call this method, 374 * and if it returns a prefix shorter than the input string, 375 * copy that prefix and use normalizeSecondAndAppend() for the remainder. 376 * @param s input string 377 * @param errorCode Standard ICU error code. Its input value must 378 * pass the U_SUCCESS() test, or else the function returns 379 * immediately. Check for U_FAILURE() on output or use with 380 * function chaining. (See User Guide for details.) 381 * @return "yes" span end index 382 * @stable ICU 4.4 383 */ 384 virtual int32_t 385 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; 386 387 /** 388 * Tests if the character always has a normalization boundary before it, 389 * regardless of context. 390 * If true, then the character does not normalization-interact with 391 * preceding characters. 392 * In other words, a string containing this character can be normalized 393 * by processing portions before this character and starting from this 394 * character independently. 395 * This is used for iterative normalization. See the class documentation for details. 396 * @param c character to test 397 * @return TRUE if c has a normalization boundary before it 398 * @stable ICU 4.4 399 */ 400 virtual UBool hasBoundaryBefore(UChar32 c) const = 0; 401 402 /** 403 * Tests if the character always has a normalization boundary after it, 404 * regardless of context. 405 * If true, then the character does not normalization-interact with 406 * following characters. 407 * In other words, a string containing this character can be normalized 408 * by processing portions up to this character and after this 409 * character independently. 410 * This is used for iterative normalization. See the class documentation for details. 411 * Note that this operation may be significantly slower than hasBoundaryBefore(). 412 * @param c character to test 413 * @return TRUE if c has a normalization boundary after it 414 * @stable ICU 4.4 415 */ 416 virtual UBool hasBoundaryAfter(UChar32 c) const = 0; 417 418 /** 419 * Tests if the character is normalization-inert. 420 * If true, then the character does not change, nor normalization-interact with 421 * preceding or following characters. 422 * In other words, a string containing this character can be normalized 423 * by processing portions before this character and after this 424 * character independently. 425 * This is used for iterative normalization. See the class documentation for details. 426 * Note that this operation may be significantly slower than hasBoundaryBefore(). 427 * @param c character to test 428 * @return TRUE if c is normalization-inert 429 * @stable ICU 4.4 430 */ 431 virtual UBool isInert(UChar32 c) const = 0; 432 }; 433 434 /** 435 * Normalization filtered by a UnicodeSet. 436 * Normalizes portions of the text contained in the filter set and leaves 437 * portions not contained in the filter set unchanged. 438 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE). 439 * Not-in-the-filter text is treated as "is normalized" and "quick check yes". 440 * This class implements all of (and only) the Normalizer2 API. 441 * An instance of this class is unmodifiable/immutable but is constructed and 442 * must be destructed by the owner. 443 * @stable ICU 4.4 444 */ 445 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { 446 public: 447 /** 448 * Constructs a filtered normalizer wrapping any Normalizer2 instance 449 * and a filter set. 450 * Both are aliased and must not be modified or deleted while this object 451 * is used. 452 * The filter set should be frozen; otherwise the performance will suffer greatly. 453 * @param n2 wrapped Normalizer2 instance 454 * @param filterSet UnicodeSet which determines the characters to be normalized 455 * @stable ICU 4.4 456 */ 457 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : 458 norm2(n2), set(filterSet) {} 459 460 /** 461 * Destructor. 462 * @stable ICU 4.4 463 */ 464 ~FilteredNormalizer2(); 465 466 /** 467 * Writes the normalized form of the source string to the destination string 468 * (replacing its contents) and returns the destination string. 469 * The source and destination strings must be different objects. 470 * @param src source string 471 * @param dest destination string; its contents is replaced with normalized src 472 * @param errorCode Standard ICU error code. Its input value must 473 * pass the U_SUCCESS() test, or else the function returns 474 * immediately. Check for U_FAILURE() on output or use with 475 * function chaining. (See User Guide for details.) 476 * @return dest 477 * @stable ICU 4.4 478 */ 479 virtual UnicodeString & 480 normalize(const UnicodeString &src, 481 UnicodeString &dest, 482 UErrorCode &errorCode) const; 483 /** 484 * Appends the normalized form of the second string to the first string 485 * (merging them at the boundary) and returns the first string. 486 * The result is normalized if the first string was normalized. 487 * The first and second strings must be different objects. 488 * @param first string, should be normalized 489 * @param second string, will be normalized 490 * @param errorCode Standard ICU error code. Its input value must 491 * pass the U_SUCCESS() test, or else the function returns 492 * immediately. Check for U_FAILURE() on output or use with 493 * function chaining. (See User Guide for details.) 494 * @return first 495 * @stable ICU 4.4 496 */ 497 virtual UnicodeString & 498 normalizeSecondAndAppend(UnicodeString &first, 499 const UnicodeString &second, 500 UErrorCode &errorCode) const; 501 /** 502 * Appends the second string to the first string 503 * (merging them at the boundary) and returns the first string. 504 * The result is normalized if both the strings were normalized. 505 * The first and second strings must be different objects. 506 * @param first string, should be normalized 507 * @param second string, should be normalized 508 * @param errorCode Standard ICU error code. Its input value must 509 * pass the U_SUCCESS() test, or else the function returns 510 * immediately. Check for U_FAILURE() on output or use with 511 * function chaining. (See User Guide for details.) 512 * @return first 513 * @stable ICU 4.4 514 */ 515 virtual UnicodeString & 516 append(UnicodeString &first, 517 const UnicodeString &second, 518 UErrorCode &errorCode) const; 519 520 /** 521 * Gets the decomposition mapping of c. 522 * For details see the base class documentation. 523 * 524 * This function is independent of the mode of the Normalizer2. 525 * @param c code point 526 * @param decomposition String object which will be set to c's 527 * decomposition mapping, if there is one. 528 * @return TRUE if c has a decomposition, otherwise FALSE 529 * @stable ICU 4.6 530 */ 531 virtual UBool 532 getDecomposition(UChar32 c, UnicodeString &decomposition) const; 533 534 /** 535 * Gets the raw decomposition mapping of c. 536 * For details see the base class documentation. 537 * 538 * This function is independent of the mode of the Normalizer2. 539 * @param c code point 540 * @param decomposition String object which will be set to c's 541 * raw decomposition mapping, if there is one. 542 * @return TRUE if c has a decomposition, otherwise FALSE 543 * @stable ICU 49 544 */ 545 virtual UBool 546 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; 547 548 /** 549 * Performs pairwise composition of a & b and returns the composite if there is one. 550 * For details see the base class documentation. 551 * 552 * This function is independent of the mode of the Normalizer2. 553 * @param a A (normalization starter) code point. 554 * @param b Another code point. 555 * @return The non-negative composite code point if there is one; otherwise a negative value. 556 * @stable ICU 49 557 */ 558 virtual UChar32 559 composePair(UChar32 a, UChar32 b) const; 560 561 /** 562 * Gets the combining class of c. 563 * The default implementation returns 0 564 * but all standard implementations return the Unicode Canonical_Combining_Class value. 565 * @param c code point 566 * @return c's combining class 567 * @stable ICU 49 568 */ 569 virtual uint8_t 570 getCombiningClass(UChar32 c) const; 571 572 /** 573 * Tests if the string is normalized. 574 * For details see the Normalizer2 base class documentation. 575 * @param s input string 576 * @param errorCode Standard ICU error code. Its input value must 577 * pass the U_SUCCESS() test, or else the function returns 578 * immediately. Check for U_FAILURE() on output or use with 579 * function chaining. (See User Guide for details.) 580 * @return TRUE if s is normalized 581 * @stable ICU 4.4 582 */ 583 virtual UBool 584 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; 585 /** 586 * Tests if the string is normalized. 587 * For details see the Normalizer2 base class documentation. 588 * @param s input string 589 * @param errorCode Standard ICU error code. Its input value must 590 * pass the U_SUCCESS() test, or else the function returns 591 * immediately. Check for U_FAILURE() on output or use with 592 * function chaining. (See User Guide for details.) 593 * @return UNormalizationCheckResult 594 * @stable ICU 4.4 595 */ 596 virtual UNormalizationCheckResult 597 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; 598 /** 599 * Returns the end of the normalized substring of the input string. 600 * For details see the Normalizer2 base class documentation. 601 * @param s input string 602 * @param errorCode Standard ICU error code. Its input value must 603 * pass the U_SUCCESS() test, or else the function returns 604 * immediately. Check for U_FAILURE() on output or use with 605 * function chaining. (See User Guide for details.) 606 * @return "yes" span end index 607 * @stable ICU 4.4 608 */ 609 virtual int32_t 610 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; 611 612 /** 613 * Tests if the character always has a normalization boundary before it, 614 * regardless of context. 615 * For details see the Normalizer2 base class documentation. 616 * @param c character to test 617 * @return TRUE if c has a normalization boundary before it 618 * @stable ICU 4.4 619 */ 620 virtual UBool hasBoundaryBefore(UChar32 c) const; 621 622 /** 623 * Tests if the character always has a normalization boundary after it, 624 * regardless of context. 625 * For details see the Normalizer2 base class documentation. 626 * @param c character to test 627 * @return TRUE if c has a normalization boundary after it 628 * @stable ICU 4.4 629 */ 630 virtual UBool hasBoundaryAfter(UChar32 c) const; 631 632 /** 633 * Tests if the character is normalization-inert. 634 * For details see the Normalizer2 base class documentation. 635 * @param c character to test 636 * @return TRUE if c is normalization-inert 637 * @stable ICU 4.4 638 */ 639 virtual UBool isInert(UChar32 c) const; 640 private: 641 UnicodeString & 642 normalize(const UnicodeString &src, 643 UnicodeString &dest, 644 USetSpanCondition spanCondition, 645 UErrorCode &errorCode) const; 646 647 UnicodeString & 648 normalizeSecondAndAppend(UnicodeString &first, 649 const UnicodeString &second, 650 UBool doNormalize, 651 UErrorCode &errorCode) const; 652 653 const Normalizer2 &norm2; 654 const UnicodeSet &set; 655 }; 656 657 U_NAMESPACE_END 658 659 #endif // !UCONFIG_NO_NORMALIZATION 660 #endif // __NORMALIZER2_H__ 661