Home | History | Annotate | Download | only in unicode
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ***************************************************************************
      5 * Copyright (C) 2008-2016, International Business Machines Corporation
      6 * and others. All Rights Reserved.
      7 ***************************************************************************
      8 *   file name:  uspoof.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2008Feb13
     14 *   created by: Andy Heninger
     15 *
     16 *   Unicode Spoof Detection
     17 */
     18 
     19 #ifndef USPOOF_H
     20 #define USPOOF_H
     21 
     22 #include "unicode/utypes.h"
     23 #include "unicode/uset.h"
     24 #include "unicode/parseerr.h"
     25 #include "unicode/localpointer.h"
     26 
     27 #if !UCONFIG_NO_NORMALIZATION
     28 
     29 
     30 #if U_SHOW_CPLUSPLUS_API
     31 #include "unicode/unistr.h"
     32 #include "unicode/uniset.h"
     33 #endif
     34 
     35 
     36 /**
     37  * \file
     38  * \brief Unicode Security and Spoofing Detection, C API.
     39  *
     40  * <p>
     41  * This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and
     42  * <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions:
     43  *
     44  * <ol>
     45  * <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "Harvest" and
     46  * &quot;&Eta;arvest&quot;, where the second string starts with the Greek capital letter Eta.</li>
     47  * <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof
     48  * detection</em>), such as "paypal" with some Latin characters substituted with Cyrillic look-alikes.</li>
     49  * </ol>
     50  *
     51  * <p>
     52  * Although originally designed as a method for flagging suspicious identifier strings such as URLs,
     53  * <code>USpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word
     54  * content filters.
     55  *
     56  * <p>
     57  * The functions of this class are exposed as C API, with a handful of syntactical conveniences for C++.
     58  *
     59  * <h2>Confusables</h2>
     60  *
     61  * <p>
     62  * The following example shows how to use <code>USpoofChecker</code> to check for confusability between two strings:
     63  *
     64  * \code{.c}
     65  * UErrorCode status = U_ZERO_ERROR;
     66  * UChar* str1 = (UChar*) u"Harvest";
     67  * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
     68  *
     69  * USpoofChecker* sc = uspoof_open(&status);
     70  * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
     71  *
     72  * int32_t bitmask = uspoof_areConfusable(sc, str1, -1, str2, -1, &status);
     73  * UBool result = bitmask != 0;
     74  * // areConfusable: 1 (status: U_ZERO_ERROR)
     75  * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
     76  * uspoof_close(sc);
     77  * \endcode
     78  *
     79  * <p>
     80  * The call to {@link uspoof_open} creates a <code>USpoofChecker</code> object; the call to {@link uspoof_setChecks}
     81  * enables confusable checking and disables all other checks; the call to {@link uspoof_areConfusable} performs the
     82  * confusability test; and the following line extracts the result out of the return value. For best performance,
     83  * the instance should be created once (e.g., upon application startup), and the efficient
     84  * {@link uspoof_areConfusable} method can be used at runtime.
     85  *
     86  * <p>
     87  * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers.  It will automatically call
     88  * {@link uspoof_close} when the object goes out of scope:
     89  *
     90  * \code{.cpp}
     91  * UErrorCode status = U_ZERO_ERROR;
     92  * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
     93  * uspoof_setChecks(sc.getAlias(), USPOOF_CONFUSABLE, &status);
     94  * // ...
     95  * \endcode
     96  *
     97  * <p>
     98  * UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can
     99  * be thought of as a "hash code". {@link uspoof_getSkeleton} computes the skeleton for a particular string, so
    100  * the following snippet is equivalent to the example above:
    101  *
    102  * \code{.c}
    103  * UErrorCode status = U_ZERO_ERROR;
    104  * UChar* str1 = (UChar*) u"Harvest";
    105  * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
    106  *
    107  * USpoofChecker* sc = uspoof_open(&status);
    108  * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
    109  *
    110  * // Get skeleton 1
    111  * int32_t skel1Len = uspoof_getSkeleton(sc, 0, str1, -1, NULL, 0, &status);
    112  * UChar* skel1 = (UChar*) malloc(++skel1Len * sizeof(UChar));
    113  * status = U_ZERO_ERROR;
    114  * uspoof_getSkeleton(sc, 0, str1, -1, skel1, skel1Len, &status);
    115  *
    116  * // Get skeleton 2
    117  * int32_t skel2Len = uspoof_getSkeleton(sc, 0, str2, -1, NULL, 0, &status);
    118  * UChar* skel2 = (UChar*) malloc(++skel2Len * sizeof(UChar));
    119  * status = U_ZERO_ERROR;
    120  * uspoof_getSkeleton(sc, 0, str2, -1, skel2, skel2Len, &status);
    121  *
    122  * // Are the skeletons the same?
    123  * UBool result = u_strcmp(skel1, skel2) == 0;
    124  * // areConfusable: 1 (status: U_ZERO_ERROR)
    125  * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
    126  * uspoof_close(sc);
    127  * free(skel1);
    128  * free(skel2);
    129  * \endcode
    130  *
    131  * <p>
    132  * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling
    133  * {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below:
    134  *
    135  * \code{.c}
    136  * UErrorCode status = U_ZERO_ERROR;
    137  * #define DICTIONARY_LENGTH 2
    138  * UChar* dictionary[DICTIONARY_LENGTH] = { (UChar*) u"lorem", (UChar*) u"ipsum" };
    139  * UChar* skeletons[DICTIONARY_LENGTH];
    140  * UChar* str = (UChar*) u"1orern";
    141  *
    142  * // Setup:
    143  * USpoofChecker* sc = uspoof_open(&status);
    144  * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
    145  * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
    146  *     UChar* word = dictionary[i];
    147  *     int32_t len = uspoof_getSkeleton(sc, 0, word, -1, NULL, 0, &status);
    148  *     skeletons[i] = (UChar*) malloc(++len * sizeof(UChar));
    149  *     status = U_ZERO_ERROR;
    150  *     uspoof_getSkeleton(sc, 0, word, -1, skeletons[i], len, &status);
    151  * }
    152  *
    153  * // Live Check:
    154  * {
    155  *     int32_t len = uspoof_getSkeleton(sc, 0, str, -1, NULL, 0, &status);
    156  *     UChar* skel = (UChar*) malloc(++len * sizeof(UChar));
    157  *     status = U_ZERO_ERROR;
    158  *     uspoof_getSkeleton(sc, 0, str, -1, skel, len, &status);
    159  *     UBool result = FALSE;
    160  *     for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
    161  *         result = u_strcmp(skel, skeletons[i]) == 0;
    162  *         if (result == TRUE) { break; }
    163  *     }
    164  *     // Has confusable in dictionary: 1 (status: U_ZERO_ERROR)
    165  *     printf("Has confusable in dictionary: %d (status: %s)\n", result, u_errorName(status));
    166  *     free(skel);
    167  * }
    168  *
    169  * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
    170  *     free(skeletons[i]);
    171  * }
    172  * uspoof_close(sc);
    173  * \endcode
    174  *
    175  * <p>
    176  * <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em>
    177  * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons
    178  * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons.
    179  *
    180  * <h2>Spoof Detection</h2>
    181  *
    182  * <p>
    183  * The following snippet shows a minimal example of using <code>USpoofChecker</code> to perform spoof detection on a
    184  * string:
    185  *
    186  * \code{.c}
    187  * UErrorCode status = U_ZERO_ERROR;
    188  * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
    189  *
    190  * // Get the default set of allowable characters:
    191  * USet* allowed = uset_openEmpty();
    192  * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
    193  * uset_addAll(allowed, uspoof_getInclusionSet(&status));
    194  *
    195  * USpoofChecker* sc = uspoof_open(&status);
    196  * uspoof_setAllowedChars(sc, allowed, &status);
    197  * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
    198  *
    199  * int32_t bitmask = uspoof_check(sc, str, -1, NULL, &status);
    200  * UBool result = bitmask != 0;
    201  * // fails checks: 1 (status: U_ZERO_ERROR)
    202  * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
    203  * uspoof_close(sc);
    204  * uset_close(allowed);
    205  * \endcode
    206  *
    207  * <p>
    208  * As in the case for confusability checking, it is good practice to create one <code>USpoofChecker</code> instance at
    209  * startup, and call the cheaper {@link uspoof_check} online. We specify the set of
    210  * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39.
    211  *
    212  * <p>
    213  * In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings,
    214  * and {@link uspoof_checkUnicodeString} is exposed for C++ programmers.
    215  *
    216  * <p>
    217  * If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks
    218  * is available in the returned bitmask.  For complete information, use the {@link uspoof_check2} class of functions
    219  * with a {@link USpoofCheckResult} parameter:
    220  *
    221  * \code{.c}
    222  * UErrorCode status = U_ZERO_ERROR;
    223  * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
    224  *
    225  * // Get the default set of allowable characters:
    226  * USet* allowed = uset_openEmpty();
    227  * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
    228  * uset_addAll(allowed, uspoof_getInclusionSet(&status));
    229  *
    230  * USpoofChecker* sc = uspoof_open(&status);
    231  * uspoof_setAllowedChars(sc, allowed, &status);
    232  * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
    233  *
    234  * USpoofCheckResult* checkResult = uspoof_openCheckResult(&status);
    235  * int32_t bitmask = uspoof_check2(sc, str, -1, checkResult, &status);
    236  *
    237  * int32_t failures1 = bitmask;
    238  * int32_t failures2 = uspoof_getCheckResultChecks(checkResult, &status);
    239  * assert(failures1 == failures2);
    240  * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
    241  * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
    242  *
    243  * // Cleanup:
    244  * uspoof_close(sc);
    245  * uset_close(allowed);
    246  * uspoof_closeCheckResult(checkResult);
    247  * \endcode
    248  *
    249  * C++ users can take advantage of a few syntactical conveniences.  The following snippet is functionally
    250  * equivalent to the one above:
    251  *
    252  * \code{.cpp}
    253  * UErrorCode status = U_ZERO_ERROR;
    254  * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
    255  *
    256  * // Get the default set of allowable characters:
    257  * UnicodeSet allowed;
    258  * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
    259  * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
    260  *
    261  * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
    262  * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
    263  * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
    264  *
    265  * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
    266  * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
    267  *
    268  * int32_t failures1 = bitmask;
    269  * int32_t failures2 = uspoof_getCheckResultChecks(checkResult.getAlias(), &status);
    270  * assert(failures1 == failures2);
    271  * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
    272  * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
    273  *
    274  * // Explicit cleanup not necessary.
    275  * \endcode
    276  *
    277  * <p>
    278  * The return value is a bitmask of the checks that failed. In this case, there was one check that failed:
    279  * {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are:
    280  *
    281  * <ul>
    282  * <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the
    283  * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS
    284  * 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li>
    285  * <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character
    286  * sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li>
    287  * <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable
    288  * characters. See {@link uspoof_setAllowedChars} and {@link uspoof_setAllowedLocales}.</li>
    289  * <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li>
    290  * </ul>
    291  *
    292  * <p>
    293  * These checks can be enabled independently of each other. For example, if you were interested in checking for only the
    294  * INVISIBLE and MIXED_NUMBERS conditions, you could do:
    295  *
    296  * \code{.c}
    297  * UErrorCode status = U_ZERO_ERROR;
    298  * UChar* str = (UChar*) u"8\u09EA";  // 8 mixed with U+09EA BENGALI DIGIT FOUR
    299  *
    300  * USpoofChecker* sc = uspoof_open(&status);
    301  * uspoof_setChecks(sc, USPOOF_INVISIBLE | USPOOF_MIXED_NUMBERS, &status);
    302  *
    303  * int32_t bitmask = uspoof_check2(sc, str, -1, NULL, &status);
    304  * UBool result = bitmask != 0;
    305  * // fails checks: 1 (status: U_ZERO_ERROR)
    306  * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
    307  * uspoof_close(sc);
    308  * \endcode
    309  *
    310  * <p>
    311  * Here is an example in C++ showing how to compute the restriction level of a string:
    312  *
    313  * \code{.cpp}
    314  * UErrorCode status = U_ZERO_ERROR;
    315  * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
    316  *
    317  * // Get the default set of allowable characters:
    318  * UnicodeSet allowed;
    319  * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
    320  * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
    321  *
    322  * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
    323  * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
    324  * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
    325  * uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL | USPOOF_AUX_INFO, &status);
    326  *
    327  * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
    328  * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
    329  *
    330  * URestrictionLevel restrictionLevel = uspoof_getCheckResultRestrictionLevel(checkResult.getAlias(), &status);
    331  * // Since USPOOF_AUX_INFO was enabled, the restriction level is also available in the upper bits of the bitmask:
    332  * assert((restrictionLevel & bitmask) == restrictionLevel);
    333  * // Restriction level: 0x50000000 (status: U_ZERO_ERROR)
    334  * printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status));
    335  * \endcode
    336  *
    337  * <p>
    338  * The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE.  Since
    339  * USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check.
    340  *
    341  * <p>
    342  * <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in
    343  * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings
    344  * are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have
    345  * Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is
    346  * recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed
    347  * with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. For more details on
    348  * the levels, see UTS 39 or {@link URestrictionLevel}. The Restriction Level test is aware of the set of
    349  * allowed characters set in {@link uspoof_setAllowedChars}. Note that characters which have script code
    350  * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
    351  * scripts.
    352  *
    353  * <h2>Additional Information</h2>
    354  *
    355  * <p>
    356  * A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
    357  *
    358  * <p>
    359  * <b>Thread Safety:</b> The test functions for checking a single identifier, or for testing whether
    360  * two identifiers are possibly confusable, are thread safe. They may be called concurrently, from multiple threads,
    361  * using the same USpoofChecker instance.
    362  *
    363  * <p>
    364  * More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are
    365  * thread safe. Those that take a non-const USpoofChecker are not thread safe.
    366  *
    367  * @stable ICU 4.6
    368  */
    369 
    370 struct USpoofChecker;
    371 typedef struct USpoofChecker USpoofChecker; /**< C typedef so the struct can be named without the struct keyword */
    372 
    373 #ifndef U_HIDE_DRAFT_API
    374 /** Struct passed to the uspoof_check2 family of functions to obtain complete check results.
    375  * @see uspoof_openCheckResult
    376  */
    377 struct USpoofCheckResult;
    378 /** C typedef for struct USpoofCheckResult.
    379  * @see uspoof_openCheckResult
    380  */
    381 typedef struct USpoofCheckResult USpoofCheckResult;
    382 #endif /* U_HIDE_DRAFT_API */
    383 
    384 /**
    385  * Enum for the kinds of checks that USpoofChecker can perform.
    386  * These enum values are used both to select the set of checks that
    387  * will be performed, and to report results from the check function.
    388  *
    389  * @stable ICU 4.2
    390  */
    391 typedef enum USpoofChecks {
    392     /**
    393      * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
    394      * that the two strings are visually confusable and that they are from the same script, according to UTS 39 section
    395      * 4.
    396      *
    397      * @see uspoof_areConfusable
    398      * @stable ICU 4.2
    399      */
    400     USPOOF_SINGLE_SCRIPT_CONFUSABLE =   1,
    401 
    402     /**
    403      * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
    404      * that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS
    405      * 39 section 4.
    406      *
    407      * @see uspoof_areConfusable
    408      * @stable ICU 4.2
    409      */
    410     USPOOF_MIXED_SCRIPT_CONFUSABLE  =   2,
    411 
    412     /**
    413      * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
    414      * that the two strings are visually confusable and that they are not from the same script but both of them are
    415      * single-script strings, according to UTS 39 section 4.
    416      *
    417      * @see uspoof_areConfusable
    418      * @stable ICU 4.2
    419      */
    420     USPOOF_WHOLE_SCRIPT_CONFUSABLE  =   4,
    421 
    422 #ifndef U_HIDE_DRAFT_API
    423     /**
    424      * Enable this flag in {@link uspoof_setChecks} to turn on all types of confusables.  You may set
    425      * the checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to
    426      * make {@link uspoof_areConfusable} return only those types of confusables.
    427      *
    428      * @see uspoof_areConfusable
    429      * @see uspoof_getSkeleton
    430      * @draft ICU 58
    431      */
    432     USPOOF_CONFUSABLE               =   USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE,
    433 #endif /* U_HIDE_DRAFT_API */
    434 
    435 #ifndef U_HIDE_DEPRECATED_API
    436     /**
    437       * This flag is deprecated and no longer affects the behavior of SpoofChecker.
    438       *
    439       * @deprecated ICU 58  Any case confusable mappings were removed from UTS 39; the corresponding ICU API was deprecated.
    440       */
    441     USPOOF_ANY_CASE                 =   8,
    442 #endif  /* U_HIDE_DEPRECATED_API */
    443 
    444     /**
    445       * Check that an identifier is no looser than the specified RestrictionLevel.
    446       * The default if {@link uspoof_setRestrictionLevel} is not called is HIGHLY_RESTRICTIVE.
    447       *
    448       * If USPOOF_AUX_INFO is enabled the actual restriction level of the
    449       * identifier being tested will also be returned by uspoof_check().
    450       *
    451       * @see URestrictionLevel
    452       * @see uspoof_setRestrictionLevel
    453       * @see USPOOF_AUX_INFO
    454       *
    455       * @stable ICU 51
    456       */
    457     USPOOF_RESTRICTION_LEVEL        = 16,
    458 
    459 #ifndef U_HIDE_DEPRECATED_API
    460     /** Check that an identifier contains only characters from a
    461       * single script (plus chars from the common and inherited scripts.)
    462       * Applies only to checks of a single identifier.
    463       * @deprecated ICU 51  Use RESTRICTION_LEVEL instead.
    464       */
    465     USPOOF_SINGLE_SCRIPT            =  USPOOF_RESTRICTION_LEVEL,
    466 #endif  /* U_HIDE_DEPRECATED_API */
    467 
    468     /** Check an identifier for the presence of invisible characters,
    469       * such as zero-width spaces, or character sequences that are
    470       * likely not to display, such as multiple occurrences of the same
    471       * non-spacing mark.  This check does not test the input string as a whole
    472       * for conformance to any particular syntax for identifiers.
    473       */
    474     USPOOF_INVISIBLE                =  32,
    475 
    476     /** Check that an identifier contains only characters from a specified set
    477       * of acceptable characters.  See {@link uspoof_setAllowedChars} and
    478       * {@link uspoof_setAllowedLocales}.  Note that a string that fails this check
    479       * will also fail the {@link USPOOF_RESTRICTION_LEVEL} check.
    480       */
    481     USPOOF_CHAR_LIMIT               =  64,
    482 
    483    /**
    484      * Check that an identifier does not mix numbers from different numbering systems.
    485      * For more information, see UTS 39 section 5.3.
    486      *
    487      * @stable ICU 51
    488      */
    489     USPOOF_MIXED_NUMBERS            = 128,
    490 
    491    /**
    492      * Enable all spoof checks.
    493      *
    494      * @stable ICU 4.6
    495      */
    496     USPOOF_ALL_CHECKS               = 0xFFFF,
    497 
    498     /**
    499       * Enable the return of auxiliary (non-error) information in the
    500       * upper bits of the check results value.
    501       *
    502       * If this "check" is not enabled, the results of {@link uspoof_check} will be
    503       * zero when an identifier passes all of the enabled checks.
    504       *
    505       * If this "check" is enabled, (uspoof_check() & {@link USPOOF_ALL_CHECKS}) will
    506       * be zero when an identifier passes all checks.
    507       *
    508       * @stable ICU 51
    509       */
    510     USPOOF_AUX_INFO                  = 0x40000000
    511 
    512     } USpoofChecks;
    513 
    514 
    515     /**
    516      * Constants from UTS #39 for use in {@link uspoof_setRestrictionLevel}, and
    517      * for returned identifier restriction levels in check results.
    518      *
    519      * @stable ICU 51
    520      *
    521      * @see uspoof_setRestrictionLevel
    522      * @see uspoof_check
    523      */
    524     typedef enum URestrictionLevel {
    525         /**
    526          * All characters in the string are in the identifier profile and all characters in the string are in the
    527          * ASCII range.
    528          *
    529          * @stable ICU 51
    530          */
    531         USPOOF_ASCII = 0x10000000,
    532         /**
    533          * The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and
    534          * the string is single-script, according to the definition in UTS 39 section 5.1.
    535          *
    536          * @stable ICU 53
    537          */
    538         USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,
    539         /**
    540          * The string classifies as Single Script, or all characters in the string are in the identifier profile and
    541          * the string is covered by any of the following sets of scripts, according to the definition in UTS 39
    542          * section 5.1:
    543          * <ul>
    544          *   <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li>
    545          *   <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li>
    546          *   <li>Latin + Han + Hangul (or equivalently: Latn + Kore)</li>
    547          * </ul>
    548          * This is the default restriction in ICU.
    549          *
    550          * @stable ICU 51
    551          */
    552         USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,
    553         /**
    554          * The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile
    555          * and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic,
    556          * Greek, and Cherokee.
    557          *
    558          * @stable ICU 51
    559          */
    560         USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,
    561         /**
    562          * All characters in the string are in the identifier profile.  Allow arbitrary mixtures of scripts.
    563          *
    564          * @stable ICU 51
    565          */
    566         USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,
    567         /**
    568          * Any valid identifiers, including characters outside of the Identifier Profile.
    569          *
    570          * @stable ICU 51
    571          */
    572         USPOOF_UNRESTRICTIVE = 0x60000000,
    573         /**
    574          * Mask for selecting the Restriction Level bits from the return value of {@link uspoof_check}.
    575          *
    576          * @stable ICU 53
    577          */
    578         USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000,
    579 #ifndef U_HIDE_INTERNAL_API
    580         /**
    581          * An undefined restriction level.
    582          * @internal
    583          */
    584         USPOOF_UNDEFINED_RESTRICTIVE = -1
    585 #endif  /* U_HIDE_INTERNAL_API */
    586     } URestrictionLevel;
    587 
    588 /**
    589  *  Create a Unicode Spoof Checker, configured to perform all
    590  *  checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.
    591  *  Note that additional checks may be added in the future,
    592  *  resulting in changes to the default checking behavior.
    593  *
    594  *  @param status  The error code, set if this function encounters a problem.
    595  *  @return        the newly created Spoof Checker
    596  *  @stable ICU 4.2
    597  */
    598 U_STABLE USpoofChecker * U_EXPORT2
    599 uspoof_open(UErrorCode *status);
    600 
    601 
    602 /**
    603  * Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
    604  * Inverse of uspoof_serialize().
    605  * The memory containing the serialized data must remain valid and unchanged
    606  * as long as the spoof checker, or any cloned copies of the spoof checker,
    607  * are in use.  Ownership of the memory remains with the caller.
    608  * The spoof checker (and any clones) must be closed prior to deleting the
    609  * serialized data.
    610  *
    611  * @param data a pointer to 32-bit-aligned memory containing the serialized form of spoof data
    612  * @param length the number of bytes available at data;
    613  *               can be more than necessary
    614  * @param pActualLength receives the actual number of bytes at data taken up by the data;
    615  *                      can be NULL
    616  * @param pErrorCode ICU error code
    617  * @return the spoof checker opened from the serialized data.
    618  *
    619  * @see uspoof_open
    620  * @see uspoof_serialize
    621  * @stable ICU 4.2
    622  */
    623 U_STABLE USpoofChecker * U_EXPORT2
    624 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
    625                           UErrorCode *pErrorCode);
    626 
    627 /**
    628   * Open a Spoof Checker from the source form of the spoof data.
    629   * The input corresponds to the Unicode data file confusables.txt
    630   * as described in Unicode UTS #39.  The syntax of the source data
    631   * is as described in UTS #39 for this file, and the content of
    632   * this file is acceptable input.
    633   *
    634   * The character encoding of the (char *) input text is UTF-8.
    635   *
    636   * @param confusables a pointer to the confusable characters definitions,
    637   *                    as found in file confusables.txt from unicode.org.
    638   * @param confusablesLen The length of the confusables text, or -1 if the
    639   *                    input string is zero terminated.
    640   * @param confusablesWholeScript
    641   *                    Deprecated in ICU 58.  No longer used.
    642   * @param confusablesWholeScriptLen
    643   *                    Deprecated in ICU 58.  No longer used.
    644   * @param errType     In the event of an error in the input, indicates
    645   *                    which of the input files contains the error.
    646   *                    The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or
    647   *                    USPOOF_WHOLE_SCRIPT_CONFUSABLE, or
    648   *                    zero if no errors are found.
    649   * @param pe          In the event of an error in the input, receives the position
    650   *                    in the input text (line, offset) of the error.
    651   * @param status      an in/out ICU UErrorCode.  Among the possible errors is
    652   *                    U_PARSE_ERROR, which is used to report syntax errors
    653   *                    in the input.
    654   * @return            A spoof checker that uses the rules from the input files.
    655   * @stable ICU 4.2
    656   */
    657 U_STABLE USpoofChecker * U_EXPORT2
    658 uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
    659                       const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
    660                       int32_t *errType, UParseError *pe, UErrorCode *status);
    661 
    662 
    663 /**
    664   * Close a Spoof Checker, freeing any memory held by its implementation.
    665   * @param sc  The USpoofChecker to close.
    666   * @stable ICU 4.2
    667   */
    668 U_STABLE void U_EXPORT2
    669 uspoof_close(USpoofChecker *sc);
    670 
    671 #if U_SHOW_CPLUSPLUS_API
    672 
    673 U_NAMESPACE_BEGIN
    674 
    675 /**
    676  * \class LocalUSpoofCheckerPointer
    677  * "Smart pointer" class, closes a USpoofChecker via uspoof_close().
    678  * For most methods see the LocalPointerBase base class.
    679  *
    680  * @see LocalPointerBase
    681  * @see LocalPointer
    682  * @stable ICU 4.4
    683  */
    684 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckerPointer, USpoofChecker, uspoof_close);
    685 
    686 U_NAMESPACE_END
    687 
    688 #endif
    689 
    690 /**
    691  * Clone a Spoof Checker.  The clone will be set to perform the same checks
    692  *   as the original source.
    693  *
    694  * @param sc       The source USpoofChecker
    695  * @param status   The error code, set if this function encounters a problem.
    696  * @return         The clone of the source USpoofChecker.
    697  * @stable ICU 4.2
    698  */
    699 U_STABLE USpoofChecker * U_EXPORT2
    700 uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
    701 
    702 
    703 /**
    704  * Specify the bitmask of checks that will be performed by {@link uspoof_check}. Calling this method
    705  * overwrites any checks that may have already been enabled. By default, all checks are enabled.
    706  *
    707  * To enable specific checks and disable all others, the "whitelisted" checks should be ORed together. For
    708  * example, to fail strings containing characters outside of the set specified by {@link uspoof_setAllowedChars} and
    709  * also strings that contain digits from mixed numbering systems:
    710  *
    711  * <pre>
    712  * {@code
    713  * uspoof_setChecks(sc, USPOOF_CHAR_LIMIT | USPOOF_MIXED_NUMBERS, &status);
    714  * }
    715  * </pre>
    716  *
    717  * To disable specific checks and enable all others, the "blacklisted" checks should be ANDed away from
    718  * USPOOF_ALL_CHECKS. For example, if you are not planning to use the {@link uspoof_areConfusable} functionality,
    719  * it is good practice to disable the USPOOF_CONFUSABLE check:
    720  *
    721  * <pre>
    722  * {@code
    723  * uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CONFUSABLE, &status);
    724  * }
    725  * </pre>
    726  *
    727  * Note that methods such as {@link uspoof_setAllowedChars}, {@link uspoof_setAllowedLocales}, and
    728  * {@link uspoof_setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they
    729  * enable onto the existing bitmask specified by this method. For more details, see the documentation of those
    730  * methods.
    731  *
    732  * @param sc       The USpoofChecker
    733  * @param checks   The set of checks that this spoof checker will perform.
    734  *                 The value is a bit set, obtained by OR-ing together
    735  *                 values from enum USpoofChecks.
    736  * @param status   The error code, set if this function encounters a problem.
    737  * @stable ICU 4.2
    738  *
    739  */
    740 U_STABLE void U_EXPORT2
    741 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
    742 
    743 /**
    744  * Get the set of checks that this Spoof Checker has been configured to perform.
    745  *
    746  * @param sc       The USpoofChecker
    747  * @param status   The error code, set if this function encounters a problem.
    748  * @return         The set of checks that this spoof checker will perform.
    749  *                 The value is a bit set, obtained by OR-ing together
    750  *                 values from enum USpoofChecks.
    751  * @stable ICU 4.2
    752  *
    753  */
    754 U_STABLE int32_t U_EXPORT2
    755 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
    756 
    757 /**
    758  * Set the loosest restriction level allowed for strings. The default if this is not called is
    759  * {@link USPOOF_HIGHLY_RESTRICTIVE}. Calling this method enables the {@link USPOOF_RESTRICTION_LEVEL} and
    760  * {@link USPOOF_MIXED_NUMBERS} checks, corresponding to Sections 5.2 and 5.3 of UTS 39. To customize which checks are
    761  * to be performed by {@link uspoof_check}, see {@link uspoof_setChecks}.
    762  *
    763  * @param sc       The USpoofChecker
    764  * @param restrictionLevel The loosest restriction level allowed.
    765  * @see URestrictionLevel
    766  * @stable ICU 51
    767  */
    768 U_STABLE void U_EXPORT2
    769 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
    770 
    771 
    772 /**
    773   * Get the Restriction Level that will be tested if the checks include {@link USPOOF_RESTRICTION_LEVEL}.
    774   * @param sc The USpoofChecker
    775   * @return The restriction level
    776   * @see URestrictionLevel
    777   * @stable ICU 51
    778   */
    779 U_STABLE URestrictionLevel U_EXPORT2
    780 uspoof_getRestrictionLevel(const USpoofChecker *sc);
    781 
    782 /**
    783  * Limit characters that are acceptable in identifiers being checked to those
    784  * normally used with the languages associated with the specified locales.
    785  * Any previously specified list of locales is replaced by the new settings.
    786  *
    787  * A set of languages is determined from the locale(s), and
    788  * from those a set of acceptable Unicode scripts is determined.
    789  * Characters from this set of scripts, along with characters from
    790  * the "common" and "inherited" Unicode Script categories
    791  * will be permitted.
    792  *
    793  * Supplying an empty string removes all restrictions;
    794  * characters from any script will be allowed.
    795  *
    796  * The {@link USPOOF_CHAR_LIMIT} test is automatically enabled for this
    797  * USpoofChecker when calling this function with a non-empty list
    798  * of locales.
    799  *
    800  * The Unicode Set of characters that will be allowed is accessible
    801  * via the uspoof_getAllowedChars() function.  uspoof_setAllowedLocales()
    802  * will <i>replace</i> any previously applied set of allowed characters.
    803  *
    804  * Adjustments, such as additions or deletions of certain classes of characters,
    805  * can be made to the result of uspoof_setAllowedLocales() by
    806  * fetching the resulting set with uspoof_getAllowedChars(),
    807  * manipulating it with the Unicode Set API, then resetting the
    808  * spoof detector's limits with uspoof_setAllowedChars().
    809  *
    810  * @param sc           The USpoofChecker
    811  * @param localesList  A list of locales, from which the language
    812  *                     and associated script are extracted.  The locales
    813  *                     are comma-separated if there is more than one.
    814  *                     White space may not appear within an individual locale,
    815  *                     but is ignored otherwise.
    816  *                     The locales are syntactically like those from the
    817  *                     HTTP Accept-Language header.
    818  *                     If the localesList is empty, no restrictions will be placed on
    819  *                     the allowed characters.
    820  *
    821  * @param status       The error code, set if this function encounters a problem.
    822  * @stable ICU 4.2
    823  */
    824 U_STABLE void U_EXPORT2
    825 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
    826 
    827 /**
    828  * Get a list of locales for the scripts that are acceptable in strings
    829  *  to be checked.  If no limitations on scripts have been specified,
    830  *  an empty string will be returned.
    831  *
    832  *  uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
    833  *
    834  *  The format of the returned list is the same as that supplied to
    835  *  uspoof_setAllowedLocales(), but returned list may not be identical
    836  *  to the originally specified string; the string may be reformatted,
    837  *  and information other than languages from
    838  *  the originally specified locales may be omitted.
    839  *
    840  * @param sc           The USpoofChecker
    841  * @param status       The error code, set if this function encounters a problem.
    842  * @return             A string containing a list of locales corresponding
    843  *                     to the acceptable scripts, formatted like an
    844  *                     HTTP Accept-Language value.
    845  *
    846  * @stable ICU 4.2
    847  */
    848 U_STABLE const char * U_EXPORT2
    849 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
    850 
    851 
    852 /**
    853  * Limit the acceptable characters to those specified by a Unicode Set.
    854  *   Any previously specified character limit
    855  *   is replaced by the new settings.  This includes limits on
    856  *   characters that were set with the uspoof_setAllowedLocales() function.
    857  *
    858  * The USPOOF_CHAR_LIMIT test is automatically enabled for this
    859  * USpoofChecker by this function.
    860  *
    861  * @param sc       The USpoofChecker
    862  * @param chars    A Unicode Set containing the list of
    863  *                 characters that are permitted.  Ownership of the set
    864  *                 remains with the caller.  The incoming set is cloned by
    865  *                 this function, so there are no restrictions on modifying
    866  *                 or deleting the USet after calling this function.
    867  * @param status   The error code, set if this function encounters a problem.
    868  * @stable ICU 4.2
    869  */
    870 U_STABLE void U_EXPORT2
    871 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);
    872 
    873 
    874 /**
    875  * Get a USet for the characters permitted in an identifier.
    876  * This corresponds to the limits imposed by the Set Allowed Characters
    877  * functions. Limitations imposed by other checks will not be
    878  * reflected in the set returned by this function.
    879  *
    880  * The returned set will be frozen, meaning that it cannot be modified
    881  * by the caller.
    882  *
    883  * Ownership of the returned set remains with the Spoof Detector.  The
    884  * returned set will become invalid if the spoof detector is closed,
    885  * or if a new set of allowed characters is specified.
    886  *
    887  *
    888  * @param sc       The USpoofChecker
    889  * @param status   The error code, set if this function encounters a problem.
    890  * @return         A USet containing the characters that are permitted by
    891  *                 the USPOOF_CHAR_LIMIT test.
    892  * @stable ICU 4.2
    893  */
    894 U_STABLE const USet * U_EXPORT2
    895 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
    896 
    897 
    898 #if U_SHOW_CPLUSPLUS_API
    899 /**
    900  * Limit the acceptable characters to those specified by a Unicode Set.
    901  *   Any previously specified character limit
    902  *   is replaced by the new settings.  This includes limits on
    903  *   characters that were set with the uspoof_setAllowedLocales() function.
    904  *
    905  * The USPOOF_CHAR_LIMIT test is automatically enabled for this
    906  * USpoofChecker by this function.
    907  *
    908  * @param sc       The USpoofChecker
    909  * @param chars    A Unicode Set containing the list of
    910  *                 characters that are permitted.  Ownership of the set
    911  *                 remains with the caller.  The incoming set is cloned by
    912  *                 this function, so there are no restrictions on modifying
    913  *                 or deleting the UnicodeSet after calling this function.
    914  * @param status   The error code, set if this function encounters a problem.
    915  * @stable ICU 4.2
    916  */
    917 U_STABLE void U_EXPORT2
    918 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status);
    919 
    920 
    921 /**
    922  * Get a UnicodeSet for the characters permitted in an identifier.
    923  * This corresponds to the limits imposed by the Set Allowed Characters /
    924  * UnicodeSet functions. Limitations imposed by other checks will not be
    925  * reflected in the set returned by this function.
    926  *
    927  * The returned set will be frozen, meaning that it cannot be modified
    928  * by the caller.
    929  *
    930  * Ownership of the returned set remains with the Spoof Detector.  The
    931  * returned set will become invalid if the spoof detector is closed,
    932  * or if a new set of allowed characters is specified.
    933  *
    934  *
    935  * @param sc       The USpoofChecker
    936  * @param status   The error code, set if this function encounters a problem.
    937  * @return         A UnicodeSet containing the characters that are permitted by
    938  *                 the USPOOF_CHAR_LIMIT test.
    939  * @stable ICU 4.2
    940  */
    941 U_STABLE const icu::UnicodeSet * U_EXPORT2
    942 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
    943 #endif
    944 
    945 
    946 /**
    947  * Check the specified string for possible security issues.
    948  * The text to be checked will typically be an identifier of some sort.
    949  * The set of checks to be performed is specified with uspoof_setChecks().
    950  *
    951  * \note
    952  *   Consider using the newer API, {@link uspoof_check2}, instead.
    953  *   The newer API exposes additional information from the check procedure
    954  *   and is otherwise identical to this method.
    955  *
    956  * @param sc      The USpoofChecker
    957  * @param id      The identifier to be checked for possible security issues,
    958  *                in UTF-16 format.
    959  * @param length  the length of the string to be checked, expressed in
    960  *                16 bit UTF-16 code units, or -1 if the string is
    961  *                zero terminated.
    962  * @param position  Deprecated in ICU 51.  Always returns zero.
    963  *                Originally, an out parameter for the index of the first
    964  *                string position that failed a check.
    965  *                This parameter may be NULL.
    966  * @param status  The error code, set if an error occurred while attempting to
    967  *                perform the check.
    968  *                Spoofing or security issues detected with the input string are
    969  *                not reported here, but through the function's return value.
    970  * @return        An integer value with bits set for any potential security
    971  *                or spoofing issues detected.  The bits are defined by
    972  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
    973  *                will be zero if the input string passes all of the
    974  *                enabled checks.
    975  * @see uspoof_check2
    976  * @stable ICU 4.2
    977  */
    978 U_STABLE int32_t U_EXPORT2
    979 uspoof_check(const USpoofChecker *sc,
    980                          const UChar *id, int32_t length,
    981                          int32_t *position,
    982                          UErrorCode *status);
    983 
    984 
    985 /**
    986  * Check the specified string for possible security issues.
    987  * The text to be checked will typically be an identifier of some sort.
    988  * The set of checks to be performed is specified with uspoof_setChecks().
    989  *
    990  * \note
    991  *   Consider using the newer API, {@link uspoof_check2UTF8}, instead.
    992  *   The newer API exposes additional information from the check procedure
    993  *   and is otherwise identical to this method.
    994  *
    995  * @param sc      The USpoofChecker
    996  * @param id      An identifier to be checked for possible security issues, in UTF-8 format.
    997  * @param length  the length of the string to be checked, or -1 if the string is
    998  *                zero terminated.
    999  * @param position  Deprecated in ICU 51.  Always returns zero.
   1000  *                Originally, an out parameter for the index of the first
   1001  *                string position that failed a check.
   1002  *                This parameter may be NULL.
   1003  * @param status  The error code, set if an error occurred while attempting to
   1004  *                perform the check.
   1005  *                Spoofing or security issues detected with the input string are
   1006  *                not reported here, but through the function's return value.
   1007  *                If the input contains invalid UTF-8 sequences,
   1008  *                a status of U_INVALID_CHAR_FOUND will be returned.
   1009  * @return        An integer value with bits set for any potential security
   1010  *                or spoofing issues detected.  The bits are defined by
   1011  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1012  *                will be zero if the input string passes all of the
   1013  *                enabled checks.
   1014  * @see uspoof_check2UTF8
   1015  * @stable ICU 4.2
   1016  */
   1017 U_STABLE int32_t U_EXPORT2
   1018 uspoof_checkUTF8(const USpoofChecker *sc,
   1019                  const char *id, int32_t length,
   1020                  int32_t *position,
   1021                  UErrorCode *status);
   1022 
   1023 
   1024 #if U_SHOW_CPLUSPLUS_API
   1025 /**
   1026  * Check the specified string for possible security issues.
   1027  * The text to be checked will typically be an identifier of some sort.
   1028  * The set of checks to be performed is specified with uspoof_setChecks().
   1029  *
   1030  * \note
   1031  *   Consider using the newer API, {@link uspoof_check2UnicodeString}, instead.
   1032  *   The newer API exposes additional information from the check procedure
   1033  *   and is otherwise identical to this method.
   1034  *
   1035  * @param sc      The USpoofChecker
   1036  * @param id      An identifier to be checked for possible security issues.
   1037  * @param position  Deprecated in ICU 51.  Always returns zero.
   1038  *                Originally, an out parameter for the index of the first
   1039  *                string position that failed a check.
   1040  *                This parameter may be NULL.
   1041  * @param status  The error code, set if an error occurred while attempting to
   1042  *                perform the check.
   1043  *                Spoofing or security issues detected with the input string are
   1044  *                not reported here, but through the function's return value.
   1045  * @return        An integer value with bits set for any potential security
   1046  *                or spoofing issues detected.  The bits are defined by
   1047  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1048  *                will be zero if the input string passes all of the
   1049  *                enabled checks.
   1050  * @see uspoof_check2UnicodeString
   1051  * @stable ICU 4.2
   1052  */
   1053 U_STABLE int32_t U_EXPORT2
   1054 uspoof_checkUnicodeString(const USpoofChecker *sc,
   1055                           const icu::UnicodeString &id,
   1056                           int32_t *position,
   1057                           UErrorCode *status);
   1058 #endif
   1059 
   1060 
   1061 #ifndef U_HIDE_DRAFT_API
   1062 /**
   1063  * Check the specified string for possible security issues.
   1064  * The text to be checked will typically be an identifier of some sort.
   1065  * The set of checks to be performed is specified with uspoof_setChecks().
   1066  *
   1067  * @param sc      The USpoofChecker
   1068  * @param id      The identifier to be checked for possible security issues,
   1069  *                in UTF-16 format.
   1070  * @param length  the length of the string to be checked, or -1 if the string is
   1071  *                zero terminated.
   1072  * @param checkResult  An instance of USpoofCheckResult to be filled with
   1073  *                details about the identifier.  Can be NULL.
   1074  * @param status  The error code, set if an error occurred while attempting to
   1075  *                perform the check.
   1076  *                Spoofing or security issues detected with the input string are
   1077  *                not reported here, but through the function's return value.
   1078  * @return        An integer value with bits set for any potential security
   1079  *                or spoofing issues detected.  The bits are defined by
   1080  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1081  *                will be zero if the input string passes all of the
   1082  *                enabled checks.  Any information in this bitmask will be
   1083  *                consistent with the information saved in the optional
   1084  *                checkResult parameter.
   1085  * @see uspoof_openCheckResult
   1086  * @see uspoof_check2UTF8
   1087  * @see uspoof_check2UnicodeString
   1088  * @draft ICU 58
   1089  */
   1090 U_DRAFT int32_t U_EXPORT2
   1091 uspoof_check2(const USpoofChecker *sc,
   1092     const UChar* id, int32_t length,
   1093     USpoofCheckResult* checkResult,
   1094     UErrorCode *status);
   1095 
   1096 /**
   1097  * Check the specified string for possible security issues.
   1098  * The text to be checked will typically be an identifier of some sort.
   1099  * The set of checks to be performed is specified with uspoof_setChecks().
   1100  *
   1101  * This version of {@link uspoof_check} accepts a USpoofCheckResult, which
   1102  * returns additional information about the identifier.  For more
   1103  * information, see {@link uspoof_openCheckResult}.
   1104  *
   1105  * @param sc      The USpoofChecker
   1106  * @param id      An identifier to be checked for possible security issues, in UTF-8 format.
   1107  * @param length  the length of the string to be checked, or -1 if the string is
   1108  *                zero terminated.
   1109  * @param checkResult  An instance of USpoofCheckResult to be filled with
   1110  *                details about the identifier.  Can be NULL.
   1111  * @param status  The error code, set if an error occurred while attempting to
   1112  *                perform the check.
   1113  *                Spoofing or security issues detected with the input string are
   1114  *                not reported here, but through the function's return value.
   1115  * @return        An integer value with bits set for any potential security
   1116  *                or spoofing issues detected.  The bits are defined by
   1117  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1118  *                will be zero if the input string passes all of the
   1119  *                enabled checks.  Any information in this bitmask will be
   1120  *                consistent with the information saved in the optional
   1121  *                checkResult parameter.
   1122  * @see uspoof_openCheckResult
   1123  * @see uspoof_check2
   1124  * @see uspoof_check2UnicodeString
   1125  * @draft ICU 58
   1126  */
   1127 U_DRAFT int32_t U_EXPORT2
   1128 uspoof_check2UTF8(const USpoofChecker *sc,
   1129     const char *id, int32_t length,
   1130     USpoofCheckResult* checkResult,
   1131     UErrorCode *status);
   1132 
   1133 #if U_SHOW_CPLUSPLUS_API
   1134 /**
   1135  * Check the specified string for possible security issues.
   1136  * The text to be checked will typically be an identifier of some sort.
   1137  * The set of checks to be performed is specified with uspoof_setChecks().
   1138  *
   1139  * @param sc      The USpoofChecker
   1140  * @param id      An identifier to be checked for possible security issues.
   1141  * @param checkResult  An instance of USpoofCheckResult to be filled with
   1142  *                details about the identifier.  Can be NULL.
   1143  * @param status  The error code, set if an error occurred while attempting to
   1144  *                perform the check.
   1145  *                Spoofing or security issues detected with the input string are
   1146  *                not reported here, but through the function's return value.
   1147  * @return        An integer value with bits set for any potential security
   1148  *                or spoofing issues detected.  The bits are defined by
   1149  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1150  *                will be zero if the input string passes all of the
   1151  *                enabled checks.  Any information in this bitmask will be
   1152  *                consistent with the information saved in the optional
   1153  *                checkResult parameter.
   1154  * @see uspoof_openCheckResult
   1155  * @see uspoof_check2
   1156  * @see uspoof_check2UTF8
   1157  * @draft ICU 58
   1158  */
   1159 U_DRAFT int32_t U_EXPORT2
   1160 uspoof_check2UnicodeString(const USpoofChecker *sc,
   1161     const icu::UnicodeString &id,
   1162     USpoofCheckResult* checkResult,
   1163     UErrorCode *status);
   1164 #endif
   1165 
   1166 /**
   1167  * Create a USpoofCheckResult, used by the {@link uspoof_check2} class of functions to return
   1168  * information about the identifier.  Information includes:
   1169  * <ul>
   1170  *   <li>A bitmask of the checks that failed</li>
   1171  *   <li>The identifier's restriction level (UTS 39 section 5.2)</li>
   1172  *   <li>The set of numerics in the string (UTS 39 section 5.3)</li>
   1173  * </ul>
   1174  * The data held in a USpoofCheckResult is cleared whenever it is passed into a new call
   1175  * of {@link uspoof_check2}.
   1176  *
   1177  * @param status  The error code, set if this function encounters a problem.
   1178  * @return        the newly created USpoofCheckResult
   1179  * @see uspoof_check2
   1180  * @see uspoof_check2UTF8
   1181  * @see uspoof_check2UnicodeString
   1182  * @draft ICU 58
   1183  */
   1184 U_DRAFT USpoofCheckResult* U_EXPORT2
   1185 uspoof_openCheckResult(UErrorCode *status);
   1186 
   1187 /**
   1188  * Close a USpoofCheckResult, freeing any memory that was being held by
   1189  *   its implementation.
   1190  *
   1191  * @param checkResult  The instance of USpoofCheckResult to close
   1192  * @draft ICU 58
   1193  */
   1194 U_DRAFT void U_EXPORT2
   1195 uspoof_closeCheckResult(USpoofCheckResult *checkResult);
   1196 
   1197 #if U_SHOW_CPLUSPLUS_API
   1198 
   1199 U_NAMESPACE_BEGIN
   1200 
   1201 /**
   1202  * \class LocalUSpoofCheckResultPointer
   1203  * "Smart pointer" class, closes a USpoofCheckResult via {@link uspoof_closeCheckResult}.
   1204  * For most methods see the LocalPointerBase base class.
   1205  *
   1206  * @see LocalPointerBase
   1207  * @see LocalPointer
   1208  * @draft ICU 58
   1209  */
   1210 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult);
   1211 
   1212 U_NAMESPACE_END
   1213 
   1214 #endif
   1215 
   1216 /**
   1217  * Indicates which of the spoof check(s) have failed. The value is a bitwise OR of the constants for the tests
   1218  * in question: USPOOF_RESTRICTION_LEVEL, USPOOF_CHAR_LIMIT, and so on.
   1219  *
   1220  * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
   1221  * @param status       The error code, set if an error occurred.
   1222  * @return        An integer value with bits set for any potential security
   1223  *                or spoofing issues detected.  The bits are defined by
   1224  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1225  *                will be zero if the input string passes all of the
   1226  *                enabled checks.
   1227  * @see uspoof_setChecks
   1228  * @draft ICU 58
   1229  */
   1230 U_DRAFT int32_t U_EXPORT2
   1231 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);
   1232 
   1233 /**
   1234  * Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check
   1235  * was enabled; otherwise, undefined.
   1236  *
   1237  * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
   1238  * @param status       The error code, set if an error occurred.
   1239  * @return             The restriction level contained in the USpoofCheckResult
   1240  * @see uspoof_setRestrictionLevel
   1241  * @draft ICU 58
   1242  */
   1243 U_DRAFT URestrictionLevel U_EXPORT2
   1244 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status);
   1245 
   1246 /**
   1247  * Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled;
   1248  * otherwise, undefined.  The set will contain the zero digit from each decimal number system found
   1249  * in the input string.  Ownership of the returned USet remains with the USpoofCheckResult.
   1250  * The USet will be freed when {@link uspoof_closeCheckResult} is called.
   1251  *
   1252  * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
   1253  * @param status       The error code, set if an error occurred.
   1254  * @return             The set of numerics contained in the USpoofCheckResult
   1255  * @draft ICU 58
   1256  */
   1257 U_DRAFT const USet* U_EXPORT2
   1258 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);
   1259 #endif /* U_HIDE_DRAFT_API */
   1260 
   1261 
   1262 /**
   1263  * Check whether two specified strings are visually confusable.
   1264  *
   1265  * If the strings are confusable, the return value will be nonzero, as long as
   1266  * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
   1267  *
   1268  * The bits in the return value correspond to flags for each of the classes of
   1269  * confusables applicable to the two input strings.  According to UTS 39
   1270  * section 4, the possible flags are:
   1271  *
   1272  * <ul>
   1273  *   <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
   1274  *   <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
   1275  *   <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
   1276  * </ul>
   1277  *
   1278  * If one or more of the above flags were not listed in uspoof_setChecks(), this
   1279  * function will never report that class of confusable.  The check
   1280  * {@link USPOOF_CONFUSABLE} enables all three flags.
   1281  *
   1282  *
   1283  * @param sc      The USpoofChecker
   1284  * @param id1     The first of the two identifiers to be compared for
   1285  *                confusability.  The strings are in UTF-16 format.
   1286  * @param length1 the length of the first identifer, expressed in
   1287  *                16 bit UTF-16 code units, or -1 if the string is
   1288  *                nul terminated.
   1289  * @param id2     The second of the two identifiers to be compared for
   1290  *                confusability.  The identifiers are in UTF-16 format.
   1291  * @param length2 The length of the second identifiers, expressed in
   1292  *                16 bit UTF-16 code units, or -1 if the string is
   1293  *                nul terminated.
   1294  * @param status  The error code, set if an error occurred while attempting to
   1295  *                perform the check.
   1296  *                Confusability of the identifiers is not reported here,
   1297  *                but through this function's return value.
   1298  * @return        An integer value with bit(s) set corresponding to
   1299  *                the type of confusability found, as defined by
   1300  *                enum USpoofChecks.  Zero is returned if the identifiers
   1301  *                are not confusable.
   1302  *
   1303  * @stable ICU 4.2
   1304  */
   1305 U_STABLE int32_t U_EXPORT2
   1306 uspoof_areConfusable(const USpoofChecker *sc,
   1307                      const UChar *id1, int32_t length1,
   1308                      const UChar *id2, int32_t length2,
   1309                      UErrorCode *status);
   1310 
   1311 
   1312 
   1313 /**
   1314  * A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
   1315  *
   1316  * @param sc      The USpoofChecker
   1317  * @param id1     The first of the two identifiers to be compared for
   1318  *                confusability.  The identifiers are in UTF-8 format.
   1319  * @param length1 The length of the first identifier, in bytes, or -1
   1320  *                if the string is nul terminated.
   1321  * @param id2     The second of the two identifiers to be compared for
   1322  *                confusability.  The identifiers are in UTF-8 format.
   1323  * @param length2 The length of the second identifier, in bytes, or -1
   1324  *                if the string is nul terminated.
   1325  * @param status  The error code, set if an error occurred while attempting to
   1326  *                perform the check.
   1327  *                Confusability of the identifiers is not reported here,
   1328  *                but through this function's return value.
   1329  * @return        An integer value with bit(s) set corresponding to
   1330  *                the type of confusability found, as defined by
   1331  *                enum USpoofChecks.  Zero is returned if the identifiers
   1332  *                are not confusable.
   1333  *
   1334  * @stable ICU 4.2
   1335  *
   1336  * @see uspoof_areConfusable
   1337  */
   1338 U_STABLE int32_t U_EXPORT2
   1339 uspoof_areConfusableUTF8(const USpoofChecker *sc,
   1340                          const char *id1, int32_t length1,
   1341                          const char *id2, int32_t length2,
   1342                          UErrorCode *status);
   1343 
   1344 
   1345 
   1346 
   1347 #if U_SHOW_CPLUSPLUS_API
   1348 /**
   1349  * A version of {@link uspoof_areConfusable} accepting UnicodeStrings.
   1350  *
   1351  * @param sc      The USpoofChecker
   1352  * @param s1      The first of the two identifiers to be compared for
   1353  *                confusability, passed as an icu::UnicodeString.
   1354  * @param s2      The second of the two identifiers to be compared for
   1355  *                confusability, passed as an icu::UnicodeString.
   1356  * @param status  The error code, set if an error occurred while attempting to
   1357  *                perform the check.
   1358  *                Confusability of the identifiers is not reported here,
   1359  *                but through this function's return value.
   1360  * @return        An integer value with bit(s) set corresponding to
   1361  *                the type of confusability found, as defined by
   1362  *                enum USpoofChecks.  Zero is returned if the identifiers
   1363  *                are not confusable.
   1364  *
   1365  * @stable ICU 4.2
   1366  *
   1367  * @see uspoof_areConfusable
   1368  */
   1369 U_STABLE int32_t U_EXPORT2
   1370 uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
   1371                                   const icu::UnicodeString &s1,
   1372                                   const icu::UnicodeString &s2,
   1373                                   UErrorCode *status);
   1374 #endif   /* U_SHOW_CPLUSPLUS_API */
   1375 
   1376 
   1377 /**
   1378  *  Get the "skeleton" for an identifier.
   1379  *  Skeletons are a transformation of the input identifier;
   1380  *  two identifiers are confusable if their skeletons are identical.
   1381  *  See Unicode UTS #39 for additional information.
   1382  *
   1383  *  Using skeletons directly makes it possible to quickly check
   1384  *  whether an identifier is confusable with any of some large
   1385  *  set of existing identifiers, by creating an efficiently
   1386  *  searchable collection of the skeletons.
   1387  *
   1388  * @param sc      The USpoofChecker
   1389  * @param type    Deprecated in ICU 58.  You may pass any number.
   1390  *                Originally, controlled which of the Unicode confusable data
   1391  *                tables to use.
   1392  * @param id      The input identifier whose skeleton will be computed.
   1393  * @param length  The length of the input identifier, expressed in 16 bit
   1394  *                UTF-16 code units, or -1 if the string is zero terminated.
   1395  * @param dest    The output buffer, to receive the skeleton string.
   1396  * @param destCapacity  The length of the output buffer, in 16 bit units.
   1397  *                The destCapacity may be zero, in which case the function will
   1398  *                return the actual length of the skeleton.
   1399  * @param status  The error code, set if an error occurred while attempting to
   1400  *                perform the check.
   1401  * @return        The length of the skeleton string.  The returned length
   1402  *                is always that of the complete skeleton, even when the
   1403  *                supplied buffer is too small (or of zero length).
   1404  *
   1405  * @stable ICU 4.2
   1406  * @see uspoof_areConfusable
   1407  */
   1408 U_STABLE int32_t U_EXPORT2
   1409 uspoof_getSkeleton(const USpoofChecker *sc,
   1410                    uint32_t type,
   1411                    const UChar *id,  int32_t length,
   1412                    UChar *dest, int32_t destCapacity,
   1413                    UErrorCode *status);
   1414 
   1415 /**
   1416  *  Get the "skeleton" for an identifier.
   1417  *  Skeletons are a transformation of the input identifier;
   1418  *  two identifiers are confusable if their skeletons are identical.
   1419  *  See Unicode UTS #39 for additional information.
   1420  *
   1421  *  Using skeletons directly makes it possible to quickly check
   1422  *  whether an identifier is confusable with any of some large
   1423  *  set of existing identifiers, by creating an efficiently
   1424  *  searchable collection of the skeletons.
   1425  *
   1426  * @param sc      The USpoofChecker
   1427  * @param type    Deprecated in ICU 58.  You may pass any number.
   1428  *                Originally, controlled which of the Unicode confusable data
   1429  *                tables to use.
   1430  * @param id      The UTF-8 format identifier whose skeleton will be computed.
   1431  * @param length  The length of the input string, in bytes,
   1432  *                or -1 if the string is zero terminated.
   1433  * @param dest    The output buffer, to receive the skeleton string.
   1434  * @param destCapacity  The length of the output buffer, in bytes.
   1435  *                The destCapacity may be zero, in which case the function will
   1436  *                return the actual length of the skeleton.
   1437  * @param status  The error code, set if an error occurred while attempting to
   1438  *                perform the check.  Possible errors include U_INVALID_CHAR_FOUND
   1439  *                for invalid UTF-8 sequences, and
   1440  *                U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
   1441  *                to hold the complete skeleton.
   1442  * @return        The length of the skeleton string, in bytes.  The returned length
   1443  *                is always that of the complete skeleton, even when the
   1444  *                supplied buffer is too small (or of zero length).
   1445  *
   1446  * @stable ICU 4.2
   1447  */
   1448 U_STABLE int32_t U_EXPORT2
   1449 uspoof_getSkeletonUTF8(const USpoofChecker *sc,
   1450                        uint32_t type,
   1451                        const char *id,  int32_t length,
   1452                        char *dest, int32_t destCapacity,
   1453                        UErrorCode *status);
   1454 
   1455 #if U_SHOW_CPLUSPLUS_API
   1456 /**
   1457  *  Get the "skeleton" for an identifier.
   1458  *  Skeletons are a transformation of the input identifier;
   1459  *  two identifiers are confusable if their skeletons are identical.
   1460  *  See Unicode UTS #39 for additional information.
   1461  *
   1462  *  Using skeletons directly makes it possible to quickly check
   1463  *  whether an identifier is confusable with any of some large
   1464  *  set of existing identifiers, by creating an efficiently
   1465  *  searchable collection of the skeletons.
   1466  *
   1467  * @param sc      The USpoofChecker.
   1468  * @param type    Deprecated in ICU 58.  You may pass any number.
   1469  *                Originally, controlled which of the Unicode confusable data
   1470  *                tables to use.
   1471  * @param id      The input identifier whose skeleton will be computed.
   1472  * @param dest    The output string, to receive the skeleton.
   1473  * @param status  The error code, set if an error occurred while attempting to
   1474  *                perform the check.
   1475  * @return        A reference to the destination (skeleton) string.
   1476  *
   1477  * @stable ICU 4.2
   1478  */
   1479 U_I18N_API icu::UnicodeString & U_EXPORT2
   1480 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
   1481                                 uint32_t type,
   1482                                 const icu::UnicodeString &id,
   1483                                 icu::UnicodeString &dest,
   1484                                 UErrorCode *status);
   1485 #endif   /* U_SHOW_CPLUSPLUS_API */
   1486 
   1487 /**
   1488  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
   1489  * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1490  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1491  *
   1492  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1493  * be deleted by the caller.
   1494  *
   1495  * @param status The error code, set if a problem occurs while creating the set.
   1496  * @return       The frozen set of candidate characters; it remains owned by the ICU library.
   1497  * @stable ICU 51
   1498  */
   1499 U_STABLE const USet * U_EXPORT2
   1500 uspoof_getInclusionSet(UErrorCode *status);
   1501 
   1502 /**
   1503  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
   1504  * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1505  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1506  *
   1507  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1508  * be deleted by the caller.
   1509  *
   1510  * @param status The error code, set if a problem occurs while creating the set.
   1511  * @return       The frozen set of recommended characters; it remains owned by the ICU library.
   1512  * @stable ICU 51
   1513  */
   1514 U_STABLE const USet * U_EXPORT2
   1515 uspoof_getRecommendedSet(UErrorCode *status);
   1516 
   1517 #if U_SHOW_CPLUSPLUS_API
   1518 
   1519 /**
   1520  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
   1521  * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1522  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1523  *
   1524  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1525  * be deleted by the caller.
   1526  *
   1527  * @param status The error code, set if a problem occurs while creating the set.
   1528  * @return       The frozen UnicodeSet of candidate characters; it remains owned by the ICU library.
   1529  * @stable ICU 51
   1530  */
   1531 U_STABLE const icu::UnicodeSet * U_EXPORT2
   1532 uspoof_getInclusionUnicodeSet(UErrorCode *status);
   1533 
   1534 /**
   1535  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
   1536  * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1537  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1538  *
   1539  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1540  * be deleted by the caller.
   1541  *
   1542  * @param status The error code, set if a problem occurs while creating the set.
   1543  * @return       The frozen UnicodeSet of recommended characters; it remains owned by the ICU library.
   1544  * @stable ICU 51
   1545  */
   1546 U_STABLE const icu::UnicodeSet * U_EXPORT2
   1547 uspoof_getRecommendedUnicodeSet(UErrorCode *status);
   1548 
   1549 #endif /* U_SHOW_CPLUSPLUS_API */
   1550 
   1551 /**
   1552  * Serialize the data for a spoof detector into a chunk of memory.
   1553  * The flattened spoof detection tables can later be used to efficiently
   1554  * instantiate a new Spoof Detector.
   1555  *
   1556  * The serialized spoof checker includes only the data compiled from the
   1557  * Unicode data tables by uspoof_openFromSource(); it does not
   1558  * include any other state or configuration that may have been set.
   1559  *
   1560  * @param sc   the Spoof Detector whose data is to be serialized.
   1561  * @param data a pointer to 32-bit-aligned memory to be filled with the data,
   1562  *             can be NULL if capacity==0
   1563  * @param capacity the number of bytes available at data,
   1564  *                 or 0 for preflighting
   1565  * @param status an in/out ICU UErrorCode; possible errors include:
   1566  * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
   1567  * - U_ILLEGAL_ARGUMENT_ERROR if the data or capacity parameters are bad
   1568  * @return the number of bytes written or needed for the spoof data
   1569  *
   1570  * @see uspoof_openFromSerialized()
   1571  * @stable ICU 4.2
   1572  */
   1573 U_STABLE int32_t U_EXPORT2
   1574 uspoof_serialize(USpoofChecker *sc,
   1575                  void *data, int32_t capacity,
   1576                  UErrorCode *status);
   1577 
   1578 
   1579 #endif
   1580 
   1581 #endif   /* USPOOF_H */
   1582