Home | History | Annotate | Download | only in unicode
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ***************************************************************************
      5 * Copyright (C) 2008-2016, International Business Machines Corporation
      6 * and others. All Rights Reserved.
      7 ***************************************************************************
      8 *   file name:  uspoof.h
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2008Feb13
     14 *   created by: Andy Heninger
     15 *
     16 *   Unicode Spoof Detection
     17 */
     18 
     19 #ifndef USPOOF_H
     20 #define USPOOF_H
     21 
     22 #include "unicode/utypes.h"
     23 #include "unicode/uset.h"
     24 #include "unicode/parseerr.h"
     25 #include "unicode/localpointer.h"
     26 
     27 #if !UCONFIG_NO_NORMALIZATION
     28 
     29 
     30 #if U_SHOW_CPLUSPLUS_API
     31 #include "unicode/unistr.h"
     32 #include "unicode/uniset.h"
     33 #endif
     34 
     35 
     36 /**
     37  * \file
     38  * \brief Unicode Security and Spoofing Detection, C API.
     39  *
     40  * <p>
     41  * This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and
     42  * <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions:
     43  *
     44  * <ol>
     45  * <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "Harvest" and
     46  * &quot;&Eta;arvest&quot;, where the second string starts with the Greek capital letter Eta.</li>
     47  * <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof
     48  * detection</em>), such as "paypal" with some Latin characters substituted with Cyrillic look-alikes.</li>
     49  * </ol>
     50  *
     51  * <p>
     52  * Although originally designed as a method for flagging suspicious identifier strings such as URLs,
     53  * <code>USpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word
     54  * content filters.
     55  *
     56  * <p>
     57  * The functions of this class are exposed as C API, with a handful of syntactical conveniences for C++.
     58  *
     59  * <h2>Confusables</h2>
     60  *
     61  * <p>
     62  * The following example shows how to use <code>USpoofChecker</code> to check for confusability between two strings:
     63  *
     64  * \code{.c}
     65  * UErrorCode status = U_ZERO_ERROR;
     66  * UChar* str1 = (UChar*) u"Harvest";
     67  * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
     68  *
     69  * USpoofChecker* sc = uspoof_open(&status);
     70  * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
     71  *
     72  * int32_t bitmask = uspoof_areConfusable(sc, str1, -1, str2, -1, &status);
     73  * UBool result = bitmask != 0;
     74  * // areConfusable: 1 (status: U_ZERO_ERROR)
     75  * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
     76  * uspoof_close(sc);
     77  * \endcode
     78  *
     79  * <p>
     80  * The call to {@link uspoof_open} creates a <code>USpoofChecker</code> object; the call to {@link uspoof_setChecks}
     81  * enables confusable checking and disables all other checks; the call to {@link uspoof_areConfusable} performs the
     82  * confusability test; and the following line extracts the result out of the return value. For best performance,
     83  * the instance should be created once (e.g., upon application startup), and the efficient
     84  * {@link uspoof_areConfusable} method can be used at runtime.
     85  *
     86  * <p>
     87  * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers.  It will automatically call
     88  * {@link uspoof_close} when the object goes out of scope:
     89  *
     90  * \code{.cpp}
     91  * UErrorCode status = U_ZERO_ERROR;
     92  * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
     93  * uspoof_setChecks(sc.getAlias(), USPOOF_CONFUSABLE, &status);
     94  * // ...
     95  * \endcode
     96  *
     97  * <p>
     98  * UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can
     99  * be thought of as a "hash code". {@link uspoof_getSkeleton} computes the skeleton for a particular string, so
    100  * the following snippet is equivalent to the example above:
    101  *
    102  * \code{.c}
    103  * UErrorCode status = U_ZERO_ERROR;
    104  * UChar* str1 = (UChar*) u"Harvest";
    105  * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
    106  *
    107  * USpoofChecker* sc = uspoof_open(&status);
    108  * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
    109  *
    110  * // Get skeleton 1
    111  * int32_t skel1Len = uspoof_getSkeleton(sc, 0, str1, -1, NULL, 0, &status);
    112  * UChar* skel1 = (UChar*) malloc(++skel1Len * sizeof(UChar));
    113  * status = U_ZERO_ERROR;
    114  * uspoof_getSkeleton(sc, 0, str1, -1, skel1, skel1Len, &status);
    115  *
    116  * // Get skeleton 2
    117  * int32_t skel2Len = uspoof_getSkeleton(sc, 0, str2, -1, NULL, 0, &status);
    118  * UChar* skel2 = (UChar*) malloc(++skel2Len * sizeof(UChar));
    119  * status = U_ZERO_ERROR;
    120  * uspoof_getSkeleton(sc, 0, str2, -1, skel2, skel2Len, &status);
    121  *
    122  * // Are the skeletons the same?
    123  * UBool result = u_strcmp(skel1, skel2) == 0;
    124  * // areConfusable: 1 (status: U_ZERO_ERROR)
    125  * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
    126  * uspoof_close(sc);
    127  * free(skel1);
    128  * free(skel2);
    129  * \endcode
    130  *
    131  * <p>
    132  * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling
    133  * {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below:
    134  *
    135  * \code{.c}
    136  * UErrorCode status = U_ZERO_ERROR;
    137  * #define DICTIONARY_LENGTH 2
    138  * UChar* dictionary[DICTIONARY_LENGTH] = { (UChar*) u"lorem", (UChar*) u"ipsum" };
    139  * UChar* skeletons[DICTIONARY_LENGTH];
    140  * UChar* str = (UChar*) u"1orern";
    141  *
    142  * // Setup:
    143  * USpoofChecker* sc = uspoof_open(&status);
    144  * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
    145  * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
    146  *     UChar* word = dictionary[i];
    147  *     int32_t len = uspoof_getSkeleton(sc, 0, word, -1, NULL, 0, &status);
    148  *     skeletons[i] = (UChar*) malloc(++len * sizeof(UChar));
    149  *     status = U_ZERO_ERROR;
    150  *     uspoof_getSkeleton(sc, 0, word, -1, skeletons[i], len, &status);
    151  * }
    152  *
    153  * // Live Check:
    154  * {
    155  *     int32_t len = uspoof_getSkeleton(sc, 0, str, -1, NULL, 0, &status);
    156  *     UChar* skel = (UChar*) malloc(++len * sizeof(UChar));
    157  *     status = U_ZERO_ERROR;
    158  *     uspoof_getSkeleton(sc, 0, str, -1, skel, len, &status);
    159  *     UBool result = FALSE;
    160  *     for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
    161  *         result = u_strcmp(skel, skeletons[i]) == 0;
    162  *         if (result == TRUE) { break; }
    163  *     }
    164  *     // Has confusable in dictionary: 1 (status: U_ZERO_ERROR)
    165  *     printf("Has confusable in dictionary: %d (status: %s)\n", result, u_errorName(status));
    166  *     free(skel);
    167  * }
    168  *
    169  * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
    170  *     free(skeletons[i]);
    171  * }
    172  * uspoof_close(sc);
    173  * \endcode
    174  *
    175  * <p>
    176  * <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em>
    177  * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons
    178  * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons.
    179  *
    180  * <h2>Spoof Detection</h2>
    181  *
    182  * <p>
    183  * The following snippet shows a minimal example of using <code>USpoofChecker</code> to perform spoof detection on a
    184  * string:
    185  *
    186  * \code{.c}
    187  * UErrorCode status = U_ZERO_ERROR;
    188  * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
    189  *
    190  * // Get the default set of allowable characters:
    191  * USet* allowed = uset_openEmpty();
    192  * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
    193  * uset_addAll(allowed, uspoof_getInclusionSet(&status));
    194  *
    195  * USpoofChecker* sc = uspoof_open(&status);
    196  * uspoof_setAllowedChars(sc, allowed, &status);
    197  * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
    198  *
    199  * int32_t bitmask = uspoof_check(sc, str, -1, NULL, &status);
    200  * UBool result = bitmask != 0;
    201  * // fails checks: 1 (status: U_ZERO_ERROR)
    202  * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
    203  * uspoof_close(sc);
    204  * uset_close(allowed);
    205  * \endcode
    206  *
    207  * <p>
    208  * As in the case for confusability checking, it is good practice to create one <code>USpoofChecker</code> instance at
    209  * startup, and call the cheaper {@link uspoof_check} online. We specify the set of
    210  * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39.
    211  *
    212  * <p>
    213  * In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings,
    214  * and {@link uspoof_checkUnicodeString} is exposed for C++ programmers.
    215  *
    216  * <p>
    217  * If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks
    218  * is available in the returned bitmask.  For complete information, use the {@link uspoof_check2} class of functions
    219  * with a {@link USpoofCheckResult} parameter:
    220  *
    221  * \code{.c}
    222  * UErrorCode status = U_ZERO_ERROR;
    223  * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
    224  *
    225  * // Get the default set of allowable characters:
    226  * USet* allowed = uset_openEmpty();
    227  * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
    228  * uset_addAll(allowed, uspoof_getInclusionSet(&status));
    229  *
    230  * USpoofChecker* sc = uspoof_open(&status);
    231  * uspoof_setAllowedChars(sc, allowed, &status);
    232  * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
    233  *
    234  * USpoofCheckResult* checkResult = uspoof_openCheckResult(&status);
    235  * int32_t bitmask = uspoof_check2(sc, str, -1, checkResult, &status);
    236  *
    237  * int32_t failures1 = bitmask;
    238  * int32_t failures2 = uspoof_getCheckResultChecks(checkResult, &status);
    239  * assert(failures1 == failures2);
    240  * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
    241  * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
    242  *
    243  * // Cleanup:
    244  * uspoof_close(sc);
    245  * uset_close(allowed);
    246  * uspoof_closeCheckResult(checkResult);
    247  * \endcode
    248  *
    249  * C++ users can take advantage of a few syntactical conveniences.  The following snippet is functionally
    250  * equivalent to the one above:
    251  *
    252  * \code{.cpp}
    253  * UErrorCode status = U_ZERO_ERROR;
    254  * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
    255  *
    256  * // Get the default set of allowable characters:
    257  * UnicodeSet allowed;
    258  * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
    259  * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
    260  *
    261  * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
    262  * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
    263  * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
    264  *
    265  * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
    266  * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
    267  *
    268  * int32_t failures1 = bitmask;
    269  * int32_t failures2 = uspoof_getCheckResultChecks(checkResult.getAlias(), &status);
    270  * assert(failures1 == failures2);
    271  * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
    272  * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
    273  *
    274  * // Explicit cleanup not necessary.
    275  * \endcode
    276  *
    277  * <p>
    278  * The return value is a bitmask of the checks that failed. In this case, there was one check that failed:
    279  * {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are:
    280  *
    281  * <ul>
    282  * <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the
    283  * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS
    284  * 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li>
    285  * <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character
    286  * sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li>
    287  * <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable
    288  * characters. See {@link uspoof_setAllowedChars} and {@link uspoof_setAllowedLocales}.</li>
    289  * <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li>
    290  * </ul>
    291  *
    292  * <p>
    293  * These checks can be enabled independently of each other. For example, if you were interested in checking for only the
    294  * INVISIBLE and MIXED_NUMBERS conditions, you could do:
    295  *
    296  * \code{.c}
    297  * UErrorCode status = U_ZERO_ERROR;
    298  * UChar* str = (UChar*) u"8\u09EA";  // 8 mixed with U+09EA BENGALI DIGIT FOUR
    299  *
    300  * USpoofChecker* sc = uspoof_open(&status);
    301  * uspoof_setChecks(sc, USPOOF_INVISIBLE | USPOOF_MIXED_NUMBERS, &status);
    302  *
    303  * int32_t bitmask = uspoof_check2(sc, str, -1, NULL, &status);
    304  * UBool result = bitmask != 0;
    305  * // fails checks: 1 (status: U_ZERO_ERROR)
    306  * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
    307  * uspoof_close(sc);
    308  * \endcode
    309  *
    310  * <p>
    311  * Here is an example in C++ showing how to compute the restriction level of a string:
    312  *
    313  * \code{.cpp}
    314  * UErrorCode status = U_ZERO_ERROR;
    315  * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
    316  *
    317  * // Get the default set of allowable characters:
    318  * UnicodeSet allowed;
    319  * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
    320  * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
    321  *
    322  * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
    323  * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
    324  * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
    325  * uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL | USPOOF_AUX_INFO, &status);
    326  *
    327  * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
    328  * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
    329  *
    330  * URestrictionLevel restrictionLevel = uspoof_getCheckResultRestrictionLevel(checkResult.getAlias(), &status);
    331  * // Since USPOOF_AUX_INFO was enabled, the restriction level is also available in the upper bits of the bitmask:
    332  * assert((restrictionLevel & bitmask) == restrictionLevel);
    333  * // Restriction level: 0x50000000 (status: U_ZERO_ERROR)
    334  * printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status));
    335  * \endcode
    336  *
    337  * <p>
    338  * The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE.  Since
    339  * USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check.
    340  *
    341  * <p>
    342  * <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in
    343  * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings
    344  * are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have
    345  * Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is
    346  * recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed
    347  * with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. For more details on
    348  * the levels, see UTS 39 or {@link URestrictionLevel}. The Restriction Level test is aware of the set of
    349  * allowed characters set in {@link uspoof_setAllowedChars}. Note that characters which have script code
    350  * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
    351  * scripts.
    352  *
    353  * <h2>Additional Information</h2>
    354  *
    355  * <p>
    356  * A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
    357  *
    358  * <p>
    359  * <b>Thread Safety:</b> The test functions for checking a single identifier, or for testing whether
    360  * two identifiers are possibly confusable, are thread safe. They may be called concurrently, from multiple threads,
    361  * using the same USpoofChecker instance.
    362  *
    363  * <p>
    364  * More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are
    365  * thread safe. Those that take a non-const USpoofChecker are not thread safe.
    366  *
    367  * @stable ICU 4.6
    368  */
    369 
    370 struct USpoofChecker;
    371 /**
    372  * @stable ICU 4.2
    373  */
    374 typedef struct USpoofChecker USpoofChecker; /**< C typedef: lets callers write USpoofChecker without the struct keyword */
    375 
    376 struct USpoofCheckResult;
    377 /**
    378  * @see uspoof_openCheckResult
    379  * @stable ICU 58
    380  */
    381 typedef struct USpoofCheckResult USpoofCheckResult; /**< C typedef: lets callers write USpoofCheckResult without the struct keyword */
    382 
    383 /**
    384  * Enum for the kinds of checks that USpoofChecker can perform.
    385  * These enum values are used both to select the set of checks that
    386  * will be performed, and to report results from the check function.
    387  *
    388  * @stable ICU 4.2
    389  */
    390 typedef enum USpoofChecks {
    391     /**
    392      * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
    393      * that the two strings are visually confusable and that they are from the same script, according to UTS 39 section
    394      * 4.
    395      *
    396      * @see uspoof_areConfusable
    397      * @stable ICU 4.2
    398      */
    399     USPOOF_SINGLE_SCRIPT_CONFUSABLE =   1,
    400 
    401     /**
    402      * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
    403      * that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS
    404      * 39 section 4.
    405      *
    406      * @see uspoof_areConfusable
    407      * @stable ICU 4.2
    408      */
    409     USPOOF_MIXED_SCRIPT_CONFUSABLE  =   2,
    410 
    411     /**
    412      * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
    413      * that the two strings are visually confusable and that they are not from the same script but both of them are
    414      * single-script strings, according to UTS 39 section 4.
    415      *
    416      * @see uspoof_areConfusable
    417      * @stable ICU 4.2
    418      */
    419     USPOOF_WHOLE_SCRIPT_CONFUSABLE  =   4,
    420 
    421     /**
    422      * Enable this flag in {@link uspoof_setChecks} to turn on all types of confusables.  You may set
    423      * the checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to
    424      * make {@link uspoof_areConfusable} return only those types of confusables.
    425      *
    426      * @see uspoof_areConfusable
    427      * @see uspoof_getSkeleton
    428      * @stable ICU 58
    429      */
    430     USPOOF_CONFUSABLE               =   USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE,
    431 
    432 #ifndef U_HIDE_DEPRECATED_API
    433     /**
    434       * This flag is deprecated and no longer affects the behavior of SpoofChecker.
    435       *
    436       * @deprecated ICU 58  Any case confusable mappings were removed from UTS 39; the corresponding ICU API was deprecated.
    437       */
    438     USPOOF_ANY_CASE                 =   8,
    439 #endif  /* U_HIDE_DEPRECATED_API */
    440 
    441     /**
    442       * Check that an identifier is no looser than the specified RestrictionLevel.
    443       * The default if {@link uspoof_setRestrictionLevel} is not called is HIGHLY_RESTRICTIVE.
    444       *
    445       * If USPOOF_AUX_INFO is enabled the actual restriction level of the
    446       * identifier being tested will also be returned by uspoof_check().
    447       *
    448       * @see URestrictionLevel
    449       * @see uspoof_setRestrictionLevel
    450       * @see USPOOF_AUX_INFO
    451       *
    452       * @stable ICU 51
    453       */
    454     USPOOF_RESTRICTION_LEVEL        = 16,
    455 
    456 #ifndef U_HIDE_DEPRECATED_API
    457     /** Check that an identifier contains only characters from a
    458       * single script (plus chars from the common and inherited scripts.)
    459       * Applies to single-identifier checks only.
    460       * @deprecated ICU 51  Use RESTRICTION_LEVEL instead.
    461       */
    462     USPOOF_SINGLE_SCRIPT            =  USPOOF_RESTRICTION_LEVEL,
    463 #endif  /* U_HIDE_DEPRECATED_API */
    464 
    465     /** Check an identifier for the presence of invisible characters,
    466       * such as zero-width spaces, or character sequences that are
    467       * likely not to display, such as multiple occurrences of the same
    468       * non-spacing mark.  This check does not test the input string as a whole
    469       * for conformance to any particular syntax for identifiers.
    470       */
    471     USPOOF_INVISIBLE                =  32,
    472 
    473     /** Check that an identifier contains only characters from a specified set
    474       * of acceptable characters.  See {@link uspoof_setAllowedChars} and
    475       * {@link uspoof_setAllowedLocales}.  Note that a string that fails this check
    476       * will also fail the {@link USPOOF_RESTRICTION_LEVEL} check.
    477       */
    478     USPOOF_CHAR_LIMIT               =  64,
    479 
    480    /**
    481      * Check that an identifier does not mix numbers from different numbering systems.
    482      * For more information, see UTS 39 section 5.3.
    483      *
    484      * @stable ICU 51
    485      */
    486     USPOOF_MIXED_NUMBERS            = 128,
    487 
    488    /**
    489      * Enable all spoof checks.  The mask covers the low 16 bits, so it does not include USPOOF_AUX_INFO.
    490      *
    491      * @stable ICU 4.6
    492      */
    493     USPOOF_ALL_CHECKS               = 0xFFFF,
    494 
    495     /**
    496       * Enable the return of auxiliary (non-error) information in the
    497       * upper bits of the check results value.
    498       *
    499       * If this "check" is not enabled, the results of {@link uspoof_check} will be
    500       * zero when an identifier passes all of the enabled checks.
    501       *
    502       * If this "check" is enabled, (uspoof_check() & {@link USPOOF_ALL_CHECKS}) will
    503       * be zero when an identifier passes all checks.
    504       *
    505       * @stable ICU 51
    506       */
    507     USPOOF_AUX_INFO                  = 0x40000000
    508 
    509     } USpoofChecks;
    510 
    511 
    512     /**
    513      * Constants from UTS #39 for use in {@link uspoof_setRestrictionLevel}, and
    514      * for returned identifier restriction levels in check results.
    515      *
    516      * @stable ICU 51
    517      *
    518      * @see uspoof_setRestrictionLevel
    519      * @see uspoof_check
    520      */
    521     typedef enum URestrictionLevel {
    522         /**
    523          * All characters in the string are in the identifier profile and all characters in the string are in the
    524          * ASCII range.
    525          *
    526          * @stable ICU 51
    527          */
    528         USPOOF_ASCII = 0x10000000,
    529         /**
    530          * The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and
    531          * the string is single-script, according to the definition in UTS 39 section 5.1.
    532          *
    533          * @stable ICU 53
    534          */
    535         USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,
    536         /**
    537          * The string classifies as Single Script, or all characters in the string are in the identifier profile and
    538          * the string is covered by any of the following sets of scripts, according to the definition in UTS 39
    539          * section 5.1:
    540          * <ul>
    541          *   <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li>
    542          *   <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li>
    543          *   <li>Latin + Han + Hangul (or equivalently: Latn + Kore)</li>
    544          * </ul>
    545          * This is the default restriction in ICU.
    546          *
    547          * @stable ICU 51
    548          */
    549         USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,
    550         /**
    551          * The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile
    552          * and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic,
    553          * Greek, and Cherokee.
    554          *
    555          * @stable ICU 51
    556          */
    557         USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,
    558         /**
    559          * All characters in the string are in the identifier profile.  Allow arbitrary mixtures of scripts.
    560          *
    561          * @stable ICU 51
    562          */
    563         USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,
    564         /**
    565          * Any valid identifiers, including characters outside of the Identifier Profile.
    566          *
    567          * @stable ICU 51
    568          */
    569         USPOOF_UNRESTRICTIVE = 0x60000000,
    570         /**
    571          * Mask for selecting the Restriction Level bits from the return value of {@link uspoof_check}.
    572          *
    573          * @stable ICU 53
    574          */
    575         USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000,
    576 #ifndef U_HIDE_INTERNAL_API
    577         /**
    578          * An undefined restriction level.
    579          * @internal
    580          */
    581         USPOOF_UNDEFINED_RESTRICTIVE = -1
    582 #endif  /* U_HIDE_INTERNAL_API */
    583     } URestrictionLevel;
    584 
    585 /**
    586  *  Create a Unicode Spoof Checker, configured to perform all
    587  *  checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.
    588  *  Note that additional checks may be added in the future,
    589  *  resulting in changes to the default checking behavior.
    590  *
    591  *  @param status  The error code, set if this function encounters a problem.
    592  *  @return        the newly created Spoof Checker
    593  *  @stable ICU 4.2
    594  */
    595 U_STABLE USpoofChecker * U_EXPORT2
    596 uspoof_open(UErrorCode *status);
    597 
    598 
    599 /**
    600  * Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
    601  * Inverse of uspoof_serialize().
    602  * The memory containing the serialized data must remain valid and unchanged
    603  * as long as the spoof checker, or any cloned copies of the spoof checker,
    604  * are in use.  Ownership of the memory remains with the caller.
    605  * The spoof checker (and any clones) must be closed prior to deleting the
    606  * serialized data.
    607  *
    608  * @param data a pointer to 32-bit-aligned memory containing the serialized form of spoof data
    609  * @param length the number of bytes available at data;
    610  *               can be more than necessary
    611  * @param pActualLength receives the actual number of bytes at data taken up by the data;
    612  *                      can be NULL
    613  * @param pErrorCode ICU error code
    614  * @return the spoof checker opened from the serialized data.
    615  *
    616  * @see uspoof_open
    617  * @see uspoof_serialize
    618  * @stable ICU 4.2
    619  */
    620 U_STABLE USpoofChecker * U_EXPORT2
    621 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
    622                           UErrorCode *pErrorCode);
    623 
    624 /**
    625   * Open a Spoof Checker from the source form of the spoof data.
    626   * The input corresponds to the Unicode data file confusables.txt
    627   * as described in Unicode UTS #39.  The syntax of the source data
    628   * is as described in UTS #39 for this file, and the content of
    629   * this file is acceptable input.
    630   *
    631   * The character encoding of the (char *) input text is UTF-8.
    632   *
    633   * @param confusables a pointer to the confusable characters definitions,
    634   *                    as found in file confusables.txt from unicode.org.
    635   * @param confusablesLen The length of the confusables text, or -1 if the
    636   *                    input string is zero terminated.
    637   * @param confusablesWholeScript
    638   *                    Deprecated in ICU 58.  No longer used.
    639   * @param confusablesWholeScriptLen
    640   *                    Deprecated in ICU 58.  No longer used.
    641   * @param errType     In the event of an error in the input, indicates
    642   *                    which of the input files contains the error.
    643   *                    The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or
    644   *                    USPOOF_WHOLE_SCRIPT_CONFUSABLE, or
    645   *                    zero if no errors are found.
    646   * @param pe          In the event of an error in the input, receives the position
    647   *                    in the input text (line, offset) of the error.
    648   * @param status      an in/out ICU UErrorCode.  Among the possible errors is
    649   *                    U_PARSE_ERROR, which is used to report syntax errors
    650   *                    in the input.
    651   * @return            A spoof checker that uses the rules from the input files.
    652   * @stable ICU 4.2
    653   */
    654 U_STABLE USpoofChecker * U_EXPORT2
    655 uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
    656                       const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
    657                       int32_t *errType, UParseError *pe, UErrorCode *status);
    658 
    659 
    660 /**
    661   * Close a Spoof Checker, freeing any memory held by its implementation.
    662   * @param sc  The USpoofChecker to close.
    663   * @stable ICU 4.2
    664   */
    665 U_STABLE void U_EXPORT2
    666 uspoof_close(USpoofChecker *sc);
    667 
    668 #if U_SHOW_CPLUSPLUS_API
    669 
    670 U_NAMESPACE_BEGIN
    671 
    672 /**
    673  * \class LocalUSpoofCheckerPointer
    674  * "Smart pointer" class that closes a USpoofChecker via uspoof_close().
    675  * For most methods, see the LocalPointerBase base class.
    676  *
    677  * @see LocalPointerBase
    678  * @see LocalPointer
    679  * @stable ICU 4.4
    680  */
    681 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckerPointer, USpoofChecker, uspoof_close);
    682 
    683 U_NAMESPACE_END
    684 
    685 #endif
    686 
    687 /**
    688  * Clone a Spoof Checker.  The clone will be set to perform the same checks
    689  *   as the original source.
    690  *
    691  * @param sc       The source USpoofChecker
    692  * @param status   The error code, set if this function encounters a problem.
    693  * @return         The clone of the source USpoofChecker.
    694  * @stable ICU 4.2
    695  */
    696 U_STABLE USpoofChecker * U_EXPORT2
    697 uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
    698 
    699 
    700 /**
    701  * Specify the bitmask of checks that will be performed by {@link uspoof_check}. Calling this method
    702  * overwrites any checks that may have already been enabled. By default, all checks are enabled.
    703  *
    704  * To enable specific checks and disable all others, the "whitelisted" checks should be ORed together. For
    705  * example, to fail strings containing characters outside of the set specified by {@link uspoof_setAllowedChars} and
    706  * also strings that contain digits from mixed numbering systems:
    707  *
    708  * <pre>
    709  * {@code
    710  * uspoof_setChecks(sc, USPOOF_CHAR_LIMIT | USPOOF_MIXED_NUMBERS, &status);
    711  * }
    712  * </pre>
    713  *
    714  * To disable specific checks and enable all others, the "blacklisted" checks should be ANDed away from
    715  * USPOOF_ALL_CHECKS. For example, if you are not planning to use the {@link uspoof_areConfusable} functionality,
    716  * it is good practice to disable the CONFUSABLE check:
    717  *
    718  * <pre>
    719  * {@code
    720  * uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CONFUSABLE, &status);
    721  * }
    722  * </pre>
    723  *
    724  * Note that methods such as {@link uspoof_setAllowedChars}, {@link uspoof_setAllowedLocales}, and
    725  * {@link uspoof_setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they
    726  * enable onto the existing bitmask specified by this method. For more details, see the documentation of those
    727  * methods.
    728  *
    729  * @param sc       The USpoofChecker
    730  * @param checks   The set of checks that this spoof checker will perform.
    731  *                 The value is a bit set, obtained by OR-ing together
    732  *                 values from enum USpoofChecks.
    733  * @param status   The error code, set if this function encounters a problem.
    734  * @stable ICU 4.2
    735  *
    736  */
    737 U_STABLE void U_EXPORT2
    738 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
    739 
    740 /**
    741  * Get the set of checks that this Spoof Checker has been configured to perform.
    742  *
    743  * @param sc       The USpoofChecker
    744  * @param status   The error code, set if this function encounters a problem.
    745  * @return         The set of checks that this spoof checker will perform.
    746  *                 The value is a bit set, obtained by OR-ing together
    747  *                 values from enum USpoofChecks.
    748  * @see uspoof_setChecks
    749  * @stable ICU 4.2
    750  */
    751 U_STABLE int32_t U_EXPORT2
    752 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
    753 
    754 /**
    755  * Set the loosest restriction level allowed for strings. The default if this is not called is
    756  * {@link USPOOF_HIGHLY_RESTRICTIVE}. Calling this method enables the {@link USPOOF_RESTRICTION_LEVEL} and
    757  * {@link USPOOF_MIXED_NUMBERS} checks, corresponding to Sections 5.2 and 5.3 of UTS 39. To customize which checks are
    758  * to be performed by {@link uspoof_check}, see {@link uspoof_setChecks}.
    759  *
    760  * @param sc       The USpoofChecker
    761  * @param restrictionLevel The loosest restriction level allowed.
    762  * @see URestrictionLevel
    763  * @stable ICU 51
    764  */
    765 U_STABLE void U_EXPORT2
    766 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
    767 
    768 
    769 /**
    770   * Get the Restriction Level that will be tested if the checks include {@link USPOOF_RESTRICTION_LEVEL}.
    771   * @param sc The USpoofChecker
    772   * @return The restriction level
    773   * @see URestrictionLevel
    774   * @stable ICU 51
    775   */
    776 U_STABLE URestrictionLevel U_EXPORT2
    777 uspoof_getRestrictionLevel(const USpoofChecker *sc);
    778 
    779 /**
    780  * Limit characters that are acceptable in identifiers being checked to those
    781  * normally used with the languages associated with the specified locales.
    782  * Any previously specified list of locales is replaced by the new settings.
    783  *
    784  * A set of languages is determined from the locale(s), and
    785  * from those a set of acceptable Unicode scripts is determined.
    786  * Characters from this set of scripts, along with characters from
    787  * the "common" and "inherited" Unicode Script categories
    788  * will be permitted.
    789  *
    790  * Supplying an empty string removes all restrictions;
    791  * characters from any script will be allowed.
    792  *
    793  * The {@link USPOOF_CHAR_LIMIT} test is automatically enabled for this
    794  * USpoofChecker when calling this function with a non-empty list
    795  * of locales.
    796  *
    797  * The Unicode Set of characters that will be allowed is accessible
    798  * via the uspoof_getAllowedChars() function.  uspoof_setAllowedLocales()
    799  * will <i>replace</i> any previously applied set of allowed characters.
    800  *
    801  * Adjustments, such as additions or deletions of certain classes of characters,
    802  * can be made to the result of uspoof_setAllowedLocales() by
    803  * fetching the resulting set with uspoof_getAllowedChars(),
    804  * manipulating it with the Unicode Set API, then resetting the
    805  * spoof detector's limits with uspoof_setAllowedChars().
    806  *
    807  * @param sc           The USpoofChecker
    808  * @param localesList  A list of locales, from which the language
    809  *                     and associated script are extracted.  The locales
    810  *                     are comma-separated if there is more than one.
    811  *                     White space may not appear within an individual locale,
    812  *                     but is ignored otherwise.
    813  *                     The locales are syntactically like those from the
    814  *                     HTTP Accept-Language header.
    815  *                     If the localesList is empty, no restrictions will be placed on
    816  *                     the allowed characters.
    817  *
    818  * @param status       The error code, set if this function encounters a problem.
    819  * @stable ICU 4.2
    820  */
    821 U_STABLE void U_EXPORT2
    822 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
    823 
    824 /**
    825  * Get a list of locales for the scripts that are acceptable in strings
    826  *  to be checked.  If no limitations on scripts have been specified,
    827  *  an empty string will be returned.
    828  *
    829  *  uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
    830  *
    831  *  The format of the returned list is the same as that supplied to
    832  *  uspoof_setAllowedLocales(), but the returned list may not be identical
    833  *  to the originally specified string; the string may be reformatted,
    834  *  and information other than languages from
    835  *  the originally specified locales may be omitted.
    836  *
    837  * @param sc           The USpoofChecker
    838  * @param status       The error code, set if this function encounters a problem.
    839  * @return             A string containing a list of locales corresponding
    840  *                     to the acceptable scripts, formatted like an
    841  *                     HTTP Accept-Language value.
    842  *
    843  * @stable ICU 4.2
    844  */
    845 U_STABLE const char * U_EXPORT2
    846 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
    847 
    848 
    849 /**
    850  * Limit the acceptable characters to those specified by a Unicode Set.
    851  *   Any previously specified character limit is
    852  *   replaced by the new settings.  This includes limits on
    853  *   characters that were set with the uspoof_setAllowedLocales() function.
    854  *
    855  * The USPOOF_CHAR_LIMIT test is automatically enabled for this
    856  * USpoofChecker by this function.
    857  *
    858  * @param sc       The USpoofChecker
    859  * @param chars    A Unicode Set containing the list of
    860  *                 characters that are permitted.  Ownership of the set
    861  *                 remains with the caller.  The incoming set is cloned by
    862  *                 this function, so there are no restrictions on modifying
    863  *                 or deleting the USet after calling this function.
    864  * @param status   The error code, set if this function encounters a problem.
    865  * @stable ICU 4.2
    866  */
    867 U_STABLE void U_EXPORT2
    868 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);
    869 
    870 
    871 /**
    872  * Get a USet for the characters permitted in an identifier.
    873  * This corresponds to the limits imposed by the Set Allowed Characters
    874  * functions. Limitations imposed by other checks will not be
    875  * reflected in the set returned by this function.
    876  *
    877  * The returned set will be frozen, meaning that it cannot be modified
    878  * by the caller.
    879  *
    880  * Ownership of the returned set remains with the Spoof Detector.  The
    881  * returned set will become invalid if the spoof detector is closed,
    882  * or if a new set of allowed characters is specified.
    883  *
    884  * @see uspoof_setAllowedChars
    885  * @param sc       The USpoofChecker
    886  * @param status   The error code, set if this function encounters a problem.
    887  * @return         A USet containing the characters that are permitted by
    888  *                 the USPOOF_CHAR_LIMIT test.
    889  * @stable ICU 4.2
    890  */
    891 U_STABLE const USet * U_EXPORT2
    892 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
    893 
    894 
    895 #if U_SHOW_CPLUSPLUS_API
    896 /**
    897  * Limit the acceptable characters to those specified by a Unicode Set.
    898  *   Any previously specified character limit is
    899  *   replaced by the new settings.  This includes limits on
    900  *   characters that were set with the uspoof_setAllowedLocales() function.
    901  *
    902  * The USPOOF_CHAR_LIMIT test is automatically enabled for this
    903  * USpoofChecker by this function.
    904  *
    905  * @param sc       The USpoofChecker
    906  * @param chars    A Unicode Set containing the list of
    907  *                 characters that are permitted.  Ownership of the set
    908  *                 remains with the caller.  The incoming set is cloned by
    909  *                 this function, so there are no restrictions on modifying
    910  *                 or deleting the UnicodeSet after calling this function.
    911  * @param status   The error code, set if this function encounters a problem.
    912  * @stable ICU 4.2
    913  */
    914 U_STABLE void U_EXPORT2
    915 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status);
    916 
    917 
    918 /**
    919  * Get a UnicodeSet for the characters permitted in an identifier.
    920  * This corresponds to the limits imposed by the Set Allowed Characters /
    921  * UnicodeSet functions. Limitations imposed by other checks will not be
    922  * reflected in the set returned by this function.
    923  *
    924  * The returned set will be frozen, meaning that it cannot be modified
    925  * by the caller.
    926  *
    927  * Ownership of the returned set remains with the Spoof Detector.  The
    928  * returned set will become invalid if the spoof detector is closed,
    929  * or if a new set of allowed characters is specified.
    930  *
    931  * @see uspoof_setAllowedUnicodeSet
    932  * @param sc       The USpoofChecker
    933  * @param status   The error code, set if this function encounters a problem.
    934  * @return         A UnicodeSet containing the characters that are permitted by
    935  *                 the USPOOF_CHAR_LIMIT test.
    936  * @stable ICU 4.2
    937  */
    938 U_STABLE const icu::UnicodeSet * U_EXPORT2
    939 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
    940 #endif
    941 
    942 
    943 /**
    944  * Check the specified string for possible security issues.
    945  * The text to be checked will typically be an identifier of some sort.
    946  * The set of checks to be performed is specified with uspoof_setChecks().
    947  *
    948  * \note
    949  *   Consider using the newer API, {@link uspoof_check2}, instead.
    950  *   The newer API exposes additional information from the check procedure
    951  *   and is otherwise identical to this method.
    952  *
    953  * @param sc      The USpoofChecker
    954  * @param id      The identifier to be checked for possible security issues,
    955  *                in UTF-16 format.
    956  * @param length  The length of the string to be checked, expressed in
    957  *                16-bit UTF-16 code units, or -1 if the string is
    958  *                zero terminated.
    959  * @param position  Deprecated in ICU 51.  Always returns zero.
    960  *                Originally, an out parameter for the index of the first
    961  *                string position that failed a check.
    962  *                This parameter may be NULL.
    963  * @param status  The error code, set if an error occurred while attempting to
    964  *                perform the check.
    965  *                Spoofing or security issues detected with the input string are
    966  *                not reported here, but through the function's return value.
    967  * @return        An integer value with bits set for any potential security
    968  *                or spoofing issues detected.  The bits are defined by
    969  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
    970  *                will be zero if the input string passes all of the
    971  *                enabled checks.
    972  * @see uspoof_check2
    973  * @stable ICU 4.2
    974  */
    975 U_STABLE int32_t U_EXPORT2
    976 uspoof_check(const USpoofChecker *sc,
    977                          const UChar *id, int32_t length,
    978                          int32_t *position,
    979                          UErrorCode *status);
    980 
    981 
    982 /**
    983  * Check the specified string for possible security issues.
    984  * The text to be checked will typically be an identifier of some sort.
    985  * The set of checks to be performed is specified with uspoof_setChecks().
    986  *
    987  * \note
    988  *   Consider using the newer API, {@link uspoof_check2UTF8}, instead.
    989  *   The newer API exposes additional information from the check procedure
    990  *   and is otherwise identical to this method.
    991  *
    992  * @param sc      The USpoofChecker
    993  * @param id      An identifier to be checked for possible security issues, in UTF-8 format.
    994  * @param length  the length of the string to be checked, or -1 if the string is
    995  *                zero terminated.
    996  * @param position  Deprecated in ICU 51.  Always returns zero.
    997  *                Originally, an out parameter for the index of the first
    998  *                string position that failed a check.
    999  *                This parameter may be NULL.
   1000  * @param status  The error code, set if an error occurred while attempting to
   1001  *                perform the check.
   1002  *                Spoofing or security issues detected with the input string are
   1003  *                not reported here, but through the function's return value.
   1004  *                If the input contains invalid UTF-8 sequences,
   1005  *                a status of U_INVALID_CHAR_FOUND will be returned.
   1006  * @return        An integer value with bits set for any potential security
   1007  *                or spoofing issues detected.  The bits are defined by
   1008  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1009  *                will be zero if the input string passes all of the
   1010  *                enabled checks.
   1011  * @see uspoof_check2UTF8
   1012  * @stable ICU 4.2
   1013  */
   1014 U_STABLE int32_t U_EXPORT2
   1015 uspoof_checkUTF8(const USpoofChecker *sc,
   1016                  const char *id, int32_t length,
   1017                  int32_t *position,
   1018                  UErrorCode *status);
   1019 
   1020 
   1021 #if U_SHOW_CPLUSPLUS_API
   1022 /**
   1023  * Check the specified string for possible security issues.
   1024  * The text to be checked will typically be an identifier of some sort.
   1025  * The set of checks to be performed is specified with uspoof_setChecks().
   1026  *
   1027  * \note
   1028  *   Consider using the newer API, {@link uspoof_check2UnicodeString}, instead.
   1029  *   The newer API exposes additional information from the check procedure
   1030  *   and is otherwise identical to this method.
   1031  *
   1032  * @param sc      The USpoofChecker
   1033  * @param id      An identifier to be checked for possible security issues.
   1034  * @param position  Deprecated in ICU 51.  Always returns zero.
   1035  *                Originally, an out parameter for the index of the first
   1036  *                string position that failed a check.
   1037  *                This parameter may be NULL.
   1038  * @param status  The error code, set if an error occurred while attempting to
   1039  *                perform the check.
   1040  *                Spoofing or security issues detected with the input string are
   1041  *                not reported here, but through the function's return value.
   1042  * @return        An integer value with bits set for any potential security
   1043  *                or spoofing issues detected.  The bits are defined by
   1044  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1045  *                will be zero if the input string passes all of the
   1046  *                enabled checks.
   1047  * @see uspoof_check2UnicodeString
   1048  * @stable ICU 4.2
   1049  */
   1050 U_STABLE int32_t U_EXPORT2
   1051 uspoof_checkUnicodeString(const USpoofChecker *sc,
   1052                           const icu::UnicodeString &id,
   1053                           int32_t *position,
   1054                           UErrorCode *status);
   1055 #endif
   1056 
   1057 
   1058 /**
   1059  * Check the specified string for possible security issues.
   1060  * The text to be checked will typically be an identifier of some sort.
   1061  * The set of checks to be performed is specified with uspoof_setChecks().
   1062  *
   1063  * @param sc      The USpoofChecker
   1064  * @param id      The identifier to be checked for possible security issues,
   1065  *                in UTF-16 format.
   1066  * @param length  the length of the string to be checked, in 16-bit UTF-16
   1067  *                code units, or -1 if the string is zero terminated.
   1068  * @param checkResult  An instance of USpoofCheckResult to be filled with
   1069  *                details about the identifier.  Can be NULL.
   1070  * @param status  The error code, set if an error occurred while attempting to
   1071  *                perform the check.
   1072  *                Spoofing or security issues detected with the input string are
   1073  *                not reported here, but through the function's return value.
   1074  * @return        An integer value with bits set for any potential security
   1075  *                or spoofing issues detected.  The bits are defined by
   1076  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1077  *                will be zero if the input string passes all of the
   1078  *                enabled checks.  Any information in this bitmask will be
   1079  *                consistent with the information saved in the optional
   1080  *                checkResult parameter.
   1081  * @see uspoof_openCheckResult
   1082  * @see uspoof_check2UTF8
   1083  * @see uspoof_check2UnicodeString
   1084  * @stable ICU 58
   1085  */
   1086 U_STABLE int32_t U_EXPORT2
   1087 uspoof_check2(const USpoofChecker *sc,
   1088     const UChar* id, int32_t length,
   1089     USpoofCheckResult* checkResult,
   1090     UErrorCode *status);
   1091 
   1092 /**
   1093  * Check the specified string for possible security issues.
   1094  * The text to be checked will typically be an identifier of some sort.
   1095  * The set of checks to be performed is specified with uspoof_setChecks().
   1096  *
   1097  * This version of {@link uspoof_check} accepts a USpoofCheckResult, which
   1098  * returns additional information about the identifier.  For more
   1099  * information, see {@link uspoof_openCheckResult}.
   1100  *
   1101  * @param sc      The USpoofChecker
   1102  * @param id      An identifier to be checked for possible security issues, in UTF-8 format.
   1103  * @param length  the length of the string to be checked, or -1 if the string is
   1104  *                zero terminated.
   1105  * @param checkResult  An instance of USpoofCheckResult to be filled with
   1106  *                details about the identifier.  Can be NULL.
   1107  * @param status  The error code, set if an error occurred while attempting to
   1108  *                perform the check.
   1109  *                Spoofing or security issues detected with the input string are
   1110  *                not reported here, but through the function's return value.
   1111  * @return        An integer value with bits set for any potential security
   1112  *                or spoofing issues detected.  The bits are defined by
   1113  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1114  *                will be zero if the input string passes all of the
   1115  *                enabled checks.  Any information in this bitmask will be
   1116  *                consistent with the information saved in the optional
   1117  *                checkResult parameter.
   1118  * @see uspoof_openCheckResult
   1119  * @see uspoof_check2
   1120  * @see uspoof_check2UnicodeString
   1121  * @stable ICU 58
   1122  */
   1123 U_STABLE int32_t U_EXPORT2
   1124 uspoof_check2UTF8(const USpoofChecker *sc,
   1125     const char *id, int32_t length,
   1126     USpoofCheckResult* checkResult,
   1127     UErrorCode *status);
   1128 
   1129 #if U_SHOW_CPLUSPLUS_API
   1130 /**
   1131  * Check the specified string for possible security issues.
   1132  * The text to be checked will typically be an identifier of some sort.
   1133  * The set of checks to be performed is specified with uspoof_setChecks().
   1134  *
   1135  * @param sc      The USpoofChecker
   1136  * @param id      An identifier to be checked for possible security issues.
   1137  * @param checkResult  An instance of USpoofCheckResult to be filled with
   1138  *                details about the identifier.  Can be NULL.
   1139  * @param status  The error code, set if an error occurred while attempting to
   1140  *                perform the check.
   1141  *                Spoofing or security issues detected with the input string are
   1142  *                not reported here, but through the function's return value.
   1143  * @return        An integer value with bits set for any potential security
   1144  *                or spoofing issues detected.  The bits are defined by
   1145  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1146  *                will be zero if the input string passes all of the
   1147  *                enabled checks.  Any information in this bitmask will be
   1148  *                consistent with the information saved in the optional
   1149  *                checkResult parameter.
   1150  * @see uspoof_openCheckResult
   1151  * @see uspoof_check2
   1152  * @see uspoof_check2UTF8
   1153  * @stable ICU 58
   1154  */
   1155 U_STABLE int32_t U_EXPORT2
   1156 uspoof_check2UnicodeString(const USpoofChecker *sc,
   1157     const icu::UnicodeString &id,
   1158     USpoofCheckResult* checkResult,
   1159     UErrorCode *status);
   1160 #endif
   1161 
   1162 /**
   1163  * Create a USpoofCheckResult, used by the {@link uspoof_check2} class of functions to return
   1164  * information about the identifier.  Information includes:
   1165  * <ul>
   1166  *   <li>A bitmask of the checks that failed</li>
   1167  *   <li>The identifier's restriction level (UTS 39 section 5.2)</li>
   1168  *   <li>The set of numerics in the string (UTS 39 section 5.3)</li>
   1169  * </ul>
   1170  * The data held in a USpoofCheckResult is cleared whenever it is passed into a new call
   1171  * of {@link uspoof_check2}.
   1172  *
   1173  * @param status  The error code, set if this function encounters a problem.
   1174  * @return        the newly created USpoofCheckResult; close it with {@link uspoof_closeCheckResult}
   1175  * @see uspoof_check2
   1176  * @see uspoof_check2UTF8
   1177  * @see uspoof_check2UnicodeString
   1178  * @stable ICU 58
   1179  */
   1180 U_STABLE USpoofCheckResult* U_EXPORT2
   1181 uspoof_openCheckResult(UErrorCode *status);
   1182 
   1183 /**
   1184  * Close a USpoofCheckResult, freeing any memory held by its implementation.
   1185  *
   1186  * @see uspoof_openCheckResult
   1187  * @param checkResult  The instance of USpoofCheckResult to close
   1188  * @stable ICU 58
   1189  */
   1190 U_STABLE void U_EXPORT2
   1191 uspoof_closeCheckResult(USpoofCheckResult *checkResult);
   1192 
   1193 #if U_SHOW_CPLUSPLUS_API
   1194 
   1195 U_NAMESPACE_BEGIN
   1196 
   1197 /**
   1198  * \class LocalUSpoofCheckResultPointer
   1199  * "Smart pointer" class that closes a USpoofCheckResult via {@link uspoof_closeCheckResult}.
   1200  * For most methods, see the LocalPointerBase base class.
   1201  *
   1202  * @see LocalPointerBase
   1203  * @see LocalPointer
   1204  * @stable ICU 58
   1205  */
   1206 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult);
   1207 
   1208 U_NAMESPACE_END
   1209 
   1210 #endif
   1211 
   1212 /**
   1213  * Indicates which of the spoof checks have failed. The value is a bitwise OR of the constants for the tests
   1214  * in question: USPOOF_RESTRICTION_LEVEL, USPOOF_CHAR_LIMIT, and so on.
   1215  *
   1216  * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
   1217  * @param status       The error code, set if an error occurred.
   1218  * @return        An integer value with bits set for any potential security
   1219  *                or spoofing issues detected.  The bits are defined by
   1220  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1221  *                will be zero if the input string passes all of the
   1222  *                enabled checks.
   1223  * @see uspoof_setChecks
   1224  * @stable ICU 58
   1225  */
   1226 U_STABLE int32_t U_EXPORT2
   1227 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);
   1228 
   1229 /**
   1230  * Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check
   1231  * was enabled; otherwise, the returned value is undefined.
   1232  *
   1233  * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
   1234  * @param status       The error code, set if an error occurred.
   1235  * @return             The restriction level contained in the USpoofCheckResult
   1236  * @see uspoof_setRestrictionLevel
   1237  * @stable ICU 58
   1238  */
   1239 U_STABLE URestrictionLevel U_EXPORT2
   1240 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status);
   1241 
   1242 /**
   1243  * Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled;
   1244  * otherwise, undefined.  The set will contain the zero digit from each decimal number system found
   1245  * in the input string.  Ownership of the returned USet remains with the USpoofCheckResult.
   1246  * The USet will be freed when {@link uspoof_closeCheckResult} is called.
   1247  *
   1248  * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
   1249  * @param status       The error code, set if an error occurred.
   1250  * @return             The set of numerics contained in the USpoofCheckResult
   1251  * @stable ICU 58
   1252  */
   1253 U_STABLE const USet* U_EXPORT2
   1254 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);
   1255 
   1256 
   1257 /**
   1258  * Check whether two specified strings are visually confusable.
   1259  *
   1260  * If the strings are confusable, the return value will be nonzero, as long as
   1261  * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
   1262  *
   1263  * The bits in the return value correspond to flags for each of the classes of
   1264  * confusables applicable to the two input strings.  According to UTS 39
   1265  * section 4, the possible flags are:
   1266  *
   1267  * <ul>
   1268  *   <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
   1269  *   <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
   1270  *   <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
   1271  * </ul>
   1272  *
   1273  * If one or more of the above flags were not listed in uspoof_setChecks(), this
   1274  * function will never report that class of confusable.  The check
   1275  * {@link USPOOF_CONFUSABLE} enables all three flags.
   1276  *
   1277  *
   1278  * @param sc      The USpoofChecker
   1279  * @param id1     The first of the two identifiers to be compared for
   1280  *                confusability.  The identifiers are in UTF-16 format.
   1281  * @param length1 The length of the first identifier, expressed in
   1282  *                16 bit UTF-16 code units, or -1 if the string is
   1283  *                nul terminated.
   1284  * @param id2     The second of the two identifiers to be compared for
   1285  *                confusability.  The identifiers are in UTF-16 format.
   1286  * @param length2 The length of the second identifier, expressed in
   1287  *                16 bit UTF-16 code units, or -1 if the string is
   1288  *                nul terminated.
   1289  * @param status  The error code, set if an error occurred while attempting to
   1290  *                perform the check.
   1291  *                Confusability of the identifiers is not reported here,
   1292  *                but through this function's return value.
   1293  * @return        An integer value with bit(s) set corresponding to
   1294  *                the type of confusability found, as defined by
   1295  *                enum USpoofChecks.  Zero is returned if the identifiers
   1296  *                are not confusable.
   1297  *
   1298  * @stable ICU 4.2
   1299  */
   1300 U_STABLE int32_t U_EXPORT2
   1301 uspoof_areConfusable(const USpoofChecker *sc,
   1302                      const UChar *id1, int32_t length1,
   1303                      const UChar *id2, int32_t length2,
   1304                      UErrorCode *status);
   1305 
   1306 
   1307 
   1308 /**
   1309  * A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
   1310  *
   1311  * @param sc      The USpoofChecker
   1312  * @param id1     The first of the two identifiers to be compared for
   1313  *                confusability.  The strings are in UTF-8 format.
   1314  * @param length1 The length of the first identifier, in bytes, or -1
   1315  *                if the string is nul terminated.
   1316  * @param id2     The second of the two identifiers to be compared for
   1317  *                confusability.  The strings are in UTF-8 format.
   1318  * @param length2 The length of the second identifier, in bytes, or -1
   1319  *                if the string is nul terminated.
   1320  * @param status  The error code, set if an error occurred while attempting to
   1321  *                perform the check.
   1322  *                Confusability of the strings is not reported here,
   1323  *                but through this function's return value.
   1324  * @return        An integer value with bit(s) set corresponding to
   1325  *                the type of confusability found, as defined by
   1326  *                enum USpoofChecks.  Zero is returned if the strings
   1327  *                are not confusable.
   1328  *
   1329  * @stable ICU 4.2
   1330  *
   1331  * @see uspoof_areConfusable
   1332  */
   1333 U_STABLE int32_t U_EXPORT2
   1334 uspoof_areConfusableUTF8(const USpoofChecker *sc,
   1335                          const char *id1, int32_t length1,
   1336                          const char *id2, int32_t length2,
   1337                          UErrorCode *status);
   1338 
   1339 
   1340 
   1341 
   1342 #if U_SHOW_CPLUSPLUS_API
   1343 /**
   1344  * A version of {@link uspoof_areConfusable} accepting UnicodeStrings.
   1345  *
   1346  * @param sc      The USpoofChecker
   1347  * @param s1      The first of the two identifiers to be compared for
   1348  *                confusability, passed as a UnicodeString.
   1349  * @param s2      The second of the two identifiers to be compared for
   1350  *                confusability, passed as a UnicodeString.
   1351  * @param status  The error code, set if an error occurred while attempting to
   1352  *                perform the check.
   1353  *                Confusability of the identifiers is not reported here,
   1354  *                but through this function's return value.
   1355  * @return        An integer value with bit(s) set corresponding to
   1356  *                the type of confusability found, as defined by
   1357  *                enum USpoofChecks.  Zero is returned if the identifiers
   1358  *                are not confusable.
   1359  *
   1360  * @stable ICU 4.2
   1361  *
   1362  * @see uspoof_areConfusable
   1363  */
   1364 U_STABLE int32_t U_EXPORT2
   1365 uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
   1366                                   const icu::UnicodeString &s1,
   1367                                   const icu::UnicodeString &s2,
   1368                                   UErrorCode *status);
   1369 #endif
   1370 
   1371 
   1372 /**
   1373  *  Get the "skeleton" for an identifier.
   1374  *  Skeletons are a transformation of the input identifier;
   1375  *  two identifiers are confusable if their skeletons are identical.
   1376  *  See Unicode Technical Standard #39 (UTS #39) for additional information.
   1377  *
   1378  *  Using skeletons directly makes it possible to quickly check
   1379  *  whether an identifier is confusable with any of some large
   1380  *  set of existing identifiers, by creating an efficiently
   1381  *  searchable collection of the skeletons.
   1382  *
   1383  * @param sc      The USpoofChecker
   1384  * @param type    Deprecated in ICU 58.  You may pass any number.
   1385  *                Originally, controlled which of the Unicode confusable data
   1386  *                tables to use.
   1387  * @param id      The input identifier whose skeleton will be computed.
   1388  * @param length  The length of the input identifier, expressed in 16 bit
   1389  *                UTF-16 code units, or -1 if the string is zero terminated.
   1390  * @param dest    The output buffer, to receive the skeleton string.
   1391  * @param destCapacity  The length of the output buffer, in 16 bit units.
   1392  *                The destCapacity may be zero, in which case the function will
   1393  *                return the actual length of the skeleton.
   1394  * @param status  The error code, set if an error occurred while attempting to
   1395  *                perform the check.
   1396  * @return        The length of the skeleton string.  The returned length
   1397  *                is always that of the complete skeleton, even when the
   1398  *                supplied buffer is too small (or of zero length).
   1399  *
   1400  * @stable ICU 4.2
   1401  * @see uspoof_areConfusable
   1402  */
   1403 U_STABLE int32_t U_EXPORT2
   1404 uspoof_getSkeleton(const USpoofChecker *sc,
   1405                    uint32_t type,
   1406                    const UChar *id,  int32_t length,
   1407                    UChar *dest, int32_t destCapacity,
   1408                    UErrorCode *status);
   1409 
   1410 /**
   1411  *  Get the "skeleton" for an identifier.
   1412  *  Skeletons are a transformation of the input identifier;
   1413  *  two identifiers are confusable if their skeletons are identical.
   1414  *  See Unicode Technical Standard #39 (UTS #39) for additional information.
   1415  *
   1416  *  Using skeletons directly makes it possible to quickly check
   1417  *  whether an identifier is confusable with any of some large
   1418  *  set of existing identifiers, by creating an efficiently
   1419  *  searchable collection of the skeletons.
   1420  *
   1421  * @param sc      The USpoofChecker
   1422  * @param type    Deprecated in ICU 58.  You may pass any number.
   1423  *                Originally, controlled which of the Unicode confusable data
   1424  *                tables to use.
   1425  * @param id      The UTF-8 format identifier whose skeleton will be computed.
   1426  * @param length  The length of the input string, in bytes,
   1427  *                or -1 if the string is zero terminated.
   1428  * @param dest    The output buffer, to receive the skeleton string.
   1429  * @param destCapacity  The length of the output buffer, in bytes.
   1430  *                The destCapacity may be zero, in which case the function will
   1431  *                return the actual length of the skeleton.
   1432  * @param status  The error code, set if an error occurred while attempting to
   1433  *                perform the check.  Possible errors include U_INVALID_CHAR_FOUND
   1434  *                   for invalid UTF-8 sequences, and
   1435  *                   U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
   1436  *                   to hold the complete skeleton.
   1437  * @return        The length of the skeleton string, in bytes.  The returned length
   1438  *                is always that of the complete skeleton, even when the
   1439  *                supplied buffer is too small (or of zero length).
   1440  *
   1441  * @stable ICU 4.2
   1442  */
   1443 U_STABLE int32_t U_EXPORT2
   1444 uspoof_getSkeletonUTF8(const USpoofChecker *sc,
   1445                        uint32_t type,
   1446                        const char *id,  int32_t length,
   1447                        char *dest, int32_t destCapacity,
   1448                        UErrorCode *status);
   1449 
   1450 #if U_SHOW_CPLUSPLUS_API
   1451 /**
   1452  *  Get the "skeleton" for an identifier.
   1453  *  Skeletons are a transformation of the input identifier;
   1454  *  two identifiers are confusable if their skeletons are identical.
   1455  *  See Unicode Technical Standard #39 (UTS #39) for additional information.
   1456  *
   1457  *  Using skeletons directly makes it possible to quickly check
   1458  *  whether an identifier is confusable with any of some large
   1459  *  set of existing identifiers, by creating an efficiently
   1460  *  searchable collection of the skeletons.
   1461  *
   1462  * @param sc      The USpoofChecker.
   1463  * @param type    Deprecated in ICU 58.  You may pass any number.
   1464  *                Originally, controlled which of the Unicode confusable data
   1465  *                tables to use.
   1466  * @param id      The input identifier whose skeleton will be computed.
   1467  * @param dest    The output string, to receive the skeleton.
   1468  * @param status  The error code, set if an error occurred while attempting to
   1469  *                perform the check.
   1470  * @return        A reference to the destination (skeleton) string.
   1471  *
   1472  * @stable ICU 4.2
   1473  */
   1474 U_I18N_API icu::UnicodeString & U_EXPORT2
   1475 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
   1476                                 uint32_t type,
   1477                                 const icu::UnicodeString &id,
   1478                                 icu::UnicodeString &dest,
   1479                                 UErrorCode *status);
   1480 #endif   /* U_SHOW_CPLUSPLUS_API */
   1481 
   1482 /**
   1483   * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
   1484   * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1485   * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1486   *
   1487   * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1488   * be deleted by the caller.
   1489   *
   1490   * @param status The error code, set if a problem occurs while creating the set.
   1491   * @return       The frozen set of candidate characters, owned by the ICU library.
   1492   * @stable ICU 51
   1493   */
   1494 U_STABLE const USet * U_EXPORT2
   1495 uspoof_getInclusionSet(UErrorCode *status);
   1496 
   1497 /**
   1498   * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
   1499   * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1500   * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1501   *
   1502   * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1503   * be deleted by the caller.
   1504   *
   1505   * @param status The error code, set if a problem occurs while creating the set.
   1506   * @return       The frozen set of recommended-script characters, owned by the ICU library.
   1507   * @stable ICU 51
   1508   */
   1509 U_STABLE const USet * U_EXPORT2
   1510 uspoof_getRecommendedSet(UErrorCode *status);
   1511 
   1512 #if U_SHOW_CPLUSPLUS_API
   1513 
   1514 /**
   1515   * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
   1516   * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1517   * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1518   *
   1519   * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1520   * be deleted by the caller.
   1521   *
   1522   * @param status The error code, set if a problem occurs while creating the set.
   1523   * @return       The frozen UnicodeSet of candidate characters, owned by the ICU library.
   1524   * @stable ICU 51
   1525   */
   1526 U_STABLE const icu::UnicodeSet * U_EXPORT2
   1527 uspoof_getInclusionUnicodeSet(UErrorCode *status);
   1528 
   1529 /**
   1530   * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
   1531   * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1532   * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1533   *
   1534   * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1535   * be deleted by the caller.
   1536   *
   1537   * @param status The error code, set if a problem occurs while creating the set.
   1538   * @return       The frozen UnicodeSet of recommended-script characters, owned by the ICU library.
   1539   * @stable ICU 51
   1540   */
   1541 U_STABLE const icu::UnicodeSet * U_EXPORT2
   1542 uspoof_getRecommendedUnicodeSet(UErrorCode *status);
   1543 
   1544 #endif /* U_SHOW_CPLUSPLUS_API */
   1545 
   1546 /**
   1547  * Serialize the data for a spoof detector into a chunk of memory.
   1548  * The flattened spoof detection tables can later be used to efficiently
   1549  * instantiate a new Spoof Detector.
   1550  *
   1551  * The serialized spoof checker includes only the data compiled from the
   1552  * Unicode data tables by uspoof_openFromSource(); it does not
   1553  * include any other state or configuration that may have been set.
   1554  *
   1555  * @param sc   the Spoof Detector whose data is to be serialized.
   1556  * @param data a pointer to 32-bit-aligned memory to be filled with the data,
   1557  *             can be NULL if capacity==0
   1558  * @param capacity the number of bytes available at data,
   1559  *                 or 0 for preflighting
   1560  * @param status an in/out ICU UErrorCode; possible errors include:
   1561  * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
   1562  * - U_ILLEGAL_ARGUMENT_ERROR if the data or capacity parameters are bad
   1563  * @return the number of bytes written or needed for the spoof data
   1564  *
   1565  * @see utrie2_openFromSerialized()
   1566  * @stable ICU 4.2
   1567  */
   1568 U_STABLE int32_t U_EXPORT2
   1569 uspoof_serialize(USpoofChecker *sc,
   1570                  void *data, int32_t capacity,
   1571                  UErrorCode *status);
   1572 
   1573 
   1574 #endif
   1575 
   1576 #endif   /* USPOOF_H */
   1577