Home | History | Annotate | Download | only in unicode
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2013, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  uidna.h
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003feb1
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #ifndef __UIDNA_H__
     18 #define __UIDNA_H__
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_IDNA
     23 
     24 #include "unicode/localpointer.h"
     25 #include "unicode/parseerr.h"
     26 
     27 /**
     28  * \file
     29  * \brief C API: Internationalizing Domain Names in Applications (IDNA)
     30  *
     31  * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
     32  *
     33  * The C API functions which do take a UIDNA * service object pointer
     34  * implement UTS #46 and IDNA2008.
     35  * The C API functions which do not take a service object pointer
     36  * implement IDNA2003.
     37  */
     38 
     39 /*
     40  * IDNA option bit set values. Combine them with bitwise OR to form an options bit set.
     41  */
     42 enum {
     43     /**
     44      * Default options value: None of the other options are set.
     45      * For use in static worker and factory methods.
     46      * @stable ICU 2.6
     47      */
     48     UIDNA_DEFAULT=0,
     49     /**
     50      * Option to allow unassigned code points in domain names and labels.
     51      * For use in static worker and factory methods.
     52      * <p>This option is ignored by the UTS46 implementation.
     53      * (UTS #46 disallows unassigned code points.)
     54      * @stable ICU 2.6
     55      */
     56     UIDNA_ALLOW_UNASSIGNED=1,
     57     /**
     58      * Option to check whether the input conforms to the STD3 ASCII rules,
     59      * for example the restriction of labels to LDH characters
     60      * (ASCII Letters, Digits and Hyphen-Minus).
     61      * For use in static worker and factory methods.
     62      * @stable ICU 2.6
     63      */
     64     UIDNA_USE_STD3_RULES=2,
     65     /**
     66      * IDNA option to check whether the input conforms to the BiDi rules.
     67      * For use in static worker and factory methods.
     68      * <p>This option is ignored by the IDNA2003 implementation.
     69      * (IDNA2003 always performs a BiDi check.)
     70      * @stable ICU 4.6
     71      */
     72     UIDNA_CHECK_BIDI=4,
     73     /**
     74      * IDNA option to check whether the input conforms to the CONTEXTJ rules.
     75      * For use in static worker and factory methods.
     76      * <p>This option is ignored by the IDNA2003 implementation.
     77      * (The CONTEXTJ check is new in IDNA2008.)
     78      * @stable ICU 4.6
     79      */
     80     UIDNA_CHECK_CONTEXTJ=8,
     81     /**
     82      * IDNA option for nontransitional processing in ToASCII().
     83      * For use in static worker and factory methods.
     84      * <p>By default, ToASCII() uses transitional processing.
     85      * <p>This option is ignored by the IDNA2003 implementation.
     86      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     87      * @stable ICU 4.6
     88      */
     89     UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
     90     /**
     91      * IDNA option for nontransitional processing in ToUnicode().
     92      * For use in static worker and factory methods.
     93      * <p>By default, ToUnicode() uses transitional processing.
     94      * <p>This option is ignored by the IDNA2003 implementation.
     95      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     96      * @stable ICU 4.6
     97      */
     98     UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
     99     /**
    100      * IDNA option to check whether the input conforms to the CONTEXTO rules.
    101      * For use in static worker and factory methods.
    102      * <p>This option is ignored by the IDNA2003 implementation.
    103      * (The CONTEXTO check is new in IDNA2008.)
    104      * <p>This is for use by registries for IDNA2008 conformance.
    105      * UTS #46 does not require the CONTEXTO check.
    106      * @stable ICU 49
    107      */
    108     UIDNA_CHECK_CONTEXTO=0x40
    109 };
    110 
    111 /**
    112  * Opaque C service object type for the new IDNA API. Obtain one from uidna_openUTS46() and release it with uidna_close().
    113  * @stable ICU 4.6
    114  */
    115 struct UIDNA;
    116 typedef struct UIDNA UIDNA;  /**< C typedef for struct UIDNA. @stable ICU 4.6 */
    117 
    118 /**
    119  * Returns a UIDNA instance which implements UTS #46.
    120  * Returns an unmodifiable instance, owned by the caller.
    121  * Cache it for multiple operations, and uidna_close() it when done.
    122  * The instance is thread-safe, that is, it can be used concurrently.
    123  *
    124  * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
    125  *
    126  * @param options Bit set to modify the processing and error checking.
    127  *                See the UIDNA_... option bit set values (UIDNA_DEFAULT etc.) in uidna.h.
    128  * @param pErrorCode Standard ICU error code. Its input value must
    129  *                  pass the U_SUCCESS() test, or else the function returns
    130  *                  immediately. Check for U_FAILURE() on output or use with
    131  *                  function chaining. (See User Guide for details.)
    132  * @return the UTS #46 UIDNA instance, if successful; the caller owns it and must uidna_close() it
    133  * @stable ICU 4.6
    134  */
    135 U_STABLE UIDNA * U_EXPORT2
    136 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
    137 
    138 /**
    139  * Closes a UIDNA instance, for example one returned by uidna_openUTS46().
    140  * @param idna UIDNA instance to be closed
    141  * @stable ICU 4.6
    142  */
    143 U_STABLE void U_EXPORT2
    144 uidna_close(UIDNA *idna);
    145 
    146 #if U_SHOW_CPLUSPLUS_API
    147 
    148 U_NAMESPACE_BEGIN
    149 
    150 /**
    151  * \class LocalUIDNAPointer
    152  * "Smart pointer" class, closes a UIDNA via uidna_close(). Only declared when U_SHOW_CPLUSPLUS_API is set.
    153  * For most methods see the LocalPointerBase base class.
    154  *
    155  * @see LocalPointerBase
    156  * @see LocalPointer
    157  * @stable ICU 4.6
    158  */
    159 U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
    160 
    161 U_NAMESPACE_END
    162 
    163 #endif
    164 
    165 /**
    166  * Output container for IDNA processing errors, passed as pInfo to the uidna_...() processing functions.
    167  * Initialize with UIDNA_INFO_INITIALIZER:
    168  * \code
    169  * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
    170  * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
    171  * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
    172  * \endcode
    173  * @stable ICU 4.6
    174  */
    175 typedef struct UIDNAInfo {
    176     /** sizeof(UIDNAInfo); UIDNA_INFO_INITIALIZER sets this field. @stable ICU 4.6 */
    177     int16_t size;
    178     /**
    179      * Set to TRUE if transitional and nontransitional processing produce different results.
    180      * For details see C++ IDNAInfo::isTransitionalDifferent().
    181      * @stable ICU 4.6
    182      */
    183     UBool isTransitionalDifferent;
    184     UBool reservedB3;  /**< Reserved field, do not use. @internal */
    185     /**
    186      * Bit set indicating IDNA processing errors. 0 if no errors.
    187      * See UIDNA_ERROR_... constants.
    188      * @stable ICU 4.6
    189      */
    190     uint32_t errors;
    191     int32_t reservedI2;  /**< Reserved field, do not use. @internal */
    192     int32_t reservedI3;  /**< Reserved field, do not use. @internal */
    193 } UIDNAInfo;
    194 
    195 /**
    196  * Static initializer for a UIDNAInfo struct: sets size to sizeof(UIDNAInfo), both UBool fields to FALSE, and errors and the reserved integers to 0.
    197  * @stable ICU 4.6
    198  */
    199 #define UIDNA_INFO_INITIALIZER { \
    200     (int16_t)sizeof(UIDNAInfo), \
    201     FALSE, FALSE, \
    202     0, 0, 0 }
    203 
    204 /**
    205  * Converts a single domain name label into its ASCII form for DNS lookup.
    206  * If any processing step fails, then pInfo->errors will be non-zero (see the UIDNA_ERROR_... constants) and
    207  * the result might not be an ASCII string.
    208  * The label might be modified according to the types of errors.
    209  * Labels with severe errors will be left in (or turned into) their Unicode form.
    210  *
    211  * The UErrorCode indicates an error only in exceptional cases,
    212  * such as a U_MEMORY_ALLOCATION_ERROR.
    213  *
    214  * @param idna UIDNA instance (see uidna_openUTS46())
    215  * @param label Input domain name label
    216  * @param length Label length, or -1 if NUL-terminated
    217  * @param dest Destination string buffer
    218  * @param capacity Destination buffer capacity
    219  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
    220  * @param pErrorCode Standard ICU error code. Its input value must
    221  *                  pass the U_SUCCESS() test, or else the function returns
    222  *                  immediately. Check for U_FAILURE() on output or use with
    223  *                  function chaining. (See User Guide for details.)
    224  * @return destination string length
    225  * @stable ICU 4.6
    226  */
    227 U_STABLE int32_t U_EXPORT2
    228 uidna_labelToASCII(const UIDNA *idna,
    229                    const UChar *label, int32_t length,
    230                    UChar *dest, int32_t capacity,
    231                    UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    232 
    233 /**
    234  * Converts a single domain name label into its Unicode form for human-readable display.
    235  * If any processing step fails, then pInfo->errors will be non-zero (see the UIDNA_ERROR_... constants).
    236  * The label might be modified according to the types of errors.
    237  *
    238  * The UErrorCode indicates an error only in exceptional cases,
    239  * such as a U_MEMORY_ALLOCATION_ERROR.
    240  *
    241  * @param idna UIDNA instance (see uidna_openUTS46())
    242  * @param label Input domain name label
    243  * @param length Label length, or -1 if NUL-terminated
    244  * @param dest Destination string buffer
    245  * @param capacity Destination buffer capacity
    246  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
    247  * @param pErrorCode Standard ICU error code. Its input value must
    248  *                  pass the U_SUCCESS() test, or else the function returns
    249  *                  immediately. Check for U_FAILURE() on output or use with
    250  *                  function chaining. (See User Guide for details.)
    251  * @return destination string length
    252  * @stable ICU 4.6
    253  */
    254 U_STABLE int32_t U_EXPORT2
    255 uidna_labelToUnicode(const UIDNA *idna,
    256                      const UChar *label, int32_t length,
    257                      UChar *dest, int32_t capacity,
    258                      UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    259 
    260 /**
    261  * Converts a whole domain name into its ASCII form for DNS lookup.
    262  * If any processing step fails, then pInfo->errors will be non-zero (see the UIDNA_ERROR_... constants) and
    263  * the result might not be an ASCII string.
    264  * The domain name might be modified according to the types of errors.
    265  * Labels with severe errors will be left in (or turned into) their Unicode form.
    266  *
    267  * The UErrorCode indicates an error only in exceptional cases,
    268  * such as a U_MEMORY_ALLOCATION_ERROR.
    269  *
    270  * @param idna UIDNA instance (see uidna_openUTS46())
    271  * @param name Input domain name
    272  * @param length Domain name length, or -1 if NUL-terminated
    273  * @param dest Destination string buffer
    274  * @param capacity Destination buffer capacity
    275  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
    276  * @param pErrorCode Standard ICU error code. Its input value must
    277  *                  pass the U_SUCCESS() test, or else the function returns
    278  *                  immediately. Check for U_FAILURE() on output or use with
    279  *                  function chaining. (See User Guide for details.)
    280  * @return destination string length
    281  * @stable ICU 4.6
    282  */
    283 U_STABLE int32_t U_EXPORT2
    284 uidna_nameToASCII(const UIDNA *idna,
    285                   const UChar *name, int32_t length,
    286                   UChar *dest, int32_t capacity,
    287                   UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    288 
    289 /**
    290  * Converts a whole domain name into its Unicode form for human-readable display.
    291  * If any processing step fails, then pInfo->errors will be non-zero (see the UIDNA_ERROR_... constants).
    292  * The domain name might be modified according to the types of errors.
    293  *
    294  * The UErrorCode indicates an error only in exceptional cases,
    295  * such as a U_MEMORY_ALLOCATION_ERROR.
    296  *
    297  * @param idna UIDNA instance (see uidna_openUTS46())
    298  * @param name Input domain name
    299  * @param length Domain name length, or -1 if NUL-terminated
    300  * @param dest Destination string buffer
    301  * @param capacity Destination buffer capacity
    302  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
    303  * @param pErrorCode Standard ICU error code. Its input value must
    304  *                  pass the U_SUCCESS() test, or else the function returns
    305  *                  immediately. Check for U_FAILURE() on output or use with
    306  *                  function chaining. (See User Guide for details.)
    307  * @return destination string length
    308  * @stable ICU 4.6
    309  */
    310 U_STABLE int32_t U_EXPORT2
    311 uidna_nameToUnicode(const UIDNA *idna,
    312                     const UChar *name, int32_t length,
    313                     UChar *dest, int32_t capacity,
    314                     UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    315 
    316 /* UTF-8 versions of the processing methods --------------------------------- */
    317 
    318 /**
    319  * Converts a single domain name label into its ASCII form for DNS lookup.
    320  * UTF-8 version of uidna_labelToASCII(), same behavior, but with char * (UTF-8) input and output strings.
    321  *
    322  * @param idna UIDNA instance (see uidna_openUTS46())
    323  * @param label Input domain name label
    324  * @param length Label length, or -1 if NUL-terminated
    325  * @param dest Destination string buffer
    326  * @param capacity Destination buffer capacity
    327  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
    328  * @param pErrorCode Standard ICU error code. Its input value must
    329  *                  pass the U_SUCCESS() test, or else the function returns
    330  *                  immediately. Check for U_FAILURE() on output or use with
    331  *                  function chaining. (See User Guide for details.)
    332  * @return destination string length
    333  * @stable ICU 4.6
    334  */
    335 U_STABLE int32_t U_EXPORT2
    336 uidna_labelToASCII_UTF8(const UIDNA *idna,
    337                         const char *label, int32_t length,
    338                         char *dest, int32_t capacity,
    339                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    340 
    341 /**
    342  * Converts a single domain name label into its Unicode form for human-readable display.
    343  * UTF-8 version of uidna_labelToUnicode(), same behavior, but with char * (UTF-8) input and output strings.
    344  * Note: unlike uidna_labelToASCII_UTF8(), this function name has no underscore before "UTF8".
    345  * @param idna UIDNA instance (see uidna_openUTS46())
    346  * @param label Input domain name label
    347  * @param length Label length, or -1 if NUL-terminated
    348  * @param dest Destination string buffer
    349  * @param capacity Destination buffer capacity
    350  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
    351  * @param pErrorCode Standard ICU error code. Its input value must
    352  *                  pass the U_SUCCESS() test, or else the function returns
    353  *                  immediately. Check for U_FAILURE() on output or use with
    354  *                  function chaining. (See User Guide for details.)
    355  * @return destination string length
    356  * @stable ICU 4.6
    357  */
    358 U_STABLE int32_t U_EXPORT2
    359 uidna_labelToUnicodeUTF8(const UIDNA *idna,
    360                          const char *label, int32_t length,
    361                          char *dest, int32_t capacity,
    362                          UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    363 
    364 /**
    365  * Converts a whole domain name into its ASCII form for DNS lookup.
    366  * UTF-8 version of uidna_nameToASCII(), same behavior, but with char * (UTF-8) input and output strings.
    367  *
    368  * @param idna UIDNA instance (see uidna_openUTS46())
    369  * @param name Input domain name
    370  * @param length Domain name length, or -1 if NUL-terminated
    371  * @param dest Destination string buffer
    372  * @param capacity Destination buffer capacity
    373  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
    374  * @param pErrorCode Standard ICU error code. Its input value must
    375  *                  pass the U_SUCCESS() test, or else the function returns
    376  *                  immediately. Check for U_FAILURE() on output or use with
    377  *                  function chaining. (See User Guide for details.)
    378  * @return destination string length
    379  * @stable ICU 4.6
    380  */
    381 U_STABLE int32_t U_EXPORT2
    382 uidna_nameToASCII_UTF8(const UIDNA *idna,
    383                        const char *name, int32_t length,
    384                        char *dest, int32_t capacity,
    385                        UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    386 
    387 /**
    388  * Converts a whole domain name into its Unicode form for human-readable display.
    389  * UTF-8 version of uidna_nameToUnicode(), same behavior, but with char * (UTF-8) input and output strings.
    390  * Note: unlike uidna_nameToASCII_UTF8(), this function name has no underscore before "UTF8".
    391  * @param idna UIDNA instance (see uidna_openUTS46())
    392  * @param name Input domain name
    393  * @param length Domain name length, or -1 if NUL-terminated
    394  * @param dest Destination string buffer
    395  * @param capacity Destination buffer capacity
    396  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
    397  * @param pErrorCode Standard ICU error code. Its input value must
    398  *                  pass the U_SUCCESS() test, or else the function returns
    399  *                  immediately. Check for U_FAILURE() on output or use with
    400  *                  function chaining. (See User Guide for details.)
    401  * @return destination string length
    402  * @stable ICU 4.6
    403  */
    404 U_STABLE int32_t U_EXPORT2
    405 uidna_nameToUnicodeUTF8(const UIDNA *idna,
    406                         const char *name, int32_t length,
    407                         char *dest, int32_t capacity,
    408                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    409 
    410 /*
    411  * IDNA error bit set values.
    412  * When a domain name or label fails a processing step or does not meet the
    413  * validity criteria, then one or more of these error bits are set in UIDNAInfo.errors.
    414  */
    415 enum {
    416     /**
    417      * A non-final domain name label (or the whole domain name) is empty.
    418      * @stable ICU 4.6
    419      */
    420     UIDNA_ERROR_EMPTY_LABEL=1,
    421     /**
    422      * A domain name label is longer than 63 bytes.
    423      * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
    424      * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
    425      * @stable ICU 4.6
    426      */
    427     UIDNA_ERROR_LABEL_TOO_LONG=2,
    428     /**
    429      * A domain name is longer than 255 bytes in its storage form.
    430      * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
    431      * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
    432      * @stable ICU 4.6
    433      */
    434     UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
    435     /**
    436      * A label starts with a hyphen-minus ('-').
    437      * @stable ICU 4.6
    438      */
    439     UIDNA_ERROR_LEADING_HYPHEN=8,
    440     /**
    441      * A label ends with a hyphen-minus ('-').
    442      * @stable ICU 4.6
    443      */
    444     UIDNA_ERROR_TRAILING_HYPHEN=0x10,
    445     /**
    446      * A label contains hyphen-minus ('-') in the third and fourth positions.
    447      * @stable ICU 4.6
    448      */
    449     UIDNA_ERROR_HYPHEN_3_4=0x20,
    450     /**
    451      * A label starts with a combining mark.
    452      * @stable ICU 4.6
    453      */
    454     UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
    455     /**
    456      * A label or domain name contains disallowed characters.
    457      * @stable ICU 4.6
    458      */
    459     UIDNA_ERROR_DISALLOWED=0x80,
    460     /**
    461      * A label starts with "xn--" but does not contain valid Punycode.
    462      * That is, an xn-- label failed Punycode decoding.
    463      * @stable ICU 4.6
    464      */
    465     UIDNA_ERROR_PUNYCODE=0x100,
    466     /**
    467      * A label contains a dot (full stop).
    468      * This can occur in an input string for a single-label function.
    469      * @stable ICU 4.6
    470      */
    471     UIDNA_ERROR_LABEL_HAS_DOT=0x200,
    472     /**
    473      * An ACE label does not contain a valid label string.
    474      * The label was successfully ACE (Punycode) decoded but the resulting
    475      * string had severe validation errors. For example,
    476      * it might contain characters that are not allowed in ACE labels,
    477      * or it might not be normalized.
    478      * @stable ICU 4.6
    479      */
    480     UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
    481     /**
    482      * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
    483      * @stable ICU 4.6
    484      */
    485     UIDNA_ERROR_BIDI=0x800,
    486     /**
    487      * A label does not meet the IDNA CONTEXTJ requirements.
    488      * @stable ICU 4.6
    489      */
    490     UIDNA_ERROR_CONTEXTJ=0x1000,
    491     /**
    492      * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
    493      * Some punctuation characters "Would otherwise have been DISALLOWED"
    494      * but are allowed in certain contexts. (RFC 5892)
    495      * @stable ICU 49
    496      */
    497     UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
    498     /**
    499      * A label does not meet the IDNA CONTEXTO requirements for digits.
    500      * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
    501      * @stable ICU 49
    502      */
    503     UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
    504 };
    505 
    506 /* IDNA2003 API ------------------------------------------------------------- */
    507 
    508 /**
    509  * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
    510  * This operation is done on <b>single labels</b> before sending them to something that expects
    511  * ASCII names. A label is an individual part of a domain name. Labels are usually
    512  * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
    513  *
    514  * IDNA2003 API Overview:
    515  *
    516  * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
    517  * (http://www.ietf.org/rfc/rfc3490.txt).
    518  * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
    519  * containing non-ASCII code points are processed by the
    520  * ToASCII operation before passing it to resolver libraries. Domain names
    521  * that are obtained from resolver libraries are processed by the
    522  * ToUnicode operation before displaying the domain name to the user.
    523  * IDNA requires that implementations process input strings with Nameprep
    524  * (http://www.ietf.org/rfc/rfc3491.txt),
    525  * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
    526  * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
    527  * Implementations of IDNA MUST fully implement Nameprep and Punycode;
    528  * neither Nameprep nor Punycode are optional.
    529  * The input and output of ToASCII and ToUnicode operations are Unicode
    530  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
    531  * multiple times to an input string will yield the same result as applying the operation
    532  * once.
    533  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
    534  * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
    535  *
    536  * @param src               Input UChar array containing label in Unicode.
    537  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
    538  * @param dest              Output UChar array with ASCII (ACE encoded) label.
    539  * @param destCapacity      Size of dest.
    540  * @param options           A bit set of options:
    541  *
    542  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    543  *                              and do not use STD3 ASCII rules.
    544  *                              If unassigned code points are found the operation fails with
    545  *                              U_IDNA_UNASSIGNED_ERROR error code.
    546  *
    547  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    548  *                              If this option is set, the unassigned code points in the input
    549  *                              are treated as normal Unicode code points.
    550  *
    551  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
    552  *                              If this option is set and the input does not satisfy STD3 rules,
    553  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
    554  *
    555  * @param parseError        Pointer to UParseError struct to receive information on position
    556  *                          of error if an error is encountered. Can be NULL.
    557  * @param status            ICU in/out error code parameter.
    558  *                          U_INVALID_CHAR_FOUND if src contains
    559  *                          unmatched single surrogates.
    560  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
    561  *                          too many code points.
    562  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
    563  * @return The length of the result string, if successful or in case of a buffer overflow;
    564  *         in the overflow case it will be greater than destCapacity.
    565  * @stable ICU 2.6
    566  */
    567 U_STABLE int32_t U_EXPORT2
    568 uidna_toASCII(const UChar* src, int32_t srcLength,
    569               UChar* dest, int32_t destCapacity,
    570               int32_t options,
    571               UParseError* parseError,
    572               UErrorCode* status);
    573 
    574 
    575 /**
    576  * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
    577  * This operation is done on <b>single labels</b> before sending them to something that expects
    578  * Unicode names. A label is an individual part of a domain name. Labels are usually
    579  * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
    580  *
    581  * @param src               Input UChar array containing ASCII (ACE encoded) label.
    582  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
    583  * @param dest              Output UChar array containing the Unicode equivalent of the label.
    584  * @param destCapacity      Size of dest.
    585  * @param options           A bit set of options:
    586  *
    587  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    588  *                              and do not use STD3 ASCII rules.
    589  *                              If unassigned code points are found the operation fails with
    590  *                              U_IDNA_UNASSIGNED_ERROR error code.
    591  *
    592  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    593  *                              If this option is set, the unassigned code points in the input
    594  *                              are treated as normal Unicode code points. <b> Note: </b> This option is
    595  *                              required on toUnicode operation because the RFC mandates
    596  *                              verification of decoded ACE input by applying toASCII and comparing
    597  *                              its output with source.
    598  *
    599  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
    600  *                              If this option is set and the input does not satisfy STD3 rules,
    601  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
    602  *
    603  * @param parseError        Pointer to UParseError struct to receive information on position
    604  *                          of error if an error is encountered. Can be NULL.
    605  * @param status            ICU in/out error code parameter.
    606  *                          U_INVALID_CHAR_FOUND if src contains
    607  *                          unmatched single surrogates.
    608  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
    609  *                          too many code points.
    610  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
    611  * @return The length of the result string, if successful or in case of a buffer overflow;
    612  *         in the overflow case it will be greater than destCapacity.
    613  * @stable ICU 2.6
    614  */
    615 U_STABLE int32_t U_EXPORT2
    616 uidna_toUnicode(const UChar* src, int32_t srcLength,
    617                 UChar* dest, int32_t destCapacity,
    618                 int32_t options,
    619                 UParseError* parseError,
    620                 UErrorCode* status);
    621 
    622 
    623 /**
    624  * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
    625  * This operation is done on complete domain names, e.g: "www.example.com".
    626  * It is important to note that this operation can fail. If it fails, then the input
    627  * domain name cannot be used as an Internationalized Domain Name and the application
    628  * should have methods defined to deal with the failure.
    629  *
    630  * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
    631  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
    632  * and then convert. This function does not offer that level of granularity. The options once
    633  * set will apply to all labels in the domain name.
    634  *
    635  * @param src               Input UChar array containing IDN in Unicode.
    636  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
    637  * @param dest              Output UChar array with ASCII (ACE encoded) IDN.
    638  * @param destCapacity      Size of dest.
    639  * @param options           A bit set of options:
    640  *
    641  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    642  *                              and do not use STD3 ASCII rules.
    643  *                              If unassigned code points are found the operation fails with
    644  *                              U_IDNA_UNASSIGNED_ERROR error code.
    645  *
    646  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    647  *                              If this option is set, the unassigned code points in the input
    648  *                              are treated as normal Unicode code points.
    649  *
    650  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
    651  *                              If this option is set and the input does not satisfy STD3 rules,
    652  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
    653  *
    654  * @param parseError        Pointer to UParseError struct to receive information on position
    655  *                          of error if an error is encountered. Can be NULL.
    656  * @param status            ICU in/out error code parameter.
    657  *                          U_INVALID_CHAR_FOUND if src contains
    658  *                          unmatched single surrogates.
    659  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
    660  *                          too many code points.
    661  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
    662  * @return The length of the result string, if successful or in case of a buffer overflow;
    663  *         in the overflow case it will be greater than destCapacity.
    664  * @stable ICU 2.6
    665  */
    666 U_STABLE int32_t U_EXPORT2
    667 uidna_IDNToASCII(  const UChar* src, int32_t srcLength,
    668                    UChar* dest, int32_t destCapacity,
    669                    int32_t options,
    670                    UParseError* parseError,
    671                    UErrorCode* status);
    672 
    673 /**
    674  * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
    675  * This operation is done on complete domain names, e.g., "www.example.com".
    676  *
    677  * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
    678  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
    679  * and then convert. This function does not offer that level of granularity. The options, once
    680  * set, apply to all labels in the domain name.
    681  *
    682  * @param src               Input UChar array containing IDN in ASCII (ACE encoded) form.
    683  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
    684  * @param dest              Output UChar array containing the Unicode equivalent of the source IDN.
    685  * @param destCapacity      Size of dest.
    686  * @param options           A bit set of options:
    687  *
    688  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    689  *                              and do not use STD3 ASCII rules.
    690  *                              If unassigned code points are found, the operation fails with
    691  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
    692  *
    693  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    694  *                              If this option is set, the unassigned code points in the input
    695  *                              are treated as normal Unicode code points.
    696  *
    697  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
    698  *                              If this option is set and the input does not satisfy STD3 rules,
    699  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
    700  *
    701  * @param parseError        Pointer to UParseError struct to receive information on position
    702  *                          of error if an error is encountered. Can be NULL.
    703  * @param status            ICU in/out error code parameter.
    704  *                          U_INVALID_CHAR_FOUND if src contains
    705  *                          unmatched single surrogates.
    706  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
    707  *                          too many code points.
    708  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
    709  * @return The length of the result string, both on success and in case of a buffer overflow;
    710  *         in the latter case it will be greater than destCapacity.
    711  * @stable ICU 2.6
    712  */
    713 U_STABLE int32_t U_EXPORT2
    714 uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
    715                      UChar* dest, int32_t destCapacity,
    716                      int32_t options,
    717                      UParseError* parseError,
    718                      UErrorCode* status);
    719 
    720 /**
    721  * IDNA2003: Compare two IDN strings for equivalence.
    722  * This function splits the domain names into labels and compares them.
    723  * According to the IDN RFC, whenever two labels are compared, they are
    724  * considered equal if and only if their ASCII forms (obtained by
    725  * applying toASCII) match using a case-insensitive ASCII comparison.
    726  * Two domain names are considered a match if and only if all labels
    727  * match, regardless of whether label separators match.
    728  *
    729  * @param s1                First source string.
    730  * @param length1           Length of first source string, or -1 if NUL-terminated.
    731  *
    732  * @param s2                Second source string.
    733  * @param length2           Length of second source string, or -1 if NUL-terminated.
    734  * @param options           A bit set of options:
    735  *
    736  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    737  *                              and do not use STD3 ASCII rules.
    738  *                              If unassigned code points are found, the operation fails with
    739  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
    740  *
    741  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    742  *                              If this option is set, the unassigned code points in the input
    743  *                              are treated as normal Unicode code points.
    744  *
    745  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
    746  *                              If this option is set and the input does not satisfy STD3 rules,
    747  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
    748  *
    749  * @param status            ICU error code in/out parameter.
    750  *                          Must fulfill U_SUCCESS before the function call.
    751  * @return <0 or 0 or >0, as usual for string comparisons.
    752  * @stable ICU 2.6
    753  */
    754 U_STABLE int32_t U_EXPORT2
    755 uidna_compare(  const UChar *s1, int32_t length1,
    756                 const UChar *s2, int32_t length2,
    757                 int32_t options,
    758                 UErrorCode* status);
    759 
    760 #endif /* #if !UCONFIG_NO_IDNA */
    761 
    762 #endif
    763