Home | History | Annotate | Download | only in unicode
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2003-2012, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *   file name:  uidna.h
      9  *   encoding:   US-ASCII
     10  *   tab size:   8 (not used)
     11  *   indentation:4
     12  *
     13  *   created on: 2003feb1
     14  *   created by: Ram Viswanadha
     15  */
     16 
     17 #ifndef __UIDNA_H__
     18 #define __UIDNA_H__
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_IDNA
     23 
     24 #include "unicode/localpointer.h"
     25 #include "unicode/parseerr.h"
     26 
     27 /**
     28  * \file
     29  * \brief C API: Internationalizing Domain Names in Applications (IDNA)
     30  *
     31  * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
     32  *
     33  * The C API functions which do take a UIDNA * service object pointer
     34  * implement UTS #46 and IDNA2008.
     35  * The C API functions which do not take a service object pointer
     36  * implement IDNA2003.
     37  */
     38 
     39 /*
     40  * IDNA option bit set values; each is a distinct bit, to be combined with bitwise OR for an "options" argument.
     41  */
     42 enum {
     43     /**
     44      * Default options value: None of the other options are set.
     45      * For use in static worker and factory methods.
     46      * @stable ICU 2.6
     47      */
     48     UIDNA_DEFAULT=0,
     49     /**
     50      * Option to allow unassigned code points in domain names and labels.
     51      * For use in static worker and factory methods.
     52      * <p>This option is ignored by the UTS46 implementation.
     53      * (UTS #46 disallows unassigned code points.)
     54      * @stable ICU 2.6
     55      */
     56     UIDNA_ALLOW_UNASSIGNED=1,
     57     /**
     58      * Option to check whether the input conforms to the STD3 ASCII rules,
     59      * for example the restriction of labels to LDH characters
     60      * (ASCII Letters, Digits and Hyphen-Minus).
     61      * For use in static worker and factory methods.
     62      * @stable ICU 2.6
     63      */
     64     UIDNA_USE_STD3_RULES=2,
     65     /**
     66      * IDNA option to check for whether the input conforms to the BiDi rules.
     67      * For use in static worker and factory methods.
     68      * <p>This option is ignored by the IDNA2003 implementation.
     69      * (IDNA2003 always performs a BiDi check.)
     70      * @stable ICU 4.6
     71      */
     72     UIDNA_CHECK_BIDI=4,
     73     /**
     74      * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
     75      * For use in static worker and factory methods.
     76      * <p>This option is ignored by the IDNA2003 implementation.
     77      * (The CONTEXTJ check is new in IDNA2008.)
     78      * @stable ICU 4.6
     79      */
     80     UIDNA_CHECK_CONTEXTJ=8,
     81     /**
     82      * IDNA option for nontransitional processing in ToASCII().
     83      * For use in static worker and factory methods.
     84      * <p>By default, ToASCII() uses transitional processing.
     85      * <p>This option is ignored by the IDNA2003 implementation.
     86      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     87      * @stable ICU 4.6
     88      */
     89     UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
     90     /**
     91      * IDNA option for nontransitional processing in ToUnicode().
     92      * For use in static worker and factory methods.
     93      * <p>By default, ToUnicode() uses transitional processing.
     94      * <p>This option is ignored by the IDNA2003 implementation.
     95      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     96      * @stable ICU 4.6
     97      */
     98     UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
     99 #ifndef U_HIDE_DRAFT_API
    100     /**
    101      * IDNA option to check for whether the input conforms to the CONTEXTO rules.
    102      * For use in static worker and factory methods.
    103      * <p>This option is ignored by the IDNA2003 implementation.
    104      * (The CONTEXTO check is new in IDNA2008.)
    105      * <p>This is for use by registries for IDNA2008 conformance.
    106      * UTS #46 does not require the CONTEXTO check.
    107      * @draft ICU 49
    108      */
    109     UIDNA_CHECK_CONTEXTO=0x40
    110 #endif  /* U_HIDE_DRAFT_API -- NOTE(review): when it is defined, the enumerator list ends with a trailing comma (rejected by strict C89/C++98); confirm the supported language standards. */
    111 };
    112 
    113 /**
    114  * Opaque C service object type for the new (UTS #46) IDNA API; see uidna_openUTS46() and uidna_close().
    115  * @stable ICU 4.6
    116  */
    117 struct UIDNA;
    118 typedef struct UIDNA UIDNA;  /**< C typedef for struct UIDNA. @stable ICU 4.6 */
    119 
    120 /**
    121  * Returns a UIDNA instance which implements UTS #46.
    122  * Returns an unmodifiable instance, owned by the caller.
    123  * Cache it for multiple operations, and uidna_close() it when done.
    124  * The instance is thread-safe, that is, it can be used concurrently.
    125  *
    126  * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
    127  *
    128  * @param options Bit set to modify the processing and error checking.
    129  *                See the option bit set values (UIDNA_DEFAULT, UIDNA_USE_STD3_RULES, ...) in uidna.h.
    130  * @param pErrorCode Standard ICU error code. Its input value must
    131  *                  pass the U_SUCCESS() test, or else the function returns
    132  *                  immediately. Check for U_FAILURE() on output or use with
    133  *                  function chaining. (See User Guide for details.)
    134  * @return the UTS #46 UIDNA instance, if successful
    135  * @stable ICU 4.6
    136  */
    137 U_STABLE UIDNA * U_EXPORT2
    138 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
    139 
    140 /**
    141  * Closes a UIDNA instance, such as one returned by uidna_openUTS46().
    142  * @param idna UIDNA instance to be closed
    143  * @stable ICU 4.6
    144  */
    145 U_STABLE void U_EXPORT2
    146 uidna_close(UIDNA *idna);
    147 
    148 #if U_SHOW_CPLUSPLUS_API
    149 
    150 U_NAMESPACE_BEGIN
    151 
    152 /**
    153  * \class LocalUIDNAPointer
    154  * "Smart pointer" class, closes a UIDNA via uidna_close().
    155  * For most methods see the LocalPointerBase base class.
    156  *
    157  * @see LocalPointerBase
    158  * @see LocalPointer
    159  * @stable ICU 4.6
    160  */
    161 U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
    162 
    163 U_NAMESPACE_END
    164 
    165 #endif  /* U_SHOW_CPLUSPLUS_API */
    166 
    167 /**
    168  * Output container for IDNA processing errors.
    169  * Initialize with UIDNA_INFO_INITIALIZER (sets the size field and clears all other fields):
    170  * \code
    171  * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
    172  * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
    173  * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
    174  * \endcode
    175  * @stable ICU 4.6
    176  */
    177 typedef struct UIDNAInfo {
    178     /** sizeof(UIDNAInfo), as set by UIDNA_INFO_INITIALIZER. @stable ICU 4.6 */
    179     int16_t size;
    180     /**
    181      * Set to TRUE if transitional and nontransitional processing produce different results.
    182      * For details see C++ IDNAInfo::isTransitionalDifferent().
    183      * @stable ICU 4.6
    184      */
    185     UBool isTransitionalDifferent;
    186     UBool reservedB3;  /**< Reserved field, do not use. @internal */
    187     /**
    188      * Bit set indicating IDNA processing errors. 0 if no errors.
    189      * See the UIDNA_ERROR_... constants in this file.
    190      * @stable ICU 4.6
    191      */
    192     uint32_t errors;
    193     int32_t reservedI2;  /**< Reserved field, do not use. @internal */
    194     int32_t reservedI3;  /**< Reserved field, do not use. @internal */
    195 } UIDNAInfo;
    196 
    197 /**
    198  * Static initializer for a UIDNAInfo struct: size=sizeof(UIDNAInfo), both UBool fields FALSE, errors and reserved ints 0.
    199  * @stable ICU 4.6
    200  */
    201 #define UIDNA_INFO_INITIALIZER { \
    202     (int16_t)sizeof(UIDNAInfo), \
    203     FALSE, FALSE, \
    204     0, 0, 0 }
    205 
    206 /**
    207  * Converts a single domain name label into its ASCII form for DNS lookup.
    208  * If any processing step fails, then pInfo->errors will be non-zero and
    209  * the result might not be an ASCII string.
    210  * The label might be modified according to the types of errors.
    211  * Labels with severe errors will be left in (or turned into) their Unicode form.
    212  *
    213  * The UErrorCode indicates an error only in exceptional cases,
    214  * such as a U_MEMORY_ALLOCATION_ERROR.
    215  *
    216  * @param idna UIDNA instance
    217  * @param label Input domain name label
    218  * @param length Label length, or -1 if NUL-terminated
    219  * @param dest Destination string buffer
    220  * @param capacity Destination buffer capacity
    221  * @param pInfo Output container of IDNA processing details; see UIDNAInfo and the UIDNA_ERROR_... bits.
    222  * @param pErrorCode Standard ICU error code. Its input value must
    223  *                  pass the U_SUCCESS() test, or else the function returns
    224  *                  immediately. Check for U_FAILURE() on output or use with
    225  *                  function chaining. (See User Guide for details.)
    226  * @return destination string length
    227  * @stable ICU 4.6
    228  */
    229 U_STABLE int32_t U_EXPORT2
    230 uidna_labelToASCII(const UIDNA *idna,
    231                    const UChar *label, int32_t length,
    232                    UChar *dest, int32_t capacity,
    233                    UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    234 
    235 /**
    236  * Converts a single domain name label into its Unicode form for human-readable display.
    237  * If any processing step fails, then pInfo->errors will be non-zero.
    238  * The label might be modified according to the types of errors.
    239  *
    240  * The UErrorCode indicates an error only in exceptional cases,
    241  * such as a U_MEMORY_ALLOCATION_ERROR.
    242  *
    243  * @param idna UIDNA instance
    244  * @param label Input domain name label
    245  * @param length Label length, or -1 if NUL-terminated
    246  * @param dest Destination string buffer
    247  * @param capacity Destination buffer capacity
    248  * @param pInfo Output container of IDNA processing details; see UIDNAInfo and the UIDNA_ERROR_... bits.
    249  * @param pErrorCode Standard ICU error code. Its input value must
    250  *                  pass the U_SUCCESS() test, or else the function returns
    251  *                  immediately. Check for U_FAILURE() on output or use with
    252  *                  function chaining. (See User Guide for details.)
    253  * @return destination string length
    254  * @stable ICU 4.6
    255  */
    256 U_STABLE int32_t U_EXPORT2
    257 uidna_labelToUnicode(const UIDNA *idna,
    258                      const UChar *label, int32_t length,
    259                      UChar *dest, int32_t capacity,
    260                      UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    261 
    262 /**
    263  * Converts a whole domain name into its ASCII form for DNS lookup.
    264  * If any processing step fails, then pInfo->errors will be non-zero and
    265  * the result might not be an ASCII string.
    266  * The domain name might be modified according to the types of errors.
    267  * Labels with severe errors will be left in (or turned into) their Unicode form.
    268  *
    269  * The UErrorCode indicates an error only in exceptional cases,
    270  * such as a U_MEMORY_ALLOCATION_ERROR.
    271  *
    272  * @param idna UIDNA instance
    273  * @param name Input domain name
    274  * @param length Domain name length, or -1 if NUL-terminated
    275  * @param dest Destination string buffer
    276  * @param capacity Destination buffer capacity
    277  * @param pInfo Output container of IDNA processing details; see UIDNAInfo and the UIDNA_ERROR_... bits.
    278  * @param pErrorCode Standard ICU error code. Its input value must
    279  *                  pass the U_SUCCESS() test, or else the function returns
    280  *                  immediately. Check for U_FAILURE() on output or use with
    281  *                  function chaining. (See User Guide for details.)
    282  * @return destination string length
    283  * @stable ICU 4.6
    284  */
    285 U_STABLE int32_t U_EXPORT2
    286 uidna_nameToASCII(const UIDNA *idna,
    287                   const UChar *name, int32_t length,
    288                   UChar *dest, int32_t capacity,
    289                   UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    290 
    291 /**
    292  * Converts a whole domain name into its Unicode form for human-readable display.
    293  * If any processing step fails, then pInfo->errors will be non-zero.
    294  * The domain name might be modified according to the types of errors.
    295  *
    296  * The UErrorCode indicates an error only in exceptional cases,
    297  * such as a U_MEMORY_ALLOCATION_ERROR.
    298  *
    299  * @param idna UIDNA instance
    300  * @param name Input domain name
    301  * @param length Domain name length, or -1 if NUL-terminated
    302  * @param dest Destination string buffer
    303  * @param capacity Destination buffer capacity
    304  * @param pInfo Output container of IDNA processing details; see UIDNAInfo and the UIDNA_ERROR_... bits.
    305  * @param pErrorCode Standard ICU error code. Its input value must
    306  *                  pass the U_SUCCESS() test, or else the function returns
    307  *                  immediately. Check for U_FAILURE() on output or use with
    308  *                  function chaining. (See User Guide for details.)
    309  * @return destination string length
    310  * @stable ICU 4.6
    311  */
    312 U_STABLE int32_t U_EXPORT2
    313 uidna_nameToUnicode(const UIDNA *idna,
    314                     const UChar *name, int32_t length,
    315                     UChar *dest, int32_t capacity,
    316                     UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    317 
    318 /* UTF-8 versions of the processing methods --------------------------------- */
    319 
    320 /**
    321  * Converts a single domain name label into its ASCII form for DNS lookup.
    322  * UTF-8 version of uidna_labelToASCII(), same behavior.
    323  *
    324  * @param idna UIDNA instance
    325  * @param label Input domain name label, as a UTF-8 char string
    326  * @param length Label length, or -1 if NUL-terminated
    327  * @param dest Destination string buffer
    328  * @param capacity Destination buffer capacity
    329  * @param pInfo Output container of IDNA processing details; see UIDNAInfo and the UIDNA_ERROR_... bits.
    330  * @param pErrorCode Standard ICU error code. Its input value must
    331  *                  pass the U_SUCCESS() test, or else the function returns
    332  *                  immediately. Check for U_FAILURE() on output or use with
    333  *                  function chaining. (See User Guide for details.)
    334  * @return destination string length
    335  * @stable ICU 4.6
    336  */
    337 U_STABLE int32_t U_EXPORT2
    338 uidna_labelToASCII_UTF8(const UIDNA *idna,
    339                         const char *label, int32_t length,
    340                         char *dest, int32_t capacity,
    341                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    342 
    343 /**
    344  * Converts a single domain name label into its Unicode form for human-readable display.
    345  * UTF-8 version of uidna_labelToUnicode(), same behavior.
    346  *
    347  * @param idna UIDNA instance
    348  * @param label Input domain name label, as a UTF-8 char string
    349  * @param length Label length, or -1 if NUL-terminated
    350  * @param dest Destination string buffer
    351  * @param capacity Destination buffer capacity
    352  * @param pInfo Output container of IDNA processing details; see UIDNAInfo and the UIDNA_ERROR_... bits.
    353  * @param pErrorCode Standard ICU error code. Its input value must
    354  *                  pass the U_SUCCESS() test, or else the function returns
    355  *                  immediately. Check for U_FAILURE() on output or use with
    356  *                  function chaining. (See User Guide for details.)
    357  * @return destination string length
    358  * @stable ICU 4.6
    359  */
    360 U_STABLE int32_t U_EXPORT2
    361 uidna_labelToUnicodeUTF8(const UIDNA *idna,
    362                          const char *label, int32_t length,
    363                          char *dest, int32_t capacity,
    364                          UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    365 
    366 /**
    367  * Converts a whole domain name into its ASCII form for DNS lookup.
    368  * UTF-8 version of uidna_nameToASCII(), same behavior.
    369  *
    370  * @param idna UIDNA instance
    371  * @param name Input domain name, as a UTF-8 char string
    372  * @param length Domain name length, or -1 if NUL-terminated
    373  * @param dest Destination string buffer
    374  * @param capacity Destination buffer capacity
    375  * @param pInfo Output container of IDNA processing details; see UIDNAInfo and the UIDNA_ERROR_... bits.
    376  * @param pErrorCode Standard ICU error code. Its input value must
    377  *                  pass the U_SUCCESS() test, or else the function returns
    378  *                  immediately. Check for U_FAILURE() on output or use with
    379  *                  function chaining. (See User Guide for details.)
    380  * @return destination string length
    381  * @stable ICU 4.6
    382  */
    383 U_STABLE int32_t U_EXPORT2
    384 uidna_nameToASCII_UTF8(const UIDNA *idna,
    385                        const char *name, int32_t length,
    386                        char *dest, int32_t capacity,
    387                        UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    388 
    389 /**
    390  * Converts a whole domain name into its Unicode form for human-readable display.
    391  * UTF-8 version of uidna_nameToUnicode(), same behavior.
    392  *
    393  * @param idna UIDNA instance
    394  * @param name Input domain name, as a UTF-8 char string
    395  * @param length Domain name length, or -1 if NUL-terminated
    396  * @param dest Destination string buffer
    397  * @param capacity Destination buffer capacity
    398  * @param pInfo Output container of IDNA processing details; see UIDNAInfo and the UIDNA_ERROR_... bits.
    399  * @param pErrorCode Standard ICU error code. Its input value must
    400  *                  pass the U_SUCCESS() test, or else the function returns
    401  *                  immediately. Check for U_FAILURE() on output or use with
    402  *                  function chaining. (See User Guide for details.)
    403  * @return destination string length
    404  * @stable ICU 4.6
    405  */
    406 U_STABLE int32_t U_EXPORT2
    407 uidna_nameToUnicodeUTF8(const UIDNA *idna,
    408                         const char *name, int32_t length,
    409                         char *dest, int32_t capacity,
    410                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
    411 
    412 /*
    413  * IDNA error bit set values, as reported in UIDNAInfo.errors.
    414  * When a domain name or label fails a processing step or does not meet the
    415  * validity criteria, then one or more of these error bits are set.
    416  */
    417 enum {
    418     /**
    419      * A non-final domain name label (or the whole domain name) is empty.
    420      * @stable ICU 4.6
    421      */
    422     UIDNA_ERROR_EMPTY_LABEL=1,
    423     /**
    424      * A domain name label is longer than 63 bytes.
    425      * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
    426      * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
    427      * @stable ICU 4.6
    428      */
    429     UIDNA_ERROR_LABEL_TOO_LONG=2,
    430     /**
    431      * A domain name is longer than 255 bytes in its storage form.
    432      * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
    433      * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
    434      * @stable ICU 4.6
    435      */
    436     UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
    437     /**
    438      * A label starts with a hyphen-minus ('-').
    439      * @stable ICU 4.6
    440      */
    441     UIDNA_ERROR_LEADING_HYPHEN=8,
    442     /**
    443      * A label ends with a hyphen-minus ('-').
    444      * @stable ICU 4.6
    445      */
    446     UIDNA_ERROR_TRAILING_HYPHEN=0x10,
    447     /**
    448      * A label contains hyphen-minus ('-') in the third and fourth positions.
    449      * @stable ICU 4.6
    450      */
    451     UIDNA_ERROR_HYPHEN_3_4=0x20,
    452     /**
    453      * A label starts with a combining mark.
    454      * @stable ICU 4.6
    455      */
    456     UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
    457     /**
    458      * A label or domain name contains disallowed characters.
    459      * @stable ICU 4.6
    460      */
    461     UIDNA_ERROR_DISALLOWED=0x80,
    462     /**
    463      * A label starts with "xn--" but does not contain valid Punycode.
    464      * That is, an xn-- label failed Punycode decoding.
    465      * @stable ICU 4.6
    466      */
    467     UIDNA_ERROR_PUNYCODE=0x100,
    468     /**
    469      * A label contains a dot (full stop).
    470      * This can occur in an input string for a single-label function.
    471      * @stable ICU 4.6
    472      */
    473     UIDNA_ERROR_LABEL_HAS_DOT=0x200,
    474     /**
    475      * An ACE label does not contain a valid label string.
    476      * The label was successfully ACE (Punycode) decoded but the resulting
    477      * string had severe validation errors. For example,
    478      * it might contain characters that are not allowed in ACE labels,
    479      * or it might not be normalized.
    480      * @stable ICU 4.6
    481      */
    482     UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
    483     /**
    484      * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
    485      * @stable ICU 4.6
    486      */
    487     UIDNA_ERROR_BIDI=0x800,
    488     /**
    489      * A label does not meet the IDNA CONTEXTJ requirements.
    490      * @stable ICU 4.6
    491      */
    492     UIDNA_ERROR_CONTEXTJ=0x1000,
    493 #ifndef U_HIDE_DRAFT_API
    494     /**
    495      * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
    496      * Some punctuation characters "Would otherwise have been DISALLOWED"
    497      * but are allowed in certain contexts. (RFC 5892)
    498      * @draft ICU 49
    499      */
    500     UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
    501     /**
    502      * A label does not meet the IDNA CONTEXTO requirements for digits.
    503      * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
    504      * @draft ICU 49
    505      */
    506     UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
    507 #endif  /* U_HIDE_DRAFT_API -- NOTE(review): when it is defined, the enumerator list ends with a trailing comma (rejected by strict C89/C++98); confirm the supported language standards. */
    508 };
    509 
    510 /* IDNA2003 API ------------------------------------------------------------- */
    511 
    512 /**
    513  * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
    514  * This operation is done on <b>single labels</b> before sending them to something that expects
    515  * ASCII names. A label is an individual part of a domain name. Labels are usually
    516  * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
    517  *
    518  * IDNA2003 API Overview:
    519  *
    520  * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
    521  * (http://www.ietf.org/rfc/rfc3490.txt).
    522  * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
    523  * containing non-ASCII code points are processed by the
    524  * ToASCII operation before passing it to resolver libraries. Domain names
    525  * that are obtained from resolver libraries are processed by the
    526  * ToUnicode operation before displaying the domain name to the user.
    527  * IDNA requires that implementations process input strings with Nameprep
    528  * (http://www.ietf.org/rfc/rfc3491.txt),
    529  * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
    530  * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
    531  * Implementations of IDNA MUST fully implement Nameprep and Punycode;
    532  * neither Nameprep nor Punycode are optional.
    533  * The input and output of ToASCII and ToUnicode operations are Unicode
    534  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
    535  * multiple times to an input string will yield the same result as applying the operation
    536  * once.
    537  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
    538  * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
    539  *
    540  * @param src               Input UChar array containing label in Unicode.
    541  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
    542  * @param dest              Output UChar array with ASCII (ACE encoded) label.
    543  * @param destCapacity      Size of dest.
    544  * @param options           A bit set of options:
    545  *
    546  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    547  *                              and do not use STD3 ASCII rules.
    548  *                              If unassigned code points are found the operation fails with
    549  *                              U_IDNA_UNASSIGNED_ERROR error code.
    550  *
    551  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    552  *                              If this option is set, the unassigned code points in the input
    553  *                              are treated as normal Unicode code points.
    554  *
    555  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
    556  *                              If this option is set and the input does not satisfy STD3 rules,
    557  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
    558  *
    559  * @param parseError        Pointer to UParseError struct to receive information on position
    560  *                          of error if an error is encountered. Can be NULL.
    561  * @param status            ICU in/out error code parameter.
    562  *                          U_INVALID_CHAR_FOUND if src contains
    563  *                          unmatched single surrogates.
    564  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
    565  *                          too many code points.
    566  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
    567  * @return The length of the result string, if successful or in case of a buffer overflow;
    568  *         in the latter case it will be greater than destCapacity.
    569  * @stable ICU 2.6
    570  */
    571 U_STABLE int32_t U_EXPORT2
    572 uidna_toASCII(const UChar* src, int32_t srcLength,
    573               UChar* dest, int32_t destCapacity,
    574               int32_t options,
    575               UParseError* parseError,
    576               UErrorCode* status);
    577 
    578 
    579 /**
    580  * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
    581  * This operation is done on <b>single labels</b> before sending them to something that expects
    582  * Unicode names. A label is an individual part of a domain name. Labels are usually
    583  * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
    584  *
    585  * @param src               Input UChar array containing ASCII (ACE encoded) label.
    586  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
    587  * @param dest              Output UChar array containing the converted Unicode equivalent of the label.
    588  * @param destCapacity      Size of dest.
    589  * @param options           A bit set of options:
    590  *
    591  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    592  *                              and do not use STD3 ASCII rules.
    593  *                              If unassigned code points are found the operation fails with
    594  *                              U_IDNA_UNASSIGNED_ERROR error code.
    595  *
    596  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    597  *                              If this option is set, the unassigned code points in the input
    598  *                              are treated as normal Unicode code points. <b> Note: </b> This option is
    599  *                              required on the toUnicode operation because the RFC mandates
    600  *                              verification of decoded ACE input by applying toASCII and comparing
    601  *                              its output with the source.
    602  *
    603  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
    604  *                              If this option is set and the input does not satisfy STD3 rules,
    605  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
    606  *
    607  * @param parseError        Pointer to UParseError struct to receive information on position
    608  *                          of error if an error is encountered. Can be NULL.
    609  * @param status            ICU in/out error code parameter.
    610  *                          U_INVALID_CHAR_FOUND if src contains
    611  *                          unmatched single surrogates.
    612  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
    613  *                          too many code points.
    614  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
    615  * @return The length of the result string, if successful or in case of a buffer overflow;
    616  *         in the latter case it will be greater than destCapacity.
    617  * @stable ICU 2.6
    618  */
    619 U_STABLE int32_t U_EXPORT2
    620 uidna_toUnicode(const UChar* src, int32_t srcLength,
    621                 UChar* dest, int32_t destCapacity,
    622                 int32_t options,
    623                 UParseError* parseError,
    624                 UErrorCode* status);
    625 
    626 
    627 /**
    628  * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
    629  * This operation is done on complete domain names, e.g., "www.example.com".
    630  * It is important to note that this operation can fail. If it fails, then the input
    631  * domain name cannot be used as an Internationalized Domain Name, and the application
    632  * should have methods defined to deal with the failure.
    633  *
    634  * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
    635  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
    636  * and then convert. This function does not offer that level of granularity. The options, once
    637  * set, will apply to all labels in the domain name.
    638  *
    639  * @param src               Input UChar array containing IDN in Unicode.
    640  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
    641  * @param dest              Output UChar array with ASCII (ACE encoded) IDN.
    642  * @param destCapacity      Size of dest.
    643  * @param options           A bit set of options:
    644  *
    645  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    646  *                              and do not use STD3 ASCII rules.
    647  *                              If unassigned code points are found, the operation fails with
    648  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
    649  *
    650  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    651  *                              If this option is set, unassigned code points in the input
    652  *                              are treated as normal Unicode code points.
    653  *
    654  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
    655  *                              If this option is set and the input does not satisfy STD3 rules,
    656  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
    657  *
    658  * @param parseError        Pointer to UParseError struct to receive information on the position
    659  *                          of the error if an error is encountered. Can be NULL.
    660  * @param status            ICU in/out error code parameter.
    661  *                          U_INVALID_CHAR_FOUND if src contains
    662  *                          unmatched single surrogates.
    663  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
    664  *                          too many code points.
    665  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not large enough.
    666  * @return The length of the result string, both on success and in case of a buffer overflow,
    667  *         in which case it will be greater than destCapacity.
    668  * @stable ICU 2.6
    669  */
    670 U_STABLE int32_t U_EXPORT2
    671 uidna_IDNToASCII(  const UChar* src, int32_t srcLength,
    672                    UChar* dest, int32_t destCapacity,
    673                    int32_t options,
    674                    UParseError* parseError,
    675                    UErrorCode* status);
    676 
    677 /**
    678  * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
    679  * This operation is done on complete domain names, e.g., "www.example.com".
    680  *
    681  * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
    682  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
    683  * and then convert. This function does not offer that level of granularity. The options, once
    684  * set, will apply to all labels in the domain name.
    685  *
    686  * @param src               Input UChar array containing IDN in ASCII (ACE encoded) form.
    687  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
    688  * @param dest              Output UChar array containing Unicode equivalent of source IDN.
    689  * @param destCapacity      Size of dest.
    690  * @param options           A bit set of options:
    691  *
    692  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    693  *                              and do not use STD3 ASCII rules.
    694  *                              If unassigned code points are found, the operation fails with
    695  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
    696  *
    697  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    698  *                              If this option is set, unassigned code points in the input
    699  *                              are treated as normal Unicode code points.
    700  *
    701  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
    702  *                              If this option is set and the input does not satisfy STD3 rules,
    703  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
    704  *
    705  * @param parseError        Pointer to UParseError struct to receive information on the position
    706  *                          of the error if an error is encountered. Can be NULL.
    707  * @param status            ICU in/out error code parameter.
    708  *                          U_INVALID_CHAR_FOUND if src contains
    709  *                          unmatched single surrogates.
    710  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
    711  *                          too many code points.
    712  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not large enough.
    713  * @return The length of the result string, both on success and in case of a buffer overflow,
    714  *         in which case it will be greater than destCapacity.
    715  * @stable ICU 2.6
    716  */
    717 U_STABLE int32_t U_EXPORT2
    718 uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
    719                      UChar* dest, int32_t destCapacity,
    720                      int32_t options,
    721                      UParseError* parseError,
    722                      UErrorCode* status);
    723 
    724 /**
    725  * IDNA2003: Compare two IDN strings for equivalence.
    726  * This function splits the domain names into labels and compares them.
    727  * According to the IDN RFC, whenever two labels are compared, they are
    728  * considered equal if and only if their ASCII forms (obtained by
    729  * applying toASCII) match using a case-insensitive ASCII comparison.
    730  * Two domain names are considered a match if and only if all labels
    731  * match, regardless of whether the label separators match.
    732  *
    733  * @param s1                First source string.
    734  * @param length1           Length of first source string, or -1 if NUL-terminated.
    735  *
    736  * @param s2                Second source string.
    737  * @param length2           Length of second source string, or -1 if NUL-terminated.
    738  * @param options           A bit set of options:
    739  *
    740  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    741  *                              and do not use STD3 ASCII rules.
    742  *                              If unassigned code points are found, the operation fails with
    743  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
    744  *
    745  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    746  *                              If this option is set, unassigned code points in the input
    747  *                              are treated as normal Unicode code points.
    748  *
    749  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
    750  *                              If this option is set and the input does not satisfy STD3 rules,
    751  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
    752  *
    753  * @param status            ICU error code in/out parameter.
    754  *                          Must fulfill U_SUCCESS before the function call.
    755  * @return <0 or 0 or >0 as usual for string comparisons.
    756  * @stable ICU 2.6
    757  */
    758 U_STABLE int32_t U_EXPORT2
    759 uidna_compare(  const UChar *s1, int32_t length1,
    760                 const UChar *s2, int32_t length2,
    761                 int32_t options,
    762                 UErrorCode* status);
    763 
    764 #endif /* #if !UCONFIG_NO_IDNA */
    765 
    766 #endif
    767