Home | History | Annotate | Download | only in unicode
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2010-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  idna.h
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2010mar05
     12 *   created by: Markus W. Scherer
     13 */
     14 
     15 #ifndef __IDNA_H__
     16 #define __IDNA_H__
     17 
     18 /**
     19  * \file
     20  * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
     21  */
     22 
     23 #include "unicode/utypes.h"
     24 
     25 #if !UCONFIG_NO_IDNA
     26 
     27 #include "unicode/bytestream.h"
     28 #include "unicode/stringpiece.h"
     29 #include "unicode/uidna.h"
     30 #include "unicode/unistr.h"
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 class IDNAInfo;
     35 
     36 /**
     37  * Abstract base class for IDNA processing.
     38  * See http://www.unicode.org/reports/tr46/
     39  * and http://www.ietf.org/rfc/rfc3490.txt
     40  *
     41  * The IDNA class is not intended for public subclassing.
     42  *
     43  * This C++ API currently only implements UTS #46.
     44  * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
     45  * and IDNA2003 (functions that do not use a service object).
     46  * @stable ICU 4.6
     47  */
     48 class U_COMMON_API IDNA : public UObject {
     49 public:
     50     /**
     51      * Destructor.
     52      * @stable ICU 4.6
     53      */
     54     ~IDNA();
     55 
     56     /**
     57      * Returns an IDNA instance which implements UTS #46.
     58      * Returns an unmodifiable instance, owned by the caller.
     59      * Cache it for multiple operations, and delete it when done.
     60      * The instance is thread-safe, that is, it can be used concurrently.
     61      *
     62      * UTS #46 defines Unicode IDNA Compatibility Processing,
     63      * updated to the latest version of Unicode and compatible with both
     64      * IDNA2003 and IDNA2008.
     65      *
     66      * The worker functions use transitional processing, including deviation mappings,
     67      * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
     68      * is used in which case the deviation characters are passed through without change.
     69      *
     70      * Disallowed characters are mapped to U+FFFD.
     71      *
     72      * For available options see the uidna.h header.
     73      * Operations with the UTS #46 instance do not support the
     74      * UIDNA_ALLOW_UNASSIGNED option.
     75      *
     76      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
     77      * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
     78      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
     79      *
     80      * @param options Bit set to modify the processing and error checking.
     81      *                See option bit set values in uidna.h.
     82      * @param errorCode Standard ICU error code. Its input value must
     83      *                  pass the U_SUCCESS() test, or else the function returns
     84      *                  immediately. Check for U_FAILURE() on output or use with
     85      *                  function chaining. (See User Guide for details.)
     86      * @return the UTS #46 IDNA instance, if successful
     87      * @stable ICU 4.6
     88      */
     89     static IDNA *
     90     createUTS46Instance(uint32_t options, UErrorCode &errorCode);
     91 
     92     /**
     93      * Converts a single domain name label into its ASCII form for DNS lookup.
     94      * If any processing step fails, then info.hasErrors() will be TRUE and
     95      * the result might not be an ASCII string.
     96      * The label might be modified according to the types of errors.
     97      * Labels with severe errors will be left in (or turned into) their Unicode form.
     98      *
     99      * The UErrorCode indicates an error only in exceptional cases,
    100      * such as a U_MEMORY_ALLOCATION_ERROR.
    101      *
    102      * @param label Input domain name label
    103      * @param dest Destination string object
    104      * @param info Output container of IDNA processing details.
    105      * @param errorCode Standard ICU error code. Its input value must
    106      *                  pass the U_SUCCESS() test, or else the function returns
    107      *                  immediately. Check for U_FAILURE() on output or use with
    108      *                  function chaining. (See User Guide for details.)
    109      * @return dest
    110      * @stable ICU 4.6
    111      */
    112     virtual UnicodeString &
    113     labelToASCII(const UnicodeString &label, UnicodeString &dest,
    114                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
    115 
    116     /**
    117      * Converts a single domain name label into its Unicode form for human-readable display.
    118      * If any processing step fails, then info.hasErrors() will be TRUE.
    119      * The label might be modified according to the types of errors.
    120      *
    121      * The UErrorCode indicates an error only in exceptional cases,
    122      * such as a U_MEMORY_ALLOCATION_ERROR.
    123      *
    124      * @param label Input domain name label
    125      * @param dest Destination string object
    126      * @param info Output container of IDNA processing details.
    127      * @param errorCode Standard ICU error code. Its input value must
    128      *                  pass the U_SUCCESS() test, or else the function returns
    129      *                  immediately. Check for U_FAILURE() on output or use with
    130      *                  function chaining. (See User Guide for details.)
    131      * @return dest
    132      * @stable ICU 4.6
    133      */
    134     virtual UnicodeString &
    135     labelToUnicode(const UnicodeString &label, UnicodeString &dest,
    136                    IDNAInfo &info, UErrorCode &errorCode) const = 0;
    137 
    138     /**
    139      * Converts a whole domain name into its ASCII form for DNS lookup.
    140      * If any processing step fails, then info.hasErrors() will be TRUE and
    141      * the result might not be an ASCII string.
    142      * The domain name might be modified according to the types of errors.
    143      * Labels with severe errors will be left in (or turned into) their Unicode form.
    144      *
    145      * The UErrorCode indicates an error only in exceptional cases,
    146      * such as a U_MEMORY_ALLOCATION_ERROR.
    147      *
    148      * @param name Input domain name
    149      * @param dest Destination string object
    150      * @param info Output container of IDNA processing details.
    151      * @param errorCode Standard ICU error code. Its input value must
    152      *                  pass the U_SUCCESS() test, or else the function returns
    153      *                  immediately. Check for U_FAILURE() on output or use with
    154      *                  function chaining. (See User Guide for details.)
    155      * @return dest
    156      * @stable ICU 4.6
    157      */
    158     virtual UnicodeString &
    159     nameToASCII(const UnicodeString &name, UnicodeString &dest,
    160                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
    161 
    162     /**
    163      * Converts a whole domain name into its Unicode form for human-readable display.
    164      * If any processing step fails, then info.hasErrors() will be TRUE.
    165      * The domain name might be modified according to the types of errors.
    166      *
    167      * The UErrorCode indicates an error only in exceptional cases,
    168      * such as a U_MEMORY_ALLOCATION_ERROR.
    169      *
    170      * @param name Input domain name
    171      * @param dest Destination string object
    172      * @param info Output container of IDNA processing details.
    173      * @param errorCode Standard ICU error code. Its input value must
    174      *                  pass the U_SUCCESS() test, or else the function returns
    175      *                  immediately. Check for U_FAILURE() on output or use with
    176      *                  function chaining. (See User Guide for details.)
    177      * @return dest
    178      * @stable ICU 4.6
    179      */
    180     virtual UnicodeString &
    181     nameToUnicode(const UnicodeString &name, UnicodeString &dest,
    182                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
    183 
    184     // UTF-8 versions of the processing methods ---------------------------- ***
    185 
    186     /**
    187      * Converts a single domain name label into its ASCII form for DNS lookup.
    188      * UTF-8 version of labelToASCII(), same behavior, except that the result
    189      * is written to the dest byte sink and the function itself returns nothing.
    190      *
    191      * @param label Input domain name label (UTF-8)
    192      * @param dest Destination byte sink; Flush()ed if successful
    193      * @param info Output container of IDNA processing details.
    194      * @param errorCode Standard ICU error code. Its input value must
    195      *                  pass the U_SUCCESS() test, or else the function returns
    196      *                  immediately. Check for U_FAILURE() on output or use with
    197      *                  function chaining. (See User Guide for details.)
    198      * @stable ICU 4.6
    199      */
    200     virtual void
    201     labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
    202                       IDNAInfo &info, UErrorCode &errorCode) const;
    203 
    204     /**
    205      * Converts a single domain name label into its Unicode form for human-readable display.
    206      * UTF-8 version of labelToUnicode(), same behavior, except that the result
    207      * is written to the dest byte sink and the function itself returns nothing.
    208      *
    209      * @param label Input domain name label (UTF-8)
    210      * @param dest Destination byte sink; Flush()ed if successful
    211      * @param info Output container of IDNA processing details.
    212      * @param errorCode Standard ICU error code. Its input value must
    213      *                  pass the U_SUCCESS() test, or else the function returns
    214      *                  immediately. Check for U_FAILURE() on output or use with
    215      *                  function chaining. (See User Guide for details.)
    216      * @stable ICU 4.6
    217      */
    218     virtual void
    219     labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
    220                        IDNAInfo &info, UErrorCode &errorCode) const;
    221 
    222     /**
    223      * Converts a whole domain name into its ASCII form for DNS lookup.
    224      * UTF-8 version of nameToASCII(), same behavior, except that the result
    225      * is written to the dest byte sink and the function itself returns nothing.
    226      *
    227      * @param name Input domain name (UTF-8)
    228      * @param dest Destination byte sink; Flush()ed if successful
    229      * @param info Output container of IDNA processing details.
    230      * @param errorCode Standard ICU error code. Its input value must
    231      *                  pass the U_SUCCESS() test, or else the function returns
    232      *                  immediately. Check for U_FAILURE() on output or use with
    233      *                  function chaining. (See User Guide for details.)
    234      * @stable ICU 4.6
    235      */
    236     virtual void
    237     nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
    238                      IDNAInfo &info, UErrorCode &errorCode) const;
    239 
    240     /**
    241      * Converts a whole domain name into its Unicode form for human-readable display.
    242      * UTF-8 version of nameToUnicode(), same behavior, except that the result
    243      * is written to the dest byte sink and the function itself returns nothing.
    244      *
    245      * @param name Input domain name (UTF-8)
    246      * @param dest Destination byte sink; Flush()ed if successful
    247      * @param info Output container of IDNA processing details.
    248      * @param errorCode Standard ICU error code. Its input value must
    249      *                  pass the U_SUCCESS() test, or else the function returns
    250      *                  immediately. Check for U_FAILURE() on output or use with
    251      *                  function chaining. (See User Guide for details.)
    252      * @stable ICU 4.6
    253      */
    254     virtual void
    255     nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
    256                       IDNAInfo &info, UErrorCode &errorCode) const;
    257 };
    258 
    259 class UTS46;
    260 
    261 /**
    262  * Output container for IDNA processing errors.
    263  * The IDNAInfo class is not suitable for subclassing.
    264  * @stable ICU 4.6
    265  */
    266 class U_COMMON_API IDNAInfo : public UMemory {
    267 public:
    268     /**
    269      * Constructor for stack allocation; starts with no errors recorded.
    270      * @stable ICU 4.6
    271      */
    272     IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
    273     /**
    274      * Were there IDNA processing errors?
    275      * @return TRUE if there were processing errors, that is, if getErrors()!=0
    276      * @stable ICU 4.6
    277      */
    278     UBool hasErrors() const { return errors!=0; }
    279     /**
    280      * Returns a bit set indicating IDNA processing errors.
    281      * See UIDNA_ERROR_... constants in uidna.h.
    282      * @return bit set of processing errors; 0 if there were none
    283      * @stable ICU 4.6
    284      */
    285     uint32_t getErrors() const { return errors; }
    286     /**
    287      * Returns TRUE if transitional and nontransitional processing produce different results.
    288      * This is the case when the input label or domain name contains
    289      * one or more deviation characters outside a Punycode label (see UTS #46).
    290      * <ul>
    291      * <li>With nontransitional processing, such characters are
    292      * copied to the destination string.
    293      * <li>With transitional processing, such characters are
    294      * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
    295      * </ul>
    296      * @return TRUE if transitional and nontransitional processing produce different results
    297      * @stable ICU 4.6
    298      */
    299     UBool isTransitionalDifferent() const { return isTransDiff; }
    300 
    301 private:
    302     friend class UTS46;  // gives UTS46 access to reset() and the private fields below
    303 
    304     IDNAInfo(const IDNAInfo &other);  // private to prevent copying
    305     IDNAInfo &operator=(const IDNAInfo &other);  // private to prevent copying
    306 
    307     void reset() {  // restores the same state that the default constructor sets
    308         errors=labelErrors=0;
    309         isTransDiff=FALSE;
    310         isBiDi=FALSE;
    311         isOkBiDi=TRUE;
    312     }
    313 
    314     uint32_t errors, labelErrors;  // errors: bit set returned by getErrors(); labelErrors: presumably per-label error bits -- TODO confirm in UTS46
    315     UBool isTransDiff;  // value returned by isTransitionalDifferent()
    316     UBool isBiDi;  // NOTE(review): only initialized/reset in this header; presumably BiDi state tracked by UTS46 -- confirm
    317     UBool isOkBiDi;  // NOTE(review): defaults to TRUE; only initialized/reset in this header, presumably maintained by UTS46 -- confirm
    318 };
    319 
    320 U_NAMESPACE_END
    321 
    322 #endif  // UCONFIG_NO_IDNA
    323 #endif  // __IDNA_H__
    324