Home | History | Annotate | Download | only in unicode
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  idna.h
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2010mar05
     12 *   created by: Markus W. Scherer
     13 */
     14 
     15 #ifndef __IDNA_H__
     16 #define __IDNA_H__
     17 
     18 /**
     19  * \file
     20  * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
     21  */
     22 
     23 #include "unicode/utypes.h"
     24 
     25 #if !UCONFIG_NO_IDNA
     26 
     27 #include "unicode/bytestream.h"
     28 #include "unicode/stringpiece.h"
     29 #include "unicode/uidna.h"
     30 #include "unicode/unistr.h"
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 class U_COMMON_API IDNAInfo;  // forward declaration: used in the IDNA method signatures, defined further down
     35 
     36 /**
     37  * Abstract base class for IDNA processing.
     38  * See http://www.unicode.org/reports/tr46/
     39  * and http://www.ietf.org/rfc/rfc3490.txt
     40  *
     41  * The IDNA class is not intended for public subclassing.
     42  *
     43  * This C++ API currently only implements UTS #46.
     44  * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
     45  * and IDNA2003 (functions that do not use a service object).
     46  * @draft ICU 4.6
     47  */
     48 class U_COMMON_API IDNA : public UObject {
     49 public:
     50     /**
     51      * Returns an IDNA instance which implements UTS #46.
     52      * Returns an unmodifiable instance, owned by the caller.
     53      * Cache it for multiple operations, and delete it when done.
     54      * The instance is thread-safe, that is, it can be used concurrently.
     55      *
     56      * UTS #46 defines Unicode IDNA Compatibility Processing,
     57      * updated to the latest version of Unicode and compatible with both
     58      * IDNA2003 and IDNA2008.
     59      *
     60      * The worker functions use transitional processing, including deviation mappings,
     61      * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
     62      * is used in which case the deviation characters are passed through without change.
     63      *
     64      * Disallowed characters are mapped to U+FFFD.
     65      *
     66      * For available options see the uidna.h header.
     67      * Operations with the UTS #46 instance do not support the
     68      * UIDNA_ALLOW_UNASSIGNED option.
     69      *
     70      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
     71      * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
     72      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
     73      *
     74      * @param options Bit set to modify the processing and error checking.
     75      *                See option bit set values in uidna.h.
     76      * @param errorCode Standard ICU error code. Its input value must
     77      *                  pass the U_SUCCESS() test, or else the function returns
     78      *                  immediately. Check for U_FAILURE() on output or use with
     79      *                  function chaining. (See User Guide for details.)
     80      * @return the UTS #46 IDNA instance, if successful
     81      * @draft ICU 4.6
     82      */
     83     static IDNA *
     84     createUTS46Instance(uint32_t options, UErrorCode &errorCode);
     85 
     86     /**
     87      * Converts a single domain name label into its ASCII form for DNS lookup.
     88      * If any processing step fails, then info.hasErrors() will be TRUE and
     89      * the result might not be an ASCII string.
     90      * The label might be modified according to the types of errors.
     91      * Labels with severe errors will be left in (or turned into) their Unicode form.
     92      *
     93      * The UErrorCode indicates an error only in exceptional cases,
     94      * such as a U_MEMORY_ALLOCATION_ERROR.
     95      *
     96      * @param label Input domain name label
     97      * @param dest Destination string object
     98      * @param info Output container of IDNA processing details.
     99      * @param errorCode Standard ICU error code. Its input value must
    100      *                  pass the U_SUCCESS() test, or else the function returns
    101      *                  immediately. Check for U_FAILURE() on output or use with
    102      *                  function chaining. (See User Guide for details.)
    103      * @return dest
    104      * @draft ICU 4.6
    105      */
    106     virtual UnicodeString &
    107     labelToASCII(const UnicodeString &label, UnicodeString &dest,
    108                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
    109 
    110     /**
    111      * Converts a single domain name label into its Unicode form for human-readable display.
    112      * If any processing step fails, then info.hasErrors() will be TRUE.
    113      * The label might be modified according to the types of errors.
    114      *
    115      * The UErrorCode indicates an error only in exceptional cases,
    116      * such as a U_MEMORY_ALLOCATION_ERROR.
    117      *
    118      * @param label Input domain name label
    119      * @param dest Destination string object
    120      * @param info Output container of IDNA processing details.
    121      * @param errorCode Standard ICU error code. Its input value must
    122      *                  pass the U_SUCCESS() test, or else the function returns
    123      *                  immediately. Check for U_FAILURE() on output or use with
    124      *                  function chaining. (See User Guide for details.)
    125      * @return dest
    126      * @draft ICU 4.6
    127      */
    128     virtual UnicodeString &
    129     labelToUnicode(const UnicodeString &label, UnicodeString &dest,
    130                    IDNAInfo &info, UErrorCode &errorCode) const = 0;
    131 
    132     /**
    133      * Converts a whole domain name into its ASCII form for DNS lookup.
    134      * If any processing step fails, then info.hasErrors() will be TRUE and
    135      * the result might not be an ASCII string.
    136      * The domain name might be modified according to the types of errors.
    137      * Labels with severe errors will be left in (or turned into) their Unicode form.
    138      *
    139      * The UErrorCode indicates an error only in exceptional cases,
    140      * such as a U_MEMORY_ALLOCATION_ERROR.
    141      *
    142      * @param name Input domain name
    143      * @param dest Destination string object
    144      * @param info Output container of IDNA processing details.
    145      * @param errorCode Standard ICU error code. Its input value must
    146      *                  pass the U_SUCCESS() test, or else the function returns
    147      *                  immediately. Check for U_FAILURE() on output or use with
    148      *                  function chaining. (See User Guide for details.)
    149      * @return dest
    150      * @draft ICU 4.6
    151      */
    152     virtual UnicodeString &
    153     nameToASCII(const UnicodeString &name, UnicodeString &dest,
    154                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
    155 
    156     /**
    157      * Converts a whole domain name into its Unicode form for human-readable display.
    158      * If any processing step fails, then info.hasErrors() will be TRUE.
    159      * The domain name might be modified according to the types of errors.
    160      *
    161      * The UErrorCode indicates an error only in exceptional cases,
    162      * such as a U_MEMORY_ALLOCATION_ERROR.
    163      *
    164      * @param name Input domain name
    165      * @param dest Destination string object
    166      * @param info Output container of IDNA processing details.
    167      * @param errorCode Standard ICU error code. Its input value must
    168      *                  pass the U_SUCCESS() test, or else the function returns
    169      *                  immediately. Check for U_FAILURE() on output or use with
    170      *                  function chaining. (See User Guide for details.)
    171      * @return dest
    172      * @draft ICU 4.6
    173      */
    174     virtual UnicodeString &
    175     nameToUnicode(const UnicodeString &name, UnicodeString &dest,
    176                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
    177 
    178     // UTF-8 versions of the processing methods ---------------------------- ***
    179 
    180     /**
    181      * Converts a single domain name label into its ASCII form for DNS lookup.
    182      * UTF-8 version of labelToASCII(), same behavior; unlike it, not declared pure virtual.
    183      *
    184      * @param label Input domain name label
    185      * @param dest Destination byte sink; Flush()ed if successful
    186      * @param info Output container of IDNA processing details.
    187      * @param errorCode Standard ICU error code. Its input value must
    188      *                  pass the U_SUCCESS() test, or else the function returns
    189      *                  immediately. Check for U_FAILURE() on output or use with
    190      *                  function chaining. (See User Guide for details.)
    191      * No return value (void); the result is delivered through the dest byte sink.
    192      * @draft ICU 4.6
    193      */
    194     virtual void
    195     labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
    196                       IDNAInfo &info, UErrorCode &errorCode) const;
    197 
    198     /**
    199      * Converts a single domain name label into its Unicode form for human-readable display.
    200      * UTF-8 version of labelToUnicode(), same behavior; unlike it, not declared pure virtual.
    201      *
    202      * @param label Input domain name label
    203      * @param dest Destination byte sink; Flush()ed if successful
    204      * @param info Output container of IDNA processing details.
    205      * @param errorCode Standard ICU error code. Its input value must
    206      *                  pass the U_SUCCESS() test, or else the function returns
    207      *                  immediately. Check for U_FAILURE() on output or use with
    208      *                  function chaining. (See User Guide for details.)
    209      * No return value (void); the result is delivered through the dest byte sink.
    210      * @draft ICU 4.6
    211      */
    212     virtual void
    213     labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
    214                        IDNAInfo &info, UErrorCode &errorCode) const;
    215 
    216     /**
    217      * Converts a whole domain name into its ASCII form for DNS lookup.
    218      * UTF-8 version of nameToASCII(), same behavior; unlike it, not declared pure virtual.
    219      *
    220      * @param name Input domain name
    221      * @param dest Destination byte sink; Flush()ed if successful
    222      * @param info Output container of IDNA processing details.
    223      * @param errorCode Standard ICU error code. Its input value must
    224      *                  pass the U_SUCCESS() test, or else the function returns
    225      *                  immediately. Check for U_FAILURE() on output or use with
    226      *                  function chaining. (See User Guide for details.)
    227      * No return value (void); the result is delivered through the dest byte sink.
    228      * @draft ICU 4.6
    229      */
    230     virtual void
    231     nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
    232                      IDNAInfo &info, UErrorCode &errorCode) const;
    233 
    234     /**
    235      * Converts a whole domain name into its Unicode form for human-readable display.
    236      * UTF-8 version of nameToUnicode(), same behavior; unlike it, not declared pure virtual.
    237      *
    238      * @param name Input domain name
    239      * @param dest Destination byte sink; Flush()ed if successful
    240      * @param info Output container of IDNA processing details.
    241      * @param errorCode Standard ICU error code. Its input value must
    242      *                  pass the U_SUCCESS() test, or else the function returns
    243      *                  immediately. Check for U_FAILURE() on output or use with
    244      *                  function chaining. (See User Guide for details.)
    245      * No return value (void); the result is delivered through the dest byte sink.
    246      * @draft ICU 4.6
    247      */
    248     virtual void
    249     nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
    250                       IDNAInfo &info, UErrorCode &errorCode) const;
    251 
    252 private:
    253     // Declared private: no ICU "poor man's RTTI" is offered for this class nor its subclasses.
    254     virtual UClassID getDynamicClassID() const;
    255 };
    256 
    257 class UTS46;  // forward declaration only: named as a friend of IDNAInfo, not defined in this header
    258 
    259 /**
    260  * Output container for IDNA processing errors.
    261  * The IDNAInfo class is not suitable for subclassing.
    262  * @draft ICU 4.6
    263  */
    264 class U_COMMON_API IDNAInfo : public UMemory {
    265 public:
    266     /**
    267      * Constructor for stack allocation; starts with no errors recorded (hasErrors() is FALSE).
    268      * @draft ICU 4.6
    269      */
    270     IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
    271     /**
    272      * Were there IDNA processing errors?
    273      * @return TRUE if there were processing errors, that is, if getErrors() is nonzero
    274      * @draft ICU 4.6
    275      */
    276     UBool hasErrors() const { return errors!=0; }
    277     /**
    278      * Returns a bit set indicating IDNA processing errors.
    279      * See UIDNA_ERROR_... constants in uidna.h.
    280      * @return bit set of processing errors
    281      * @draft ICU 4.6
    282      */
    283     uint32_t getErrors() const { return errors; }
    284     /**
    285      * Returns TRUE if transitional and nontransitional processing produce different results.
    286      * This is the case when the input label or domain name contains
    287      * one or more deviation characters outside a Punycode label (see UTS #46).
    288      * <ul>
    289      * <li>With nontransitional processing, such characters are
    290      * copied to the destination string.
    291      * <li>With transitional processing, such characters are
    292      * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
    293      * </ul>
    294      * @return TRUE if transitional and nontransitional processing produce different results
    295      * @draft ICU 4.6
    296      */
    297     UBool isTransitionalDifferent() const { return isTransDiff; }
    298 
    299 private:
    300     friend class UTS46;  // gives UTS46 access to reset() and the private fields below
    301 
    302     IDNAInfo(const IDNAInfo &other);  // no copying: private, and not defined in this header
    303     IDNAInfo &operator=(const IDNAInfo &other);  // no copying: private, and not defined in this header
    304 
    305     void reset() {  // puts every field back to the value the constructor gives it
    306         errors=labelErrors=0;
    307         isTransDiff=FALSE;
    308         isBiDi=FALSE;
    309         isOkBiDi=TRUE;
    310     }
    311 
    312     uint32_t errors, labelErrors;  // errors backs hasErrors()/getErrors(); labelErrors is not read in this header
    313     UBool isTransDiff;  // value returned by isTransitionalDifferent()
    314     UBool isBiDi;  // not read in this header; NOTE(review): presumably BiDi state kept by UTS46 -- confirm
    315     UBool isOkBiDi;  // not read in this header; starts out TRUE; NOTE(review): meaning set by UTS46 -- confirm
    316 };
    317 
    318 U_NAMESPACE_END
    319 
    320 #endif  // UCONFIG_NO_IDNA
    321 #endif  // __IDNA_H__
    322