Home | History | Annotate | Download | only in text
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2003-2016, International Business Machines Corporation and    *
      6  * others. All Rights Reserved.                                                *
      7  *******************************************************************************
      8  */
      9 
     10 package com.ibm.icu.text;
     11 
     12 import java.util.Collections;
     13 import java.util.EnumSet;
     14 import java.util.Set;
     15 
     16 import com.ibm.icu.impl.IDNA2003;
     17 import com.ibm.icu.impl.UTS46;
     18 
     19 /**
     20  * Abstract base class for IDNA processing.
     21  * See http://www.unicode.org/reports/tr46/
     22  * and http://www.ietf.org/rfc/rfc3490.txt
     23  * <p>
     24  * The IDNA class is not intended for public subclassing.
     25  * <p>
     26  * The non-static methods implement UTS #46 and IDNA2008.
     27  * IDNA2008 is implemented according to UTS #46, see getUTS46Instance().
     28  * <p>
     29  * IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated.
     30  * <p>
     31  * IDNA2003 API Overview:
     32  * <p>
     33  * The static IDNA API methods implement the IDNA protocol as defined in the
     34  * <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>.
     35  * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
     36  * containing non-ASCII code points are required to be processed by
     37  * ToASCII operation before passing it to resolver libraries. Domain names
     38  * that are obtained from resolver libraries are required to be processed by
     39  * ToUnicode operation before displaying the domain name to the user.
     40  * IDNA requires that implementations process input strings with
     41  * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>,
     42  * which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a> ,
     43  * and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>.
     44  * Implementations of IDNA MUST fully implement Nameprep and Punycode;
     45  * neither Nameprep nor Punycode are optional.
     46  * The input and output of ToASCII and ToUnicode operations are Unicode
     47  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
     48  * multiple times to an input string will yield the same result as applying the operation
     49  * once.
 * <pre>
 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
 * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string)
 * </pre>
     52  *
     53  * @author Ram Viswanadha, Markus Scherer
     54  * @stable ICU 2.8
     55  */
     56 public abstract class IDNA {
     57     /**
     58      * Default options value: None of the other options are set.
     59      * For use in static worker and factory methods.
     60      * @stable ICU 2.8
     61      */
     62     public static final int DEFAULT = 0;
     63     /**
     64      * Option to allow unassigned code points in domain names and labels.
     65      * For use in static worker and factory methods.
     66      * <p>This option is ignored by the UTS46 implementation.
     67      * (UTS #46 disallows unassigned code points.)
     68      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
     69      */
     70     @Deprecated
     71     public static final int ALLOW_UNASSIGNED = 1;
     72     /**
     73      * Option to check whether the input conforms to the STD3 ASCII rules,
     74      * for example the restriction of labels to LDH characters
     75      * (ASCII Letters, Digits and Hyphen-Minus).
     76      * For use in static worker and factory methods.
     77      * @stable ICU 2.8
     78      */
     79     public static final int USE_STD3_RULES = 2;
     80     /**
     81      * IDNA option to check for whether the input conforms to the BiDi rules.
     82      * For use in static worker and factory methods.
     83      * <p>This option is ignored by the IDNA2003 implementation.
     84      * (IDNA2003 always performs a BiDi check.)
     85      * @stable ICU 4.6
     86      */
     87     public static final int CHECK_BIDI = 4;
     88     /**
     89      * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
     90      * For use in static worker and factory methods.
     91      * <p>This option is ignored by the IDNA2003 implementation.
     92      * (The CONTEXTJ check is new in IDNA2008.)
     93      * @stable ICU 4.6
     94      */
     95     public static final int CHECK_CONTEXTJ = 8;
     96     /**
     97      * IDNA option for nontransitional processing in ToASCII().
     98      * For use in static worker and factory methods.
     99      * <p>By default, ToASCII() uses transitional processing.
    100      * <p>This option is ignored by the IDNA2003 implementation.
    101      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
    102      * @stable ICU 4.6
    103      */
    104     public static final int NONTRANSITIONAL_TO_ASCII = 0x10;
    105     /**
    106      * IDNA option for nontransitional processing in ToUnicode().
    107      * For use in static worker and factory methods.
    108      * <p>By default, ToUnicode() uses transitional processing.
    109      * <p>This option is ignored by the IDNA2003 implementation.
    110      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
    111      * @stable ICU 4.6
    112      */
    113     public static final int NONTRANSITIONAL_TO_UNICODE = 0x20;
    114     /**
    115      * IDNA option to check for whether the input conforms to the CONTEXTO rules.
    116      * For use in static worker and factory methods.
    117      * <p>This option is ignored by the IDNA2003 implementation.
    118      * (The CONTEXTO check is new in IDNA2008.)
    119      * <p>This is for use by registries for IDNA2008 conformance.
    120      * UTS #46 does not require the CONTEXTO check.
    121      * @stable ICU 49
    122      */
    123     public static final int CHECK_CONTEXTO = 0x40;
    124 
    125     /**
    126      * Returns an IDNA instance which implements UTS #46.
    127      * Returns an unmodifiable instance, owned by the caller.
    128      * Cache it for multiple operations, and delete it when done.
    129      * The instance is thread-safe, that is, it can be used concurrently.
    130      * <p>
    131      * UTS #46 defines Unicode IDNA Compatibility Processing,
    132      * updated to the latest version of Unicode and compatible with both
    133      * IDNA2003 and IDNA2008.
    134      * <p>
    135      * The worker functions use transitional processing, including deviation mappings,
    136      * unless NONTRANSITIONAL_TO_ASCII or NONTRANSITIONAL_TO_UNICODE
    137      * is used in which case the deviation characters are passed through without change.
    138      * <p>
    139      * Disallowed characters are mapped to U+FFFD.
    140      * <p>
    141      * Operations with the UTS #46 instance do not support the
    142      * ALLOW_UNASSIGNED option.
    143      * <p>
    144      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
    145      * When the USE_STD3_RULES option is used, ASCII characters other than
    146      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
    147      *
    148      * @param options Bit set to modify the processing and error checking.
    149      * @return the UTS #46 IDNA instance, if successful
    150      * @stable ICU 4.6
    151      */
    152     public static IDNA getUTS46Instance(int options) {
    153         return new UTS46(options);
    154     }
    155 
    156     /**
    157      * Converts a single domain name label into its ASCII form for DNS lookup.
    158      * If any processing step fails, then info.hasErrors() will be true and
    159      * the result might not be an ASCII string.
    160      * The label might be modified according to the types of errors.
    161      * Labels with severe errors will be left in (or turned into) their Unicode form.
    162      *
    163      * @param label Input domain name label
    164      * @param dest Destination string object
    165      * @param info Output container of IDNA processing details.
    166      * @return dest
    167      * @stable ICU 4.6
    168      */
    169     public abstract StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info);
    170 
    171     /**
    172      * Converts a single domain name label into its Unicode form for human-readable display.
    173      * If any processing step fails, then info.hasErrors() will be true.
    174      * The label might be modified according to the types of errors.
    175      *
    176      * @param label Input domain name label
    177      * @param dest Destination string object
    178      * @param info Output container of IDNA processing details.
    179      * @return dest
    180      * @stable ICU 4.6
    181      */
    182     public abstract StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info);
    183 
    184     /**
    185      * Converts a whole domain name into its ASCII form for DNS lookup.
    186      * If any processing step fails, then info.hasErrors() will be true and
    187      * the result might not be an ASCII string.
    188      * The domain name might be modified according to the types of errors.
    189      * Labels with severe errors will be left in (or turned into) their Unicode form.
    190      *
    191      * @param name Input domain name
    192      * @param dest Destination string object
    193      * @param info Output container of IDNA processing details.
    194      * @return dest
    195      * @stable ICU 4.6
    196      */
    197     public abstract StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info);
    198 
    199     /**
    200      * Converts a whole domain name into its Unicode form for human-readable display.
    201      * If any processing step fails, then info.hasErrors() will be true.
    202      * The domain name might be modified according to the types of errors.
    203      *
    204      * @param name Input domain name
    205      * @param dest Destination string object
    206      * @param info Output container of IDNA processing details.
    207      * @return dest
    208      * @stable ICU 4.6
    209      */
    210     public abstract StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info);
    211 
    212     /**
    213      * Output container for IDNA processing errors.
    214      * The Info class is not suitable for subclassing.
    215      * @stable ICU 4.6
    216      */
    217     public static final class Info {
    218         /**
    219          * Constructor.
    220          * @stable ICU 4.6
    221          */
    222         public Info() {
    223             errors=EnumSet.noneOf(Error.class);
    224             labelErrors=EnumSet.noneOf(Error.class);
    225             isTransDiff=false;
    226             isBiDi=false;
    227             isOkBiDi=true;
    228         }
    229         /**
    230          * Were there IDNA processing errors?
    231          * @return true if there were processing errors
    232          * @stable ICU 4.6
    233          */
    234         public boolean hasErrors() { return !errors.isEmpty(); }
    235         /**
    236          * Returns a set indicating IDNA processing errors.
    237          * @return set of processing errors (modifiable, and not null)
    238          * @stable ICU 4.6
    239          */
    240         public Set<Error> getErrors() { return errors; }
    241         /**
    242          * Returns true if transitional and nontransitional processing produce different results.
    243          * This is the case when the input label or domain name contains
    244          * one or more deviation characters outside a Punycode label (see UTS #46).
    245          * <ul>
    246          * <li>With nontransitional processing, such characters are
    247          * copied to the destination string.
    248          * <li>With transitional processing, such characters are
    249          * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
    250          * </ul>
    251          * @return true if transitional and nontransitional processing produce different results
    252          * @stable ICU 4.6
    253          */
    254         public boolean isTransitionalDifferent() { return isTransDiff; }
    255 
    256         private void reset() {
    257             errors.clear();
    258             labelErrors.clear();
    259             isTransDiff=false;
    260             isBiDi=false;
    261             isOkBiDi=true;
    262         }
    263 
    264         private EnumSet<Error> errors, labelErrors;
    265         private boolean isTransDiff;
    266         private boolean isBiDi;
    267         private boolean isOkBiDi;
    268     }
    269 
    // The following protected methods give IDNA subclasses access to the private Info fields.
    // The Info object also carries intermediate state that is publicly invisible,
    // avoiding the allocation of another worker object.
    273     /**
    274      * @internal
    275      * @deprecated This API is ICU internal only.
    276      */
    277     @Deprecated
    278     protected static void resetInfo(Info info) {
    279         info.reset();
    280     }
    281     /**
    282      * @internal
    283      * @deprecated This API is ICU internal only.
    284      */
    285     @Deprecated
    286     protected static boolean hasCertainErrors(Info info, EnumSet<Error> errors) {
    287         return !info.errors.isEmpty() && !Collections.disjoint(info.errors, errors);
    288     }
    289     /**
    290      * @internal
    291      * @deprecated This API is ICU internal only.
    292      */
    293     @Deprecated
    294     protected static boolean hasCertainLabelErrors(Info info, EnumSet<Error> errors) {
    295         return !info.labelErrors.isEmpty() && !Collections.disjoint(info.labelErrors, errors);
    296     }
    297     /**
    298      * @internal
    299      * @deprecated This API is ICU internal only.
    300      */
    301     @Deprecated
    302     protected static void addLabelError(Info info, Error error) {
    303         info.labelErrors.add(error);
    304     }
    305     /**
    306      * @internal
    307      * @deprecated This API is ICU internal only.
    308      */
    309     @Deprecated
    310     protected static void promoteAndResetLabelErrors(Info info) {
    311         if(!info.labelErrors.isEmpty()) {
    312             info.errors.addAll(info.labelErrors);
    313             info.labelErrors.clear();
    314         }
    315     }
    316     /**
    317      * @internal
    318      * @deprecated This API is ICU internal only.
    319      */
    320     @Deprecated
    321     protected static void addError(Info info, Error error) {
    322         info.errors.add(error);
    323     }
    324     /**
    325      * @internal
    326      * @deprecated This API is ICU internal only.
    327      */
    328     @Deprecated
    329     protected static void setTransitionalDifferent(Info info) {
    330         info.isTransDiff=true;
    331     }
    332     /**
    333      * @internal
    334      * @deprecated This API is ICU internal only.
    335      */
    336     @Deprecated
    337     protected static void setBiDi(Info info) {
    338         info.isBiDi=true;
    339     }
    340     /**
    341      * @internal
    342      * @deprecated This API is ICU internal only.
    343      */
    344     @Deprecated
    345     protected static boolean isBiDi(Info info) {
    346         return info.isBiDi;
    347     }
    348     /**
    349      * @internal
    350      * @deprecated This API is ICU internal only.
    351      */
    352     @Deprecated
    353     protected static void setNotOkBiDi(Info info) {
    354         info.isOkBiDi=false;
    355     }
    356     /**
    357      * @internal
    358      * @deprecated This API is ICU internal only.
    359      */
    360     @Deprecated
    361     protected static boolean isOkBiDi(Info info) {
    362         return info.isOkBiDi;
    363     }
    364 
    /**
     * IDNA error values.
     * When a domain name or label fails a processing step or does not meet the
     * validity criteria, then one or more of these values are reported
     * in the set returned by {@link Info#getErrors()}.
     * @stable ICU 4.6
     */
    public enum Error {
        /**
         * A non-final domain name label (or the whole domain name) is empty.
         * @stable ICU 4.6
         */
        EMPTY_LABEL,
        /**
         * A domain name label is longer than 63 bytes.
         * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
         * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
         * @stable ICU 4.6
         */
        LABEL_TOO_LONG,
        /**
         * A domain name is longer than 255 bytes in its storage form.
         * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
         * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
         * @stable ICU 4.6
         */
        DOMAIN_NAME_TOO_LONG,
        /**
         * A label starts with a hyphen-minus ('-').
         * @stable ICU 4.6
         */
        LEADING_HYPHEN,
        /**
         * A label ends with a hyphen-minus ('-').
         * @stable ICU 4.6
         */
        TRAILING_HYPHEN,
        /**
         * A label contains hyphen-minus ('-') in the third and fourth positions.
         * @stable ICU 4.6
         */
        HYPHEN_3_4,
        /**
         * A label starts with a combining mark.
         * @stable ICU 4.6
         */
        LEADING_COMBINING_MARK,
        /**
         * A label or domain name contains disallowed characters.
         * @stable ICU 4.6
         */
        DISALLOWED,
        /**
         * A label starts with "xn--" but does not contain valid Punycode.
         * That is, an xn-- label failed Punycode decoding.
         * @stable ICU 4.6
         */
        PUNYCODE,
        /**
         * A label contains a dot=full stop.
         * This can occur in an input string for a single-label function.
         * @stable ICU 4.6
         */
        LABEL_HAS_DOT,
        /**
         * An ACE label does not contain a valid label string.
         * The label was successfully ACE (Punycode) decoded but the resulting
         * string had severe validation errors. For example,
         * it might contain characters that are not allowed in ACE labels,
         * or it might not be normalized.
         * @stable ICU 4.6
         */
        INVALID_ACE_LABEL,
        /**
         * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
         * @stable ICU 4.6
         */
        BIDI,
        /**
         * A label does not meet the IDNA CONTEXTJ requirements.
         * @stable ICU 4.6
         */
        CONTEXTJ,
        /**
         * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
         * Some punctuation characters "Would otherwise have been DISALLOWED"
         * but are allowed in certain contexts. (RFC 5892)
         * @stable ICU 49
         */
        CONTEXTO_PUNCTUATION,
        /**
         * A label does not meet the IDNA CONTEXTO requirements for digits.
         * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
         * @stable ICU 49
         */
        CONTEXTO_DIGITS
    }
    461 
    462     /**
    463      * Sole constructor. (For invocation by subclass constructors, typically implicit.)
    464      * @internal
    465      * @deprecated This API is ICU internal only.
    466      */
    467     @Deprecated
    468     protected IDNA() {
    469     }
    470 
    471     /* IDNA2003 API ------------------------------------------------------------- */
    472 
    473     /**
    474      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
    475      * This operation is done on <b>single labels</b> before sending it to something that expects
    476      * ASCII names. A label is an individual part of a domain name. Labels are usually
    477      * separated by dots; e.g." "www.example.com" is composed of 3 labels
    478      * "www","example", and "com".
    479      *
    480      * @param src       The input string to be processed
    481      * @param options   A bit set of options:
    482      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    483      *                              and do not use STD3 ASCII rules
    484      *                              If unassigned code points are found the operation fails with
    485      *                              StringPrepParseException.
    486      *
    487      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    488      *                              If this option is set, the unassigned code points are in the input
    489      *                              are treated as normal Unicode code points.
    490      *
    491      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    492      *                              If this option is set and the input does not satisfy STD3 rules,
    493      *                              the operation will fail with ParseException
    494      * @return StringBuffer the converted String
    495      * @throws StringPrepParseException When an error occurs for parsing a string.
    496      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    497      */
    498     @Deprecated
    499     public static StringBuffer convertToASCII(String src, int options)
    500         throws StringPrepParseException{
    501         UCharacterIterator iter = UCharacterIterator.getInstance(src);
    502         return convertToASCII(iter,options);
    503     }
    504 
    505     /**
    506      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
    507      * This operation is done on <b>single labels</b> before sending it to something that expects
    508      * ASCII names. A label is an individual part of a domain name. Labels are usually
    509      * separated by dots; e.g." "www.example.com" is composed of 3 labels
    510      * "www","example", and "com".
    511      *
    512      * @param src       The input string as StringBuffer to be processed
    513      * @param options   A bit set of options:
    514      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    515      *                              and do not use STD3 ASCII rules
    516      *                              If unassigned code points are found the operation fails with
    517      *                              ParseException.
    518      *
    519      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    520      *                              If this option is set, the unassigned code points are in the input
    521      *                              are treated as normal Unicode code points.
    522      *
    523      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    524      *                              If this option is set and the input does not satisfy STD3 rules,
    525      *                              the operation will fail with ParseException
    526      * @return StringBuffer the converted String
    527      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    528      */
    529     @Deprecated
    530     public static StringBuffer convertToASCII(StringBuffer src, int options)
    531         throws StringPrepParseException{
    532         UCharacterIterator iter = UCharacterIterator.getInstance(src);
    533         return convertToASCII(iter,options);
    534     }
    535 
    536     /**
    537      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
    538      * This operation is done on <b>single labels</b> before sending it to something that expects
    539      * ASCII names. A label is an individual part of a domain name. Labels are usually
    540      * separated by dots; e.g." "www.example.com" is composed of 3 labels
    541      * "www","example", and "com".
    542      *
    543      * @param src       The input string as UCharacterIterator to be processed
    544      * @param options   A bit set of options:
    545      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    546      *                              and do not use STD3 ASCII rules
    547      *                              If unassigned code points are found the operation fails with
    548      *                              ParseException.
    549      *
    550      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    551      *                              If this option is set, the unassigned code points are in the input
    552      *                              are treated as normal Unicode code points.
    553      *
    554      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    555      *                              If this option is set and the input does not satisfy STD3 rules,
    556      *                              the operation will fail with ParseException
    557      * @return StringBuffer the converted String
    558      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    559      */
    560     @Deprecated
    561     public static StringBuffer convertToASCII(UCharacterIterator src, int options)
    562                 throws StringPrepParseException{
    563         return IDNA2003.convertToASCII(src, options);
    564     }
    565 
    566     /**
    567      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
    568      * This operation is done on complete domain names, e.g: "www.example.com".
    569      * It is important to note that this operation can fail. If it fails, then the input
    570      * domain name cannot be used as an Internationalized Domain Name and the application
    571      * should have methods defined to deal with the failure.
    572      *
    573      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
    574      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
    575      * and then convert. This function does not offer that level of granularity. The options once
    576      * set will apply to all labels in the domain name
    577      *
    578      * @param src       The input string as UCharacterIterator to be processed
    579      * @param options   A bit set of options:
    580      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    581      *                              and do not use STD3 ASCII rules
    582      *                              If unassigned code points are found the operation fails with
    583      *                              ParseException.
    584      *
    585      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    586      *                              If this option is set, the unassigned code points are in the input
    587      *                              are treated as normal Unicode code points.
    588      *
    589      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    590      *                              If this option is set and the input does not satisfy STD3 rules,
    591      *                              the operation will fail with ParseException
    592      * @return StringBuffer the converted String
    593      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    594      */
    595     @Deprecated
    596     public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options)
    597             throws StringPrepParseException{
    598         return convertIDNToASCII(src.getText(), options);
    599     }
    600 
    601     /**
    602      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
    603      * This operation is done on complete domain names, e.g: "www.example.com".
    604      * It is important to note that this operation can fail. If it fails, then the input
    605      * domain name cannot be used as an Internationalized Domain Name and the application
    606      * should have methods defined to deal with the failure.
    607      *
    608      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
    609      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
    610      * and then convert. This function does not offer that level of granularity. The options once
    611      * set will apply to all labels in the domain name
    612      *
    613      * @param src       The input string as a StringBuffer to be processed
    614      * @param options   A bit set of options:
    615      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    616      *                              and do not use STD3 ASCII rules
    617      *                              If unassigned code points are found the operation fails with
    618      *                              ParseException.
    619      *
    620      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    621      *                              If this option is set, the unassigned code points are in the input
    622      *                              are treated as normal Unicode code points.
    623      *
    624      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    625      *                              If this option is set and the input does not satisfy STD3 rules,
    626      *                              the operation will fail with ParseException
    627      * @return StringBuffer the converted String
    628      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    629      */
    630     @Deprecated
    631     public static StringBuffer convertIDNToASCII(StringBuffer src, int options)
    632             throws StringPrepParseException{
    633             return convertIDNToASCII(src.toString(), options);
    634     }
    635 
    636     /**
    637      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
    638      * This operation is done on complete domain names, e.g: "www.example.com".
    639      * It is important to note that this operation can fail. If it fails, then the input
    640      * domain name cannot be used as an Internationalized Domain Name and the application
    641      * should have methods defined to deal with the failure.
    642      *
    643      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
    644      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
    645      * and then convert. This function does not offer that level of granularity. The options once
    646      * set will apply to all labels in the domain name
    647      *
    648      * @param src       The input string to be processed
    649      * @param options   A bit set of options:
    650      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    651      *                              and do not use STD3 ASCII rules
    652      *                              If unassigned code points are found the operation fails with
    653      *                              ParseException.
    654      *
    655      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    656      *                              If this option is set, the unassigned code points are in the input
    657      *                              are treated as normal Unicode code points.
    658      *
    659      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    660      *                              If this option is set and the input does not satisfy STD3 rules,
    661      *                              the operation will fail with ParseException
    662      * @return StringBuffer the converted String
    663      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    664      */
    665     @Deprecated
    666     public static StringBuffer convertIDNToASCII(String src,int options)
    667             throws StringPrepParseException{
    668         return IDNA2003.convertIDNToASCII(src, options);
    669     }
    670 
    671 
    672     /**
    673      * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
    674      * This operation is done on <b>single labels</b> before sending it to something that expects
    675      * Unicode names. A label is an individual part of a domain name. Labels are usually
    676      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
    677      * "www","example", and "com".
    678      *
    679      * @param src       The input string to be processed
    680      * @param options   A bit set of options:
    681      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    682      *                              and do not use STD3 ASCII rules
    683      *                              If unassigned code points are found the operation fails with
    684      *                              ParseException.
    685      *
    686      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    687      *                              If this option is set, the unassigned code points are in the input
    688      *                              are treated as normal Unicode code points.
    689      *
    690      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    691      *                              If this option is set and the input does not satisfy STD3 rules,
    692      *                              the operation will fail with ParseException
    693      * @return StringBuffer the converted String
    694      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    695      */
    696     @Deprecated
    697     public static StringBuffer convertToUnicode(String src, int options)
    698            throws StringPrepParseException{
    699         UCharacterIterator iter = UCharacterIterator.getInstance(src);
    700         return convertToUnicode(iter,options);
    701     }
    702 
    703     /**
    704      * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
    705      * This operation is done on <b>single labels</b> before sending it to something that expects
    706      * Unicode names. A label is an individual part of a domain name. Labels are usually
    707      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
    708      * "www","example", and "com".
    709      *
    710      * @param src       The input string as StringBuffer to be processed
    711      * @param options   A bit set of options:
    712      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    713      *                              and do not use STD3 ASCII rules
    714      *                              If unassigned code points are found the operation fails with
    715      *                              ParseException.
    716      *
    717      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    718      *                              If this option is set, the unassigned code points are in the input
    719      *                              are treated as normal Unicode code points.
    720      *
    721      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    722      *                              If this option is set and the input does not satisfy STD3 rules,
    723      *                              the operation will fail with ParseException
    724      * @return StringBuffer the converted String
    725      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    726      */
    727     @Deprecated
    728     public static StringBuffer convertToUnicode(StringBuffer src, int options)
    729            throws StringPrepParseException{
    730         UCharacterIterator iter = UCharacterIterator.getInstance(src);
    731         return convertToUnicode(iter,options);
    732     }
    733 
    734     /**
    735      * IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC.
    736      * This operation is done on <b>single labels</b> before sending it to something that expects
    737      * Unicode names. A label is an individual part of a domain name. Labels are usually
    738      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
    739      * "www","example", and "com".
    740      *
    741      * @param src       The input string as UCharacterIterator to be processed
    742      * @param options   A bit set of options:
    743      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    744      *                              and do not use STD3 ASCII rules
    745      *                              If unassigned code points are found the operation fails with
    746      *                              ParseException.
    747      *
    748      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    749      *                              If this option is set, the unassigned code points are in the input
    750      *                              are treated as normal Unicode code points.
    751      *
    752      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    753      *                              If this option is set and the input does not satisfy STD3 rules,
    754      *                              the operation will fail with ParseException
    755      * @return StringBuffer the converted String
    756      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    757      */
    758     @Deprecated
    759     public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
    760            throws StringPrepParseException{
    761         return IDNA2003.convertToUnicode(src, options);
    762     }
    763 
    764     /**
    765      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
    766      * This operation is done on complete domain names, e.g: "www.example.com".
    767      *
    768      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
    769      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
    770      * and then convert. This function does not offer that level of granularity. The options once
    771      * set will apply to all labels in the domain name
    772      *
    773      * @param src       The input string as UCharacterIterator to be processed
    774      * @param options   A bit set of options:
    775      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    776      *                              and do not use STD3 ASCII rules
    777      *                              If unassigned code points are found the operation fails with
    778      *                              ParseException.
    779      *
    780      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    781      *                              If this option is set, the unassigned code points are in the input
    782      *                              are treated as normal Unicode code points.
    783      *
    784      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    785      *                              If this option is set and the input does not satisfy STD3 rules,
    786      *                              the operation will fail with ParseException
    787      * @return StringBuffer the converted String
    788      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    789      */
    790     @Deprecated
    791     public static StringBuffer convertIDNToUnicode(UCharacterIterator src, int options)
    792         throws StringPrepParseException{
    793         return convertIDNToUnicode(src.getText(), options);
    794     }
    795 
    796     /**
    797      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
    798      * This operation is done on complete domain names, e.g: "www.example.com".
    799      *
    800      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
    801      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
    802      * and then convert. This function does not offer that level of granularity. The options once
    803      * set will apply to all labels in the domain name
    804      *
    805      * @param src       The input string as StringBuffer to be processed
    806      * @param options   A bit set of options:
    807      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    808      *                              and do not use STD3 ASCII rules
    809      *                              If unassigned code points are found the operation fails with
    810      *                              ParseException.
    811      *
    812      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    813      *                              If this option is set, the unassigned code points are in the input
    814      *                              are treated as normal Unicode code points.
    815      *
    816      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    817      *                              If this option is set and the input does not satisfy STD3 rules,
    818      *                              the operation will fail with ParseException
    819      * @return StringBuffer the converted String
    820      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    821      */
    822     @Deprecated
    823     public static StringBuffer convertIDNToUnicode(StringBuffer src, int options)
    824         throws StringPrepParseException{
    825         return convertIDNToUnicode(src.toString(), options);
    826     }
    827 
    828     /**
    829      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
    830      * This operation is done on complete domain names, e.g: "www.example.com".
    831      *
    832      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
    833      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
    834      * and then convert. This function does not offer that level of granularity. The options once
    835      * set will apply to all labels in the domain name
    836      *
    837      * @param src       The input string to be processed
    838      * @param options   A bit set of options:
    839      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    840      *                              and do not use STD3 ASCII rules
    841      *                              If unassigned code points are found the operation fails with
    842      *                              ParseException.
    843      *
    844      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    845      *                              If this option is set, the unassigned code points are in the input
    846      *                              are treated as normal Unicode code points.
    847      *
    848      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    849      *                              If this option is set and the input does not satisfy STD3 rules,
    850      *                              the operation will fail with ParseException
    851      * @return StringBuffer the converted String
    852      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    853      */
    854     @Deprecated
    855     public static StringBuffer convertIDNToUnicode(String src, int options)
    856             throws StringPrepParseException{
    857         return IDNA2003.convertIDNToUnicode(src, options);
    858     }
    859 
    860     /**
    861      * IDNA2003: Compare two IDN strings for equivalence.
    862      * This function splits the domain names into labels and compares them.
    863      * According to IDN RFC, whenever two labels are compared, they are
    864      * considered equal if and only if their ASCII forms (obtained by
    865      * applying toASCII) match using an case-insensitive ASCII comparison.
    866      * Two domain names are considered a match if and only if all labels
    867      * match regardless of whether label separators match.
    868      *
    869      * @param s1        First IDN string as StringBuffer
    870      * @param s2        Second IDN string as StringBuffer
    871      * @param options   A bit set of options:
    872      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    873      *                              and do not use STD3 ASCII rules
    874      *                              If unassigned code points are found the operation fails with
    875      *                              ParseException.
    876      *
    877      *  - IDNA.ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
    878      *                              If this option is set, the unassigned code points are in the input
    879      *                              are treated as normal Unicode code points.
    880      *
    881      *  - IDNA.USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
    882      *                              If this option is set and the input does not satisfy STD3 rules,
    883      *                              the operation will fail with ParseException
    884      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
    885      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    886      */
    887     @Deprecated
    888     public static int compare(StringBuffer s1, StringBuffer s2, int options)
    889         throws StringPrepParseException{
    890         if(s1==null || s2 == null){
    891             throw new IllegalArgumentException("One of the source buffers is null");
    892         }
    893         return IDNA2003.compare(s1.toString(), s2.toString(), options);
    894     }
    895 
    896     /**
    897      * IDNA2003: Compare two IDN strings for equivalence.
    898      * This function splits the domain names into labels and compares them.
    899      * According to IDN RFC, whenever two labels are compared, they are
    900      * considered equal if and only if their ASCII forms (obtained by
    901      * applying toASCII) match using an case-insensitive ASCII comparison.
    902      * Two domain names are considered a match if and only if all labels
    903      * match regardless of whether label separators match.
    904      *
    905      * @param s1        First IDN string
    906      * @param s2        Second IDN string
    907      * @param options   A bit set of options:
    908      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    909      *                              and do not use STD3 ASCII rules
    910      *                              If unassigned code points are found the operation fails with
    911      *                              ParseException.
    912      *
    913      *  - IDNA.ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
    914      *                              If this option is set, the unassigned code points are in the input
    915      *                              are treated as normal Unicode code points.
    916      *
    917      *  - IDNA.USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
    918      *                              If this option is set and the input does not satisfy STD3 rules,
    919      *                              the operation will fail with ParseException
    920      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
    921      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    922      */
    923     @Deprecated
    924     public static int compare(String s1, String s2, int options) throws StringPrepParseException{
    925         if(s1==null || s2 == null){
    926             throw new IllegalArgumentException("One of the source buffers is null");
    927         }
    928         return IDNA2003.compare(s1, s2, options);
    929     }
    930     /**
    931      * IDNA2003: Compare two IDN strings for equivalence.
    932      * This function splits the domain names into labels and compares them.
    933      * According to IDN RFC, whenever two labels are compared, they are
    934      * considered equal if and only if their ASCII forms (obtained by
    935      * applying toASCII) match using an case-insensitive ASCII comparison.
    936      * Two domain names are considered a match if and only if all labels
    937      * match regardless of whether label separators match.
    938      *
    939      * @param s1        First IDN string as UCharacterIterator
    940      * @param s2        Second IDN string as UCharacterIterator
    941      * @param options   A bit set of options:
    942      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    943      *                              and do not use STD3 ASCII rules
    944      *                              If unassigned code points are found the operation fails with
    945      *                              ParseException.
    946      *
    947      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    948      *                              If this option is set, the unassigned code points are in the input
    949      *                              are treated as normal Unicode code points.
    950      *
    951      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    952      *                              If this option is set and the input does not satisfy STD3 rules,
    953      *                              the operation will fail with ParseException
    954      * @return 0 if the strings are equal, &gt; 0 if i1 &gt; i2 and &lt; 0 if i1 &lt; i2
    955      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    956      */
    957     @Deprecated
    958     public static int compare(UCharacterIterator s1, UCharacterIterator s2, int options)
    959         throws StringPrepParseException{
    960         if(s1==null || s2 == null){
    961             throw new IllegalArgumentException("One of the source buffers is null");
    962         }
    963         return IDNA2003.compare(s1.getText(), s2.getText(), options);
    964     }
    965 }
    966