Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 2003-2016, International Business Machines Corporation and    *
      7  * others. All Rights Reserved.                                                *
      8  *******************************************************************************
      9  */
     10 
     11 package android.icu.text;
     12 
     13 import java.util.Collections;
     14 import java.util.EnumSet;
     15 import java.util.Set;
     16 
     17 import android.icu.impl.IDNA2003;
     18 import android.icu.impl.UTS46;
     19 
     20 /**
     21  * Abstract base class for IDNA processing.
     22  * See http://www.unicode.org/reports/tr46/
     23  * and http://www.ietf.org/rfc/rfc3490.txt
     24  * <p>
     25  * The IDNA class is not intended for public subclassing.
     26  * <p>
     27  * The non-static methods implement UTS #46 and IDNA2008.
     28  * IDNA2008 is implemented according to UTS #46, see getUTS46Instance().
     29  * <p>
     30  * IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated.
     31  * <p>
     32  * IDNA2003 API Overview:
     33  * <p>
     34  * The static IDNA API methods implement the IDNA protocol as defined in the
     35  * <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>.
     36  * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
     37  * containing non-ASCII code points are required to be processed by
     38  * ToASCII operation before passing it to resolver libraries. Domain names
     39  * that are obtained from resolver libraries are required to be processed by
     40  * ToUnicode operation before displaying the domain name to the user.
     41  * IDNA requires that implementations process input strings with
     42  * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>,
 * which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>,
     44  * and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>.
     45  * Implementations of IDNA MUST fully implement Nameprep and Punycode;
     46  * neither Nameprep nor Punycode are optional.
     47  * The input and output of ToASCII and ToUnicode operations are Unicode
     48  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
     49  * multiple times to an input string will yield the same result as applying the operation
     50  * once.
 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
 * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
     53  *
     54  * @author Ram Viswanadha, Markus Scherer
     55  */
     56 public abstract class IDNA {
    /**
     * Default options value: None of the other options are set.
     * For use in static worker and factory methods.
     * <p>The other option constants below each occupy a distinct bit,
     * so several of them can be passed together in one {@code int} options value.
     */
    public static final int DEFAULT = 0;
    /**
     * Option to allow unassigned code points in domain names and labels.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the UTS46 implementation.
     * (UTS #46 disallows unassigned code points.)
     * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
     * @hide original deprecated declaration
     */
    @Deprecated
    public static final int ALLOW_UNASSIGNED = 1;
    /**
     * Option to check whether the input conforms to the STD3 ASCII rules,
     * for example the restriction of labels to LDH characters
     * (ASCII Letters, Digits and Hyphen-Minus).
     * For use in static worker and factory methods.
     */
    public static final int USE_STD3_RULES = 2;
    /**
     * IDNA option to check whether the input conforms to the BiDi rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (IDNA2003 always performs a BiDi check.)
     */
    public static final int CHECK_BIDI = 4;
    /**
     * IDNA option to check whether the input conforms to the CONTEXTJ rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (The CONTEXTJ check is new in IDNA2008.)
     */
    public static final int CHECK_CONTEXTJ = 8;
    /**
     * IDNA option for nontransitional processing in ToASCII().
     * For use in static worker and factory methods.
     * <p>By default, ToASCII() uses transitional processing.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     */
    public static final int NONTRANSITIONAL_TO_ASCII = 0x10;
    /**
     * IDNA option for nontransitional processing in ToUnicode().
     * For use in static worker and factory methods.
     * <p>By default, ToUnicode() uses transitional processing.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     */
    public static final int NONTRANSITIONAL_TO_UNICODE = 0x20;
    /**
     * IDNA option to check whether the input conforms to the CONTEXTO rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (The CONTEXTO check is new in IDNA2008.)
     * <p>This is for use by registries for IDNA2008 conformance.
     * UTS #46 does not require the CONTEXTO check.
     */
    public static final int CHECK_CONTEXTO = 0x40;
    118 
    /**
     * Returns an IDNA instance which implements UTS #46.
     * Each call constructs and returns a new, unmodifiable instance.
     * Cache it when performing multiple operations.
     * The instance is thread-safe, that is, it can be used concurrently.
     * <p>
     * UTS #46 defines Unicode IDNA Compatibility Processing,
     * updated to the latest version of Unicode and compatible with both
     * IDNA2003 and IDNA2008.
     * <p>
     * The worker functions use transitional processing, including deviation mappings,
     * unless NONTRANSITIONAL_TO_ASCII or NONTRANSITIONAL_TO_UNICODE
     * is used in which case the deviation characters are passed through without change.
     * <p>
     * Disallowed characters are mapped to U+FFFD.
     * <p>
     * Operations with the UTS #46 instance do not support the
     * ALLOW_UNASSIGNED option.
     * <p>
     * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
     * When the USE_STD3_RULES option is used, ASCII characters other than
     * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
     *
     * @param options Bit set to modify the processing and error checking.
     * @return the UTS #46 IDNA instance, if successful
     */
    public static IDNA getUTS46Instance(int options) {
        // The options value is handed unchanged to the UTS46 implementation class.
        return new UTS46(options);
    }
    148 
    /**
     * Converts a single domain name label into its ASCII form for DNS lookup.
     * If any processing step fails, then {@link Info#hasErrors() info.hasErrors()} will be true
     * and the result might not be an ASCII string.
     * The label might be modified according to the types of errors.
     * Labels with severe errors will be left in (or turned into) their Unicode form.
     *
     * @param label Input domain name label
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @return dest
     */
    public abstract StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info);
    162 
    /**
     * Converts a single domain name label into its Unicode form for human-readable display.
     * If any processing step fails, then {@link Info#hasErrors() info.hasErrors()} will be true.
     * The label might be modified according to the types of errors.
     *
     * @param label Input domain name label
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @return dest
     */
    public abstract StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info);
    174 
    /**
     * Converts a whole domain name into its ASCII form for DNS lookup.
     * If any processing step fails, then {@link Info#hasErrors() info.hasErrors()} will be true
     * and the result might not be an ASCII string.
     * The domain name might be modified according to the types of errors.
     * Labels with severe errors will be left in (or turned into) their Unicode form.
     *
     * @param name Input domain name
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @return dest
     */
    public abstract StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info);
    188 
    /**
     * Converts a whole domain name into its Unicode form for human-readable display.
     * If any processing step fails, then {@link Info#hasErrors() info.hasErrors()} will be true.
     * The domain name might be modified according to the types of errors.
     *
     * @param name Input domain name
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @return dest
     */
    public abstract StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info);
    200 
    201     /**
    202      * Output container for IDNA processing errors.
    203      * The Info class is not suitable for subclassing.
    204      */
    205     public static final class Info {
    206         /**
    207          * Constructor.
    208          */
    209         public Info() {
    210             errors=EnumSet.noneOf(Error.class);
    211             labelErrors=EnumSet.noneOf(Error.class);
    212             isTransDiff=false;
    213             isBiDi=false;
    214             isOkBiDi=true;
    215         }
    216         /**
    217          * Were there IDNA processing errors?
    218          * @return true if there were processing errors
    219          */
    220         public boolean hasErrors() { return !errors.isEmpty(); }
    221         /**
    222          * Returns a set indicating IDNA processing errors.
    223          * @return set of processing errors (modifiable, and not null)
    224          */
    225         public Set<Error> getErrors() { return errors; }
    226         /**
    227          * Returns true if transitional and nontransitional processing produce different results.
    228          * This is the case when the input label or domain name contains
    229          * one or more deviation characters outside a Punycode label (see UTS #46).
    230          * <ul>
    231          * <li>With nontransitional processing, such characters are
    232          * copied to the destination string.
    233          * <li>With transitional processing, such characters are
    234          * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
    235          * </ul>
    236          * @return true if transitional and nontransitional processing produce different results
    237          */
    238         public boolean isTransitionalDifferent() { return isTransDiff; }
    239 
    240         private void reset() {
    241             errors.clear();
    242             labelErrors.clear();
    243             isTransDiff=false;
    244             isBiDi=false;
    245             isOkBiDi=true;
    246         }
    247 
    248         private EnumSet<Error> errors, labelErrors;
    249         private boolean isTransDiff;
    250         private boolean isBiDi;
    251         private boolean isOkBiDi;
    252     }
    253 
    // The following protected methods give IDNA subclasses access to the private Info fields.
    // The Info object also provides intermediate state that is publicly invisible,
    // avoiding the allocation of another worker object.
    /**
     * Resets the given Info by calling its private {@code reset()} method,
     * which clears both error sets and restores the flags to their initial values.
     *
     * @param info the Info object to reset
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected static void resetInfo(Info info) {
        info.reset();
    }
    266     /**
    267      * @deprecated This API is ICU internal only.
    268      * @hide original deprecated declaration
    269      * @hide draft / provisional / internal are hidden on Android
    270      */
    271     @Deprecated
    272     protected static boolean hasCertainErrors(Info info, EnumSet<Error> errors) {
    273         return !info.errors.isEmpty() && !Collections.disjoint(info.errors, errors);
    274     }
    275     /**
    276      * @deprecated This API is ICU internal only.
    277      * @hide original deprecated declaration
    278      * @hide draft / provisional / internal are hidden on Android
    279      */
    280     @Deprecated
    281     protected static boolean hasCertainLabelErrors(Info info, EnumSet<Error> errors) {
    282         return !info.labelErrors.isEmpty() && !Collections.disjoint(info.labelErrors, errors);
    283     }
    /**
     * Adds the given error to the Info's label-error set.
     *
     * @param info the Info object to modify
     * @param error the error to record
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected static void addLabelError(Info info, Error error) {
        info.labelErrors.add(error);
    }
    293     /**
    294      * @deprecated This API is ICU internal only.
    295      * @hide original deprecated declaration
    296      * @hide draft / provisional / internal are hidden on Android
    297      */
    298     @Deprecated
    299     protected static void promoteAndResetLabelErrors(Info info) {
    300         if(!info.labelErrors.isEmpty()) {
    301             info.errors.addAll(info.labelErrors);
    302             info.labelErrors.clear();
    303         }
    304     }
    /**
     * Adds the given error to the Info's overall error set.
     *
     * @param info the Info object to modify
     * @param error the error to record
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected static void addError(Info info, Error error) {
        info.errors.add(error);
    }
    /**
     * Sets the flag that {@link Info#isTransitionalDifferent()} reports to true.
     *
     * @param info the Info object to modify
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected static void setTransitionalDifferent(Info info) {
        info.isTransDiff=true;
    }
    /**
     * Sets the Info's private {@code isBiDi} flag to true.
     *
     * @param info the Info object to modify
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected static void setBiDi(Info info) {
        info.isBiDi=true;
    }
    /**
     * Returns the Info's private {@code isBiDi} flag.
     *
     * @param info the Info object to query
     * @return the current value of the {@code isBiDi} flag
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected static boolean isBiDi(Info info) {
        return info.isBiDi;
    }
    /**
     * Sets the Info's private {@code isOkBiDi} flag to false.
     * (The flag starts out true in a new or reset Info.)
     *
     * @param info the Info object to modify
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected static void setNotOkBiDi(Info info) {
        info.isOkBiDi=false;
    }
    /**
     * Returns the Info's private {@code isOkBiDi} flag.
     *
     * @param info the Info object to query
     * @return the current value of the {@code isOkBiDi} flag
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected static boolean isOkBiDi(Info info) {
        return info.isOkBiDi;
    }
    359 
    /**
     * IDNA error values.
     * When a domain name or label fails a processing step or does not meet the
     * validity criteria, then one or more of these errors are recorded
     * in the set returned by {@link Info#getErrors()}.
     */
    public static enum Error {
        /**
         * A non-final domain name label (or the whole domain name) is empty.
         */
        EMPTY_LABEL,
        /**
         * A domain name label is longer than 63 bytes.
         * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
         * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
         */
        LABEL_TOO_LONG,
        /**
         * A domain name is longer than 255 bytes in its storage form.
         * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
         * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
         */
        DOMAIN_NAME_TOO_LONG,
        /**
         * A label starts with a hyphen-minus ('-').
         */
        LEADING_HYPHEN,
        /**
         * A label ends with a hyphen-minus ('-').
         */
        TRAILING_HYPHEN,
        /**
         * A label contains hyphen-minus ('-') in the third and fourth positions.
         */
        HYPHEN_3_4,
        /**
         * A label starts with a combining mark.
         */
        LEADING_COMBINING_MARK,
        /**
         * A label or domain name contains disallowed characters.
         */
        DISALLOWED,
        /**
         * A label starts with "xn--" but does not contain valid Punycode.
         * That is, an xn-- label failed Punycode decoding.
         */
        PUNYCODE,
        /**
         * A label contains a dot (full stop).
         * This can occur in an input string for a single-label function.
         */
        LABEL_HAS_DOT,
        /**
         * An ACE label does not contain a valid label string.
         * The label was successfully ACE (Punycode) decoded but the resulting
         * string had severe validation errors. For example,
         * it might contain characters that are not allowed in ACE labels,
         * or it might not be normalized.
         */
        INVALID_ACE_LABEL,
        /**
         * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
         */
        BIDI,
        /**
         * A label does not meet the IDNA CONTEXTJ requirements.
         */
        CONTEXTJ,
        /**
         * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
         * Some punctuation characters "Would otherwise have been DISALLOWED"
         * but are allowed in certain contexts. (RFC 5892)
         */
        CONTEXTO_PUNCTUATION,
        /**
         * A label does not meet the IDNA CONTEXTO requirements for digits.
         * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
         */
        CONTEXTO_DIGITS
    }
    440 
    /**
     * Sole constructor. (For invocation by subclass constructors, typically implicit.)
     * It performs no initialization of its own.
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected IDNA() {
    }
    450 
    451     /* IDNA2003 API ------------------------------------------------------------- */
    452 
    453     /**
    454      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
    455      * This operation is done on <b>single labels</b> before sending it to something that expects
    456      * ASCII names. A label is an individual part of a domain name. Labels are usually
    457      * separated by dots; e.g." "www.example.com" is composed of 3 labels
    458      * "www","example", and "com".
    459      *
    460      * @param src       The input string to be processed
    461      * @param options   A bit set of options:
    462      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    463      *                              and do not use STD3 ASCII rules
    464      *                              If unassigned code points are found the operation fails with
    465      *                              StringPrepParseException.
    466      *
    467      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    468      *                              If this option is set, the unassigned code points are in the input
    469      *                              are treated as normal Unicode code points.
    470      *
    471      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    472      *                              If this option is set and the input does not satisfy STD3 rules,
    473      *                              the operation will fail with ParseException
    474      * @return StringBuffer the converted String
    475      * @throws StringPrepParseException When an error occurs for parsing a string.
    476      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    477      * @hide original deprecated declaration
    478      */
    479     @Deprecated
    480     public static StringBuffer convertToASCII(String src, int options)
    481         throws StringPrepParseException{
    482         UCharacterIterator iter = UCharacterIterator.getInstance(src);
    483         return convertToASCII(iter,options);
    484     }
    485 
    486     /**
    487      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
    488      * This operation is done on <b>single labels</b> before sending it to something that expects
    489      * ASCII names. A label is an individual part of a domain name. Labels are usually
    490      * separated by dots; e.g." "www.example.com" is composed of 3 labels
    491      * "www","example", and "com".
    492      *
    493      * @param src       The input string as StringBuffer to be processed
    494      * @param options   A bit set of options:
    495      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    496      *                              and do not use STD3 ASCII rules
    497      *                              If unassigned code points are found the operation fails with
    498      *                              ParseException.
    499      *
    500      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
    501      *                              If this option is set, the unassigned code points are in the input
    502      *                              are treated as normal Unicode code points.
    503      *
    504      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
    505      *                              If this option is set and the input does not satisfy STD3 rules,
    506      *                              the operation will fail with ParseException
    507      * @return StringBuffer the converted String
    508      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    509      * @hide original deprecated declaration
    510      */
    511     @Deprecated
    512     public static StringBuffer convertToASCII(StringBuffer src, int options)
    513         throws StringPrepParseException{
    514         UCharacterIterator iter = UCharacterIterator.getInstance(src);
    515         return convertToASCII(iter,options);
    516     }
    517 
    /**
     * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
     * This operation is done on <b>single labels</b> before sending them to something that
     * expects ASCII names. A label is an individual part of a domain name. Labels are usually
     * separated by dots; e.g., "www.example.com" is composed of 3 labels:
     * "www", "example", and "com".
     *
     * @param src       The input string as UCharacterIterator to be processed
     * @param options   A bit set of options:
     *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
     *                              and do not use STD3 ASCII rules
     *                              If unassigned code points are found the operation fails with
     *                              StringPrepParseException.
     *
     *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
     *                              If this option is set, unassigned code points in the input
     *                              are treated as normal Unicode code points.
     *
     *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
     *                              If this option is set and the input does not satisfy STD3 rules,
     *                              the operation will fail with StringPrepParseException
     * @return StringBuffer the converted String
     * @throws StringPrepParseException When an error occurs for parsing a string.
     * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
     * @hide original deprecated declaration
     */
    @Deprecated
    public static StringBuffer convertToASCII(UCharacterIterator src, int options)
                throws StringPrepParseException{
        // All of the work is delegated to the IDNA2003 implementation class.
        return IDNA2003.convertToASCII(src, options);
    }
    548 
    /**
     * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
     * This operation is done on complete domain names, e.g., "www.example.com".
     * It is important to note that this operation can fail. If it fails, then the input
     * domain name cannot be used as an Internationalized Domain Name and the application
     * should have methods defined to deal with the failure.
     *
     * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
     * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
     * and then convert. This function does not offer that level of granularity. The options once
     * set will apply to all labels in the domain name.
     *
     * @param src       The input string as UCharacterIterator to be processed
     * @param options   A bit set of options:
     *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
     *                              and do not use STD3 ASCII rules
     *                              If unassigned code points are found the operation fails with
     *                              StringPrepParseException.
     *
     *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
     *                              If this option is set, unassigned code points in the input
     *                              are treated as normal Unicode code points.
     *
     *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
     *                              If this option is set and the input does not satisfy STD3 rules,
     *                              the operation will fail with StringPrepParseException
     * @return StringBuffer the converted String
     * @throws StringPrepParseException When an error occurs for parsing a string.
     * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
     * @hide original deprecated declaration
     */
    @Deprecated
    public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options)
            throws StringPrepParseException{
        // Extract the iterator's text and delegate to the String overload.
        return convertIDNToASCII(src.getText(), options);
    }
    584 
    /**
     * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
     * This operation is done on complete domain names, e.g., "www.example.com".
     * It is important to note that this operation can fail. If it fails, then the input
     * domain name cannot be used as an Internationalized Domain Name and the application
     * should have methods defined to deal with the failure.
     *
     * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
     * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
     * and then convert. This function does not offer that level of granularity. The options once
     * set will apply to all labels in the domain name.
     *
     * @param src       The input string as a StringBuffer to be processed
     * @param options   A bit set of options:
     *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
     *                              and do not use STD3 ASCII rules
     *                              If unassigned code points are found the operation fails with
     *                              StringPrepParseException.
     *
     *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
     *                              If this option is set, unassigned code points in the input
     *                              are treated as normal Unicode code points.
     *
     *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
     *                              If this option is set and the input does not satisfy STD3 rules,
     *                              the operation will fail with StringPrepParseException
     * @return StringBuffer the converted String
     * @throws StringPrepParseException When an error occurs for parsing a string.
     * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
     * @hide original deprecated declaration
     */
    @Deprecated
    public static StringBuffer convertIDNToASCII(StringBuffer src, int options)
            throws StringPrepParseException{
        // Copy the buffer contents to a String and delegate to the String overload.
        return convertIDNToASCII(src.toString(), options);
    }
    620 
    621     /**
    622      * IDNA2003: Converts a complete domain name, supplied as a String, to ASCII by applying the
    623      * IDNToASCII operation defined in the IDNA RFC.
    624      * <p>
    625      * The input is a whole domain name such as "www.example.com" rather than a single label.
    626      * This operation can fail; when it does, the input cannot be used as an Internationalized
    627      * Domain Name, and the application should have a way of dealing with the failure.
    628      * <p>
    629      * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain
    630      * name into separate labels, decide for each one whether to apply allowUnassigned and
    631      * useSTD3ASCIIRules, and then convert. This function does not offer that level of
    632      * granularity: the options, once set, apply to all labels in the domain name.
    633      *
    634      * @param src       The domain name to process
    635      * @param options   A bit set of options:
    636      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    637      *                              and do not use STD3 ASCII rules. If unassigned code points are
    638      *                              found, the operation fails with StringPrepParseException.
    639      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query
    640      *                              operations. If this option is set, unassigned code points in the
    641      *                              input are treated as normal Unicode code points.
    642      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions. If this
    643      *                              option is set and the input does not satisfy STD3 rules, the
    644      *                              operation fails with StringPrepParseException.
    645      * @return the converted domain name as a StringBuffer
    646      * @throws StringPrepParseException if the operation fails
    647      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    648      * @hide original deprecated declaration
    649      */
    650     @Deprecated
    651     public static StringBuffer convertIDNToASCII(String src, int options)
    652             throws StringPrepParseException {
    653         final StringBuffer asciiName = IDNA2003.convertIDNToASCII(src, options);
    654         return asciiName;
    655     }
    656 
    657 
    658     /**
    659      * IDNA2003: Applies the ToUnicode operation defined in the IDNA RFC to a label that is
    660      * supplied as a String.
    661      * <p>
    662      * This operation is done on <b>single labels</b> before they are sent to something that
    663      * expects Unicode names. A label is an individual part of a domain name. Labels are
    664      * usually separated by dots; for example, "www.example.com" is composed of the 3 labels
    665      * "www", "example", and "com".
    666      *
    667      * @param src       The label to process
    668      * @param options   A bit set of options:
    669      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    670      *                              and do not use STD3 ASCII rules. If unassigned code points are
    671      *                              found, the operation fails with StringPrepParseException.
    672      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query
    673      *                              operations. If this option is set, unassigned code points in the
    674      *                              input are treated as normal Unicode code points.
    675      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions. If this
    676      *                              option is set and the input does not satisfy STD3 rules, the
    677      *                              operation fails with StringPrepParseException.
    678      * @return the converted label as a StringBuffer
    679      * @throws StringPrepParseException if the operation fails
    680      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    681      * @hide original deprecated declaration
    682      */
    683     @Deprecated
    684     public static StringBuffer convertToUnicode(String src, int options)
    685             throws StringPrepParseException {
    686         // Wrap the label in an iterator and let the iterator-based overload do the work.
    687         return convertToUnicode(UCharacterIterator.getInstance(src), options);
    688     }
    689 
    690     /**
    691      * IDNA2003: Applies the ToUnicode operation defined in the IDNA RFC to a label that is
    692      * supplied as a StringBuffer.
    693      * <p>
    694      * This operation is done on <b>single labels</b> before they are sent to something that
    695      * expects Unicode names. A label is an individual part of a domain name. Labels are
    696      * usually separated by dots; for example, "www.example.com" is composed of the 3 labels
    697      * "www", "example", and "com".
    698      *
    699      * @param src       The label to process, as a StringBuffer
    700      * @param options   A bit set of options:
    701      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    702      *                              and do not use STD3 ASCII rules. If unassigned code points are
    703      *                              found, the operation fails with StringPrepParseException.
    704      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query
    705      *                              operations. If this option is set, unassigned code points in the
    706      *                              input are treated as normal Unicode code points.
    707      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions. If this
    708      *                              option is set and the input does not satisfy STD3 rules, the
    709      *                              operation fails with StringPrepParseException.
    710      * @return the converted label as a StringBuffer
    711      * @throws StringPrepParseException if the operation fails
    712      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    713      * @hide original deprecated declaration
    714      */
    715     @Deprecated
    716     public static StringBuffer convertToUnicode(StringBuffer src, int options)
    717             throws StringPrepParseException {
    718         final UCharacterIterator labelChars = UCharacterIterator.getInstance(src);
    719         return convertToUnicode(labelChars, options);
    720     }
    721 
    722     /**
    723      * IDNA2003: Applies the ToUnicode operation defined in the IDNA RFC to a label that is
    724      * supplied as a UCharacterIterator.
    725      * <p>
    726      * This operation is done on <b>single labels</b> before they are sent to something that expects
    727      * Unicode names. A label is an individual part of a domain name; labels are usually separated
    728      * by dots, e.g. "www.example.com" is composed of the 3 labels "www", "example", and "com".
    729      *
    730      * @param src       The label to process, as a UCharacterIterator
    731      * @param options   A bit set of options:
    732      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    733      *                              and do not use STD3 ASCII rules. If unassigned code points are
    734      *                              found, the operation fails with StringPrepParseException.
    735      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query
    736      *                              operations. If this option is set, unassigned code points in the
    737      *                              input are treated as normal Unicode code points.
    738      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions. If this
    739      *                              option is set and the input does not satisfy STD3 rules, the
    740      *                              operation fails with StringPrepParseException.
    741      * @return the converted label as a StringBuffer
    742      * @throws StringPrepParseException if the operation fails
    743      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    744      * @hide original deprecated declaration
    745      */
    746     @Deprecated
    747     public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
    748             throws StringPrepParseException {
    749         final StringBuffer unicodeLabel = IDNA2003.convertToUnicode(src, options);
    750         return unicodeLabel;
    751     }
    752 
    753     /**
    754      * IDNA2003: Converts a complete domain name, supplied as a UCharacterIterator, to Unicode by
    755      * applying the IDNToUnicode operation defined in the IDNA RFC. The input is a whole domain
    756      * name such as "www.example.com" rather than a single label.
    757      * <p>
    758      * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain
    759      * name into separate labels, decide for each one whether to apply allowUnassigned and
    760      * useSTD3ASCIIRules, and then convert. This function does not offer that level of
    761      * granularity: the options, once set, apply to all labels in the domain name.
    762      *
    763      * @param src       The domain name to process, as a UCharacterIterator
    764      * @param options   A bit set of options:
    765      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    766      *                              and do not use STD3 ASCII rules. If unassigned code points are
    767      *                              found, the operation fails with StringPrepParseException.
    768      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query
    769      *                              operations. If this option is set, unassigned code points in the
    770      *                              input are treated as normal Unicode code points.
    771      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions. If this
    772      *                              option is set and the input does not satisfy STD3 rules, the
    773      *                              operation fails with StringPrepParseException.
    774      * @return the converted domain name as a StringBuffer
    775      * @throws StringPrepParseException if the operation fails
    776      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    777      * @hide original deprecated declaration
    778      */
    779     @Deprecated
    780     public static StringBuffer convertIDNToUnicode(UCharacterIterator src, int options)
    781             throws StringPrepParseException {
    782         final String domainName = src.getText();
    783         return convertIDNToUnicode(domainName, options);
    784     }
    785 
    786     /**
    787      * IDNA2003: Converts a complete domain name, supplied as a StringBuffer, to Unicode by
    788      * applying the IDNToUnicode operation defined in the IDNA RFC. The input is a whole domain
    789      * name such as "www.example.com" rather than a single label.
    790      * <p>
    791      * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain
    792      * name into separate labels, decide for each one whether to apply allowUnassigned and
    793      * useSTD3ASCIIRules, and then convert. This function does not offer that level of
    794      * granularity: the options, once set, apply to all labels in the domain name.
    795      *
    796      * @param src       The domain name to process, as a StringBuffer
    797      * @param options   A bit set of options:
    798      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    799      *                              and do not use STD3 ASCII rules. If unassigned code points are
    800      *                              found, the operation fails with StringPrepParseException.
    801      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query
    802      *                              operations. If this option is set, unassigned code points in the
    803      *                              input are treated as normal Unicode code points.
    804      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions. If this
    805      *                              option is set and the input does not satisfy STD3 rules, the
    806      *                              operation fails with StringPrepParseException.
    807      * @return the converted domain name as a StringBuffer
    808      * @throws StringPrepParseException if the operation fails
    809      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    810      * @hide original deprecated declaration
    811      */
    812     @Deprecated
    813     public static StringBuffer convertIDNToUnicode(StringBuffer src, int options)
    814             throws StringPrepParseException {
    815         final String domainName = src.toString();
    816         return convertIDNToUnicode(domainName, options);
    817     }
    818 
    819     /**
    820      * IDNA2003: Converts a complete domain name, supplied as a String, to Unicode by applying
    821      * the IDNToUnicode operation defined in the IDNA RFC. The input is a whole domain name
    822      * such as "www.example.com" rather than a single label.
    823      * <p>
    824      * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain
    825      * name into separate labels, decide for each one whether to apply allowUnassigned and
    826      * useSTD3ASCIIRules, and then convert. This function does not offer that level of
    827      * granularity: the options, once set, apply to all labels in the domain name.
    828      *
    829      * @param src       The domain name to process
    830      * @param options   A bit set of options:
    831      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    832      *                              and do not use STD3 ASCII rules. If unassigned code points are
    833      *                              found, the operation fails with StringPrepParseException.
    834      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query
    835      *                              operations. If this option is set, unassigned code points in the
    836      *                              input are treated as normal Unicode code points.
    837      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions. If this
    838      *                              option is set and the input does not satisfy STD3 rules, the
    839      *                              operation fails with StringPrepParseException.
    840      * @return the converted domain name as a StringBuffer
    841      * @throws StringPrepParseException if the operation fails
    842      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    843      * @hide original deprecated declaration
    844      */
    845     @Deprecated
    846     public static StringBuffer convertIDNToUnicode(String src, int options)
    847             throws StringPrepParseException {
    848         final StringBuffer unicodeName = IDNA2003.convertIDNToUnicode(src, options);
    849         return unicodeName;
    850     }
    851 
    852     /**
    853      * IDNA2003: Compares two IDN strings, supplied as StringBuffers, for equivalence.
    854      * The domain names are split into labels and compared label by label. According to the IDN
    855      * RFC, two labels are considered equal if and only if their ASCII forms (obtained by applying
    856      * toASCII) match using a case-insensitive ASCII comparison. Two domain names are considered
    857      * a match if and only if all labels match, regardless of whether the label separators match.
    858      *
    859      * @param s1        First IDN string as StringBuffer
    860      * @param s2        Second IDN string as StringBuffer
    861      * @param options   A bit set of options:
    862      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    863      *                              and do not use STD3 ASCII rules. If unassigned code points are
    864      *                              found, the operation fails with StringPrepParseException.
    865      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query
    866      *                              operations. If this option is set, unassigned code points in the
    867      *                              input are treated as normal Unicode code points.
    868      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions. If this
    869      *                              option is set and the input does not satisfy STD3 rules, the
    870      *                              operation fails with StringPrepParseException.
    871      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
    872      * @throws IllegalArgumentException if s1 or s2 is null
    873      * @throws StringPrepParseException if the operation fails
    874      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    875      * @hide original deprecated declaration
    876      */
    877     @Deprecated
    878     public static int compare(StringBuffer s1, StringBuffer s2, int options)
    879             throws StringPrepParseException {
    880         final boolean operandMissing = (s1 == null) || (s2 == null);
    881         if (operandMissing) {
    882             throw new IllegalArgumentException("One of the source buffers is null");
    883         }
    884         final String first = s1.toString();
    885         final String second = s2.toString();
    886         return IDNA2003.compare(first, second, options);
    887     }
    888 
    889     /**
    890      * IDNA2003: Compares two IDN strings for equivalence.
    891      * <p>
    892      * The domain names are split into labels and compared label by label. According to the IDN
    893      * RFC, two labels are considered equal if and only if their ASCII forms (obtained by applying
    894      * toASCII) match using a case-insensitive ASCII comparison. Two domain names are considered
    895      * a match if and only if all labels match, regardless of whether the label separators match.
    896      * Neither string may be null.
    897      *
    898      * @param s1        First IDN string
    899      * @param s2        Second IDN string
    900      * @param options   A bit set of options:
    901      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    902      *                              and do not use STD3 ASCII rules. If unassigned code points are
    903      *                              found, the operation fails with StringPrepParseException.
    904      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query
    905      *                              operations. If this option is set, unassigned code points in the
    906      *                              input are treated as normal Unicode code points.
    907      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions. If this
    908      *                              option is set and the input does not satisfy STD3 rules, the
    909      *                              operation fails with StringPrepParseException.
    910      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
    911      * @throws IllegalArgumentException if s1 or s2 is null
    912      * @throws StringPrepParseException if the operation fails
    913      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    914      * @hide original deprecated declaration
    915      */
    916     @Deprecated
    917     public static int compare(String s1, String s2, int options)
    918             throws StringPrepParseException {
    919         if (s1 != null && s2 != null) {
    920             return IDNA2003.compare(s1, s2, options);
    921         }
    922         throw new IllegalArgumentException("One of the source buffers is null");
    923     }
    924     /**
    925      * IDNA2003: Compares two IDN strings, supplied as UCharacterIterators, for equivalence.
    926      * <p>
    927      * The domain names are split into labels and compared label by label. According to the IDN
    928      * RFC, two labels are considered equal if and only if their ASCII forms (obtained by applying
    929      * toASCII) match using a case-insensitive ASCII comparison. Two domain names are considered
    930      * a match if and only if all labels match, regardless of whether the label separators match.
    931      *
    932      * @param s1        First IDN string as UCharacterIterator
    933      * @param s2        Second IDN string as UCharacterIterator
    934      * @param options   A bit set of options:
    935      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
    936      *                              and do not use STD3 ASCII rules. If unassigned code points are
    937      *                              found, the operation fails with StringPrepParseException.
    938      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query
    939      *                              operations. If this option is set, unassigned code points in the
    940      *                              input are treated as normal Unicode code points.
    941      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions. If this
    942      *                              option is set and the input does not satisfy STD3 rules, the
    943      *                              operation fails with StringPrepParseException.
    944      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
    945      * @throws IllegalArgumentException if s1 or s2 is null
    946      * @throws StringPrepParseException if the operation fails
    947      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
    948      * @hide original deprecated declaration
    949      */
    950     @Deprecated
    951     public static int compare(UCharacterIterator s1, UCharacterIterator s2, int options)
    952             throws StringPrepParseException {
    953         if (s1 == null || s2 == null) {
    954             throw new IllegalArgumentException("One of the source buffers is null");
    955         }
    956         final String firstName = s1.getText();
    957         final String secondName = s2.getText();
    958         return IDNA2003.compare(firstName, secondName, options);
    959     }
    960 }
    961