Home | History | Annotate | Download | only in impl
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 *******************************************************************************
      6 * Copyright (C) 2003-2010, International Business Machines
      7 * Corporation and others.  All Rights Reserved.
      8 *******************************************************************************
      9 */
     10 package android.icu.impl;
     11 
     12 import android.icu.text.IDNA;
     13 import android.icu.text.StringPrep;
     14 import android.icu.text.StringPrepParseException;
     15 import android.icu.text.UCharacterIterator;
     16 
     17 /**
     18  * IDNA2003 implementation code, moved out of android.icu.text.IDNA.java
     19  * while extending that class to support IDNA2008/UTS #46 as well.
     20  * @author Ram Viswanadha
     21  * @hide Only a subset of ICU is exposed in Android
     22  */
     23 public final class IDNA2003 {
     24     /* IDNA ACE Prefix is "xn--" */
     25     private static char[] ACE_PREFIX                = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
     26     //private static final int ACE_PREFIX_LENGTH      = ACE_PREFIX.length;
     27 
     28     private static final int MAX_LABEL_LENGTH       = 63;
     29     private static final int HYPHEN                 = 0x002D;
     30     private static final int CAPITAL_A              = 0x0041;
     31     private static final int CAPITAL_Z              = 0x005A;
     32     private static final int LOWER_CASE_DELTA       = 0x0020;
     33     private static final int FULL_STOP              = 0x002E;
     34     private static final int MAX_DOMAIN_NAME_LENGTH = 255;
     35 
     36     // The NamePrep profile object
     37     private static final StringPrep namePrep = StringPrep.getInstance(StringPrep.RFC3491_NAMEPREP);
     38 
     39     private static boolean startsWithPrefix(StringBuffer src){
     40         boolean startsWithPrefix = true;
     41 
     42         if(src.length() < ACE_PREFIX.length){
     43             return false;
     44         }
     45         for(int i=0; i<ACE_PREFIX.length;i++){
     46             if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
     47                 startsWithPrefix = false;
     48             }
     49         }
     50         return startsWithPrefix;
     51     }
     52 
     53     private static char toASCIILower(char ch){
     54         if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
     55             return (char)(ch + LOWER_CASE_DELTA);
     56         }
     57         return ch;
     58     }
     59 
     60     private static StringBuffer toASCIILower(CharSequence src){
     61         StringBuffer dest = new StringBuffer();
     62         for(int i=0; i<src.length();i++){
     63             dest.append(toASCIILower(src.charAt(i)));
     64         }
     65         return dest;
     66     }
     67 
     68     private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
     69         char c1,c2;
     70         int rc;
     71         for(int i =0;/* no condition */;i++) {
     72             /* If we reach the ends of both strings then they match */
     73             if(i == s1.length()) {
     74                 return 0;
     75             }
     76 
     77             c1 = s1.charAt(i);
     78             c2 = s2.charAt(i);
     79 
     80             /* Case-insensitive comparison */
     81             if(c1!=c2) {
     82                 rc=toASCIILower(c1)-toASCIILower(c2);
     83                 if(rc!=0) {
     84                     return rc;
     85                 }
     86             }
     87         }
     88     }
     89 
     90     private static int getSeparatorIndex(char[] src,int start, int limit){
     91         for(; start<limit;start++){
     92             if(isLabelSeparator(src[start])){
     93                 return start;
     94             }
     95         }
     96         // we have not found the separator just return length
     97         return start;
     98     }
     99 
    100     /*
    101     private static int getSeparatorIndex(UCharacterIterator iter){
    102         int currentIndex = iter.getIndex();
    103         int separatorIndex = 0;
    104         int ch;
    105         while((ch=iter.next())!= UCharacterIterator.DONE){
    106             if(isLabelSeparator(ch)){
    107                 separatorIndex = iter.getIndex();
    108                 iter.setIndex(currentIndex);
    109                 return separatorIndex;
    110             }
    111         }
    112         // reset index
    113         iter.setIndex(currentIndex);
    114         // we have not found the separator just return the length
    115 
    116     }
    117     */
    118 
    119 
    120     private static boolean isLDHChar(int ch){
    121         // high runner case
    122         if(ch>0x007A){
    123             return false;
    124         }
    125         //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
    126         if( (ch==0x002D) ||
    127             (0x0030 <= ch && ch <= 0x0039) ||
    128             (0x0041 <= ch && ch <= 0x005A) ||
    129             (0x0061 <= ch && ch <= 0x007A)
    130           ){
    131             return true;
    132         }
    133         return false;
    134     }
    135 
    136     /**
    137      * Ascertain if the given code point is a label separator as
    138      * defined by the IDNA RFC
    139      *
    140      * @param ch The code point to be ascertained
    141      * @return true if the char is a label separator
    142      */
    143     private static boolean isLabelSeparator(int ch){
    144         switch(ch){
    145             case 0x002e:
    146             case 0x3002:
    147             case 0xFF0E:
    148             case 0xFF61:
    149                 return true;
    150             default:
    151                 return false;
    152         }
    153     }
    154 
    155     public static StringBuffer convertToASCII(UCharacterIterator src, int options)
    156             throws StringPrepParseException{
    157 
    158         boolean[] caseFlags = null;
    159 
    160         // the source contains all ascii codepoints
    161         boolean srcIsASCII  = true;
    162         // assume the source contains all LDH codepoints
    163         boolean srcIsLDH = true;
    164 
    165         //get the options
    166         boolean useSTD3ASCIIRules = ((options & IDNA.USE_STD3_RULES) != 0);
    167         int ch;
    168         // step 1
    169         while((ch = src.next())!= UCharacterIterator.DONE){
    170             if(ch> 0x7f){
    171                 srcIsASCII = false;
    172             }
    173         }
    174         int failPos = -1;
    175         src.setToStart();
    176         StringBuffer processOut = null;
    177         // step 2 is performed only if the source contains non ASCII
    178         if(!srcIsASCII){
    179             // step 2
    180             processOut = namePrep.prepare(src, options);
    181         }else{
    182             processOut = new StringBuffer(src.getText());
    183         }
    184         int poLen = processOut.length();
    185 
    186         if(poLen==0){
    187             throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
    188         }
    189         StringBuffer dest = new StringBuffer();
    190 
    191         // reset the variable to verify if output of prepare is ASCII or not
    192         srcIsASCII = true;
    193 
    194         // step 3 & 4
    195         for(int j=0;j<poLen;j++ ){
    196             ch=processOut.charAt(j);
    197             if(ch > 0x7F){
    198                 srcIsASCII = false;
    199             }else if(isLDHChar(ch)==false){
    200                 // here we do not assemble surrogates
    201                 // since we know that LDH code points
    202                 // are in the ASCII range only
    203                 srcIsLDH = false;
    204                 failPos = j;
    205             }
    206         }
    207 
    208         if(useSTD3ASCIIRules == true){
    209             // verify 3a and 3b
    210             if( srcIsLDH == false /* source contains some non-LDH characters */
    211                 || processOut.charAt(0) ==  HYPHEN
    212                 || processOut.charAt(processOut.length()-1) == HYPHEN){
    213 
    214                 /* populate the parseError struct */
    215                 if(srcIsLDH==false){
    216                      throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
    217                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,
    218                                               processOut.toString(),
    219                                              (failPos>0) ? (failPos-1) : failPos);
    220                 }else if(processOut.charAt(0) == HYPHEN){
    221                     throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
    222                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
    223 
    224                 }else{
    225                      throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
    226                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,
    227                                               processOut.toString(),
    228                                               (poLen>0) ? poLen-1 : poLen);
    229 
    230                 }
    231             }
    232         }
    233         if(srcIsASCII){
    234             dest =  processOut;
    235         }else{
    236             // step 5 : verify the sequence does not begin with ACE prefix
    237             if(!startsWithPrefix(processOut)){
    238 
    239                 //step 6: encode the sequence with punycode
    240                 caseFlags = new boolean[poLen];
    241 
    242                 StringBuilder punyout = Punycode.encode(processOut,caseFlags);
    243 
    244                 // convert all codepoints to lower case ASCII
    245                 StringBuffer lowerOut = toASCIILower(punyout);
    246 
    247                 //Step 7: prepend the ACE prefix
    248                 dest.append(ACE_PREFIX,0,ACE_PREFIX.length);
    249                 //Step 6: copy the contents in b2 into dest
    250                 dest.append(lowerOut);
    251             }else{
    252 
    253                 throw new StringPrepParseException("The input does not start with the ACE Prefix.",
    254                                          StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
    255             }
    256         }
    257         if(dest.length() > MAX_LABEL_LENGTH){
    258             throw new StringPrepParseException("The labels in the input are too long. Length > 63.",
    259                                      StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
    260         }
    261         return dest;
    262     }
    263 
    264     public static StringBuffer convertIDNToASCII(String src,int options)
    265             throws StringPrepParseException{
    266 
    267         char[] srcArr = src.toCharArray();
    268         StringBuffer result = new StringBuffer();
    269         int sepIndex=0;
    270         int oldSepIndex=0;
    271         for(;;){
    272             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
    273             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
    274             //make sure this is not a root label separator.
    275             if(!(label.length()==0 && sepIndex==srcArr.length)){
    276                 UCharacterIterator iter = UCharacterIterator.getInstance(label);
    277                 result.append(convertToASCII(iter,options));
    278             }
    279             if(sepIndex==srcArr.length){
    280                 break;
    281             }
    282 
    283             // increment the sepIndex to skip past the separator
    284             sepIndex++;
    285             oldSepIndex = sepIndex;
    286             result.append((char)FULL_STOP);
    287         }
    288         if(result.length() > MAX_DOMAIN_NAME_LENGTH){
    289             throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
    290         }
    291         return result;
    292     }
    293 
    294     public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
    295             throws StringPrepParseException{
    296 
    297         boolean[] caseFlags = null;
    298 
    299         // the source contains all ascii codepoints
    300         boolean srcIsASCII  = true;
    301         // assume the source contains all LDH codepoints
    302         //boolean srcIsLDH = true;
    303 
    304         //get the options
    305         //boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
    306 
    307         //int failPos = -1;
    308         int ch;
    309         int saveIndex = src.getIndex();
    310         // step 1: find out if all the codepoints in src are ASCII
    311         while((ch=src.next())!= UCharacterIterator.DONE){
    312             if(ch>0x7F){
    313                 srcIsASCII = false;
    314             }/*else if((srcIsLDH = isLDHChar(ch))==false){
    315                 failPos = src.getIndex();
    316             }*/
    317         }
    318         StringBuffer processOut;
    319 
    320         if(srcIsASCII == false){
    321             try {
    322                 // step 2: process the string
    323                 src.setIndex(saveIndex);
    324                 processOut = namePrep.prepare(src,options);
    325             } catch (StringPrepParseException ex) {
    326                 return new StringBuffer(src.getText());
    327             }
    328 
    329         }else{
    330             //just point to source
    331             processOut = new StringBuffer(src.getText());
    332         }
    333         // TODO:
    334         // The RFC states that
    335         // <quote>
    336         // ToUnicode never fails. If any step fails, then the original input
    337         // is returned immediately in that step.
    338         // </quote>
    339 
    340         //step 3: verify ACE Prefix
    341         if(startsWithPrefix(processOut)){
    342             StringBuffer decodeOut = null;
    343 
    344             //step 4: Remove the ACE Prefix
    345             String temp = processOut.substring(ACE_PREFIX.length,processOut.length());
    346 
    347             //step 5: Decode using punycode
    348             try {
    349                 decodeOut = new StringBuffer(Punycode.decode(temp,caseFlags));
    350             } catch (StringPrepParseException e) {
    351                 decodeOut = null;
    352             }
    353 
    354             //step 6:Apply toASCII
    355             if (decodeOut != null) {
    356                 StringBuffer toASCIIOut = convertToASCII(UCharacterIterator.getInstance(decodeOut), options);
    357 
    358                 //step 7: verify
    359                 if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){
    360 //                    throw new StringPrepParseException("The verification step prescribed by the RFC 3491 failed",
    361 //                                             StringPrepParseException.VERIFICATION_ERROR);
    362                     decodeOut = null;
    363                 }
    364             }
    365 
    366             //step 8: return output of step 5
    367              if (decodeOut != null) {
    368                  return decodeOut;
    369              }
    370         }
    371 
    372 //        }else{
    373 //            // verify that STD3 ASCII rules are satisfied
    374 //            if(useSTD3ASCIIRules == true){
    375 //                if( srcIsLDH == false /* source contains some non-LDH characters */
    376 //                    || processOut.charAt(0) ==  HYPHEN
    377 //                    || processOut.charAt(processOut.length()-1) == HYPHEN){
    378 //
    379 //                    if(srcIsLDH==false){
    380 //                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
    381 //                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),
    382 //                                                 (failPos>0) ? (failPos-1) : failPos);
    383 //                    }else if(processOut.charAt(0) == HYPHEN){
    384 //                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
    385 //                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,
    386 //                                                 processOut.toString(),0);
    387 //
    388 //                    }else{
    389 //                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
    390 //                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,
    391 //                                                 processOut.toString(),
    392 //                                                 processOut.length());
    393 //
    394 //                    }
    395 //                }
    396 //            }
    397 //            // just return the source
    398 //            return new StringBuffer(src.getText());
    399 //        }
    400 
    401         return new StringBuffer(src.getText());
    402     }
    403 
    404     public static StringBuffer convertIDNToUnicode(String src, int options)
    405             throws StringPrepParseException{
    406 
    407         char[] srcArr = src.toCharArray();
    408         StringBuffer result = new StringBuffer();
    409         int sepIndex=0;
    410         int oldSepIndex=0;
    411         for(;;){
    412             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
    413             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
    414             if(label.length()==0 && sepIndex!=srcArr.length ){
    415                 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
    416             }
    417             UCharacterIterator iter = UCharacterIterator.getInstance(label);
    418             result.append(convertToUnicode(iter,options));
    419             if(sepIndex==srcArr.length){
    420                 break;
    421             }
    422             // Unlike the ToASCII operation we don't normalize the label separators
    423             result.append(srcArr[sepIndex]);
    424             // increment the sepIndex to skip past the separator
    425             sepIndex++;
    426             oldSepIndex =sepIndex;
    427         }
    428         if(result.length() > MAX_DOMAIN_NAME_LENGTH){
    429             throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
    430         }
    431         return result;
    432     }
    433 
    434     public static int compare(String s1, String s2, int options) throws StringPrepParseException{
    435         StringBuffer s1Out = convertIDNToASCII(s1, options);
    436         StringBuffer s2Out = convertIDNToASCII(s2, options);
    437         return compareCaseInsensitiveASCII(s1Out,s2Out);
    438     }
    439 }
    440