Home | History | Annotate | Download | only in stringprep
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 2003-2007, International Business Machines Corporation and    *
      7  * others. All Rights Reserved.                                                *
      8  *******************************************************************************
      9 */
     10 
     11 /*
     12  *
     13 Disclaimer and license
     14 
     15     Regarding this entire document or any portion of it (including
     16     the pseudocode and C code), the author makes no guarantees and
     17     is not responsible for any damage resulting from its use.  The
     18     author grants irrevocable permission to anyone to use, modify,
     19     and distribute it in any way that does not diminish the rights
     20     of anyone else to use, modify, and distribute it, provided that
     21     redistributed derivative works do not contain misleading author or
     22     version information.  Derivative works need not be licensed under
     23     similar terms.
     24 
     25 punycode.c 0.4.0 (2001-Nov-17-Sat)
     26 http://www.cs.berkeley.edu/~amc/idn/
     27 Adam M. Costello
     28 http://www.nicemice.net/amc/
     29 */
     30 
     31 package android.icu.dev.test.stringprep;
     32 import android.icu.text.StringPrepParseException;
     33 import android.icu.text.UCharacterIterator;
     34 import android.icu.text.UTF16;
     35 
     36 /**
     37  * The implementation is direct port of C code in the RFC
     38  */
     39 
     40 public final class PunycodeReference {
     41     /*** punycode status codes */
     42     public static final int punycode_success=0;
     43     public static final int punycode_bad_input=1;   /* Input is invalid.                       */
     44     public static final int punycode_big_output=2;  /* Output would exceed the space provided. */
     45     public static final int punycode_overflow =3;    /* Input needs wider integers to process.  */
     46 
     47     /*** Bootstring parameters for Punycode ***/
     48     private static final int base = 36;
     49     private static final int tmin = 1;
     50     private static final int tmax = 26;
     51     private static final int skew = 38;
     52     private static final int damp = 700;
     53     private static final int initial_bias = 72;
     54     private static final int initial_n = 0x80;
     55     private static final int delimiter = 0x2D;
     56 
     57 
     58 //    private static final long UNSIGNED_INT_MASK = 0xffffffffL;
     59 
     60     /* basic(cp) tests whether cp is a basic code point: */
     61     private static boolean basic(int cp){
     62         return (char)(cp) < 0x80;
     63     }
     64 
     65     /* delim(cp) tests whether cp is a delimiter: */
     66     private static boolean delim(int cp){
     67         return ((cp) == delimiter);
     68     }
     69 
     70     /* decode_digit(cp) returns the numeric value of a basic code */
     71     /* point (for use in representing integers) in the range 0 to */
     72     /* base-1, or base if cp is does not represent a value.       */
     73 
     74     private static int decode_digit(int cp)
     75     {
     76       return  cp - 48 < 10 ? cp - 22 :  cp - 65 < 26 ? cp - 65 :
     77               cp - 97 < 26 ? cp - 97 :  base;
     78     }
     79 
     80     /* encode_digit(d,flag) returns the basic code point whose value      */
     81     /* (when used for representing integers) is d, which needs to be in   */
     82     /* the range 0 to base-1.  The lowercase form is used unless flag is  */
     83     /* nonzero, in which case the uppercase form is used.  The behavior   */
     84     /* is undefined if flag is nonzero and digit d has no uppercase form. */
     85 
     86     private static char encode_digit(int d, int flag)
     87     {
     88       return (char) (d + 22 + (75 * ((d < 26) ? 1 : 0) - (((flag != 0) ? 1 :0) << 5)));
     89       /*  0..25 map to ASCII a..z or A..Z */
     90       /* 26..35 map to ASCII 0..9         */
     91     }
     92 
     93     /* flagged(bcp) tests whether a basic code point is flagged */
     94     /* (uppercase).  The behavior is undefined if bcp is not a  */
     95     /* basic code point.                                        */
     96 
     97     private static boolean flagged(int bcp){
     98          return ((bcp) - 65 < 26);
     99     }
    100 
    101     /* encode_basic(bcp,flag) forces a basic code point to lowercase */
    102     /* if flag is zero, uppercase if flag is nonzero, and returns    */
    103     /* the resulting code point.  The code point is unchanged if it  */
    104     /* is caseless.  The behavior is undefined if bcp is not a basic */
    105     /* code point.                                                   */
    106 
    107     private static char encode_basic(int bcp, int flag)
    108     {
    109       bcp -= (((bcp - 97) < 26) ? 1 :0 ) << 5;
    110       boolean mybcp = (bcp - 65 < 26);
    111       return (char) (bcp + (((flag==0) && mybcp ) ? 1 : 0 ) << 5);
    112     }
    113 
    114     /*** Platform-specific constants ***/
    115 
    116     /* maxint is the maximum value of a punycode_uint variable: */
    117     private static long maxint = 0xFFFFFFFFL;
    118     /* Because maxint is unsigned, -1 becomes the maximum value. */
    119 
    120     /*** Bias adaptation function ***/
    121 
    122     private static int adapt(int delta, int numpoints, boolean firsttime ){
    123       int k;
    124 
    125       delta = (firsttime==true) ? delta / damp : delta >> 1;
    126       /* delta >> 1 is a faster way of doing delta / 2 */
    127       delta += delta / numpoints;
    128 
    129       for (k = 0;  delta > ((base - tmin) * tmax) / 2;  k += base) {
    130         delta /= base - tmin;
    131       }
    132 
    133       return k + (base - tmin + 1) * delta / (delta + skew);
    134     }
    135 
    136     /*** Main encode function ***/
    137 
    138     public static final int encode(   int input_length,
    139                                       int input[],
    140                                       char[] case_flags,
    141                                       int[] output_length,
    142                                       char output[] ){
    143       int delta, h, b, out, max_out, bias, j, q, k, t;
    144       long m,n;
    145       /* Initialize the state: */
    146 
    147       n = initial_n;
    148       delta = out = 0;
    149       max_out = output_length[0];
    150       bias = initial_bias;
    151 
    152       /* Handle the basic code points: */
    153 
    154       for (j = 0;  j < input_length;  ++j) {
    155         if (basic(input[j])) {
    156           if (max_out - out < 2) return punycode_big_output;
    157           output[out++] = (char)
    158             (case_flags!=null ?  encode_basic(input[j], case_flags[j]) : input[j]);
    159         }
    160         /* else if (input[j] < n) return punycode_bad_input; */
    161         /* (not needed for Punycode with unsigned code points) */
    162       }
    163 
    164       h = b = out;
    165 
    166       /* h is the number of code points that have been handled, b is the  */
    167       /* number of basic code points, and out is the number of characters */
    168       /* that have been output.                                           */
    169 
    170       if (b > 0) output[out++] = delimiter;
    171 
    172       /* Main encoding loop: */
    173 
    174       while (h < input_length) {
    175         /* All non-basic code points < n have been     */
    176         /* handled already.  Find the next larger one: */
    177 
    178         for (m = maxint, j = 0;  j < input_length;  ++j) {
    179           /* if (basic(input[j])) continue; */
    180           /* (not needed for Punycode) */
    181           if (input[j] >= n && input[j] < m) m = input[j];
    182         }
    183 
    184         /* Increase delta enough to advance the decoder's    */
    185         /* <n,i> state to <m,0>, but guard against overflow: */
    186 
    187         if (m - n > (maxint - delta) / (h + 1)) return punycode_overflow;
    188         delta += (m - n) * (h + 1);
    189         n = m;
    190 
    191         for (j = 0;  j < input_length;  ++j) {
    192           /* Punycode does not need to check whether input[j] is basic: */
    193           if (input[j] < n /* || basic(input[j]) */ ) {
    194             if (++delta == 0) return punycode_overflow;
    195           }
    196 
    197           if (input[j] == n) {
    198             /* Represent delta as a generalized variable-length integer: */
    199 
    200             for (q = delta, k = base;  ;  k += base) {
    201               if (out >= max_out) return punycode_big_output;
    202               t = k <= bias /* + tmin */ ? tmin :     /* +tmin not needed */
    203                   k >= bias + tmax ? tmax : k - bias;
    204               if (q < t) break;
    205               output[out++] = encode_digit(t + (q - t) % (base - t), 0);
    206               q = (q - t) / (base - t);
    207             }
    208 
    209             output[out++] = encode_digit(q, (case_flags !=null) ? case_flags[j] : 0);
    210             bias = adapt(delta, h + 1, (h == b));
    211             delta = 0;
    212             ++h;
    213           }
    214         }
    215 
    216         ++delta;
    217         ++n;
    218       }
    219 
    220       output_length[0] = out;
    221       return punycode_success;
    222     }
    223 
    224     public static final StringBuffer encode(StringBuffer input,char[] case_flags)
    225                                throws StringPrepParseException{
    226         int[] in = new int[input.length()];
    227         int inLen = 0;
    228         int ch;
    229         StringBuffer result = new StringBuffer();
    230         UCharacterIterator iter = UCharacterIterator.getInstance(input);
    231         while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
    232             in[inLen++]=ch;
    233         }
    234 
    235         int[] outLen =  new int[1];
    236         outLen[0] = input.length()*4;
    237         char[] output = new char[outLen[0]];
    238         int rc = punycode_success;
    239         for(;;){
    240             rc = encode(inLen,in,case_flags, outLen, output);
    241             if(rc==punycode_big_output){
    242                 outLen[0] = outLen[0]*4;
    243                 output = new char[outLen[0]];
    244                 // continue to convert
    245                 continue;
    246             }
    247             break;
    248         }
    249         if(rc==punycode_success){
    250             return result.append(output,0,outLen[0]);
    251         }
    252         getException(rc);
    253         return result;
    254     }
    255 
    256     private static void getException(int rc)
    257                    throws StringPrepParseException{
    258          switch(rc){
    259              case punycode_big_output:
    260                 throw new StringPrepParseException("The output capacity was not sufficient.",StringPrepParseException.BUFFER_OVERFLOW_ERROR);
    261              case punycode_bad_input:
    262                 throw new StringPrepParseException("Illegal char found in the input",StringPrepParseException.ILLEGAL_CHAR_FOUND);
    263              case punycode_overflow:
    264                 throw new StringPrepParseException("Invalid char found in the input",StringPrepParseException.INVALID_CHAR_FOUND);
    265          }
    266 
    267     }
    268     private static final int MAX_BUFFER_SIZE = 100;
    269 
    270     public static final StringBuffer decode(StringBuffer input,char[] case_flags)
    271                                throws StringPrepParseException{
    272         char[] in = input.toString().toCharArray();
    273         int[] outLen = new int[1];
    274         outLen[0] = MAX_BUFFER_SIZE;
    275         int[] output = new int[outLen[0]];
    276         int rc = punycode_success;
    277         StringBuffer result = new StringBuffer();
    278         for(;;){
    279             rc = decode(input.length(),in, outLen, output,case_flags);
    280             if(rc==punycode_big_output){
    281                 outLen[0] = output.length * 4;
    282                 output = new int[outLen[0]];
    283                 continue;
    284             }
    285             break;
    286         }
    287         if(rc==punycode_success){
    288             for(int i=0; i < outLen[0]; i++ ){
    289                 UTF16.append(result,output[i]);
    290             }
    291         }else{
    292             getException(rc);
    293         }
    294         return result;
    295     }
    296 
    297     /*** Main decode function ***/
    298     public static final int decode(int input_length,
    299                              char[] input,
    300                              int[] output_length,
    301                              int[] output,
    302                              char[] case_flags ){
    303       int n, out, i, max_out, bias,
    304                      b, j, in, oldi, w, k, digit, t;
    305 
    306       /* Initialize the state: */
    307 
    308       n = initial_n;
    309       out = i = 0;
    310       max_out = output_length[0];
    311       bias = initial_bias;
    312 
    313       /* Handle the basic code points:  Let b be the number of input code */
    314       /* points before the last delimiter, or 0 if there is none, then    */
    315       /* copy the first b code points to the output.                      */
    316 
    317       for (b = j = 0;  j < input_length;  ++j){
    318            if (delim(input[j])==true){
    319                 b = j;
    320            }
    321       }
    322       if (b > max_out) return punycode_big_output;
    323 
    324       for (j = 0;  j < b;  ++j) {
    325         if (case_flags != null) case_flags[out] = (char)(flagged(input[j]) ? 1 : 0);
    326         if (!basic(input[j])) return punycode_bad_input;
    327         output[out++] = input[j];
    328       }
    329 
    330       /* Main decoding loop:  Start just after the last delimiter if any  */
    331       /* basic code points were copied; start at the beginning otherwise. */
    332 
    333       for (in = b > 0 ? b + 1 : 0;  in < input_length;  ++out) {
    334 
    335         /* in is the index of the next character to be consumed, and */
    336         /* out is the number of code points in the output array.     */
    337 
    338         /* Decode a generalized variable-length integer into delta,  */
    339         /* which gets added to i.  The overflow checking is easier   */
    340         /* if we increase i as we go, then subtract off its starting */
    341         /* value at the end to obtain delta.                         */
    342 
    343         for (oldi = i, w = 1, k = base;  ;  k += base) {
    344           if (in >= input_length) return punycode_bad_input;
    345           digit = decode_digit(input[in++]);
    346           if (digit >= base) return punycode_bad_input;
    347           if (digit > (maxint - i) / w) return punycode_overflow;
    348           i += digit * w;
    349           t = (k <= bias) /* + tmin */ ? tmin :     /* +tmin not needed */
    350               (k >= (bias + tmax)) ? tmax : k - bias;
    351           if (digit < t) break;
    352           if (w > maxint / (base - t)) return punycode_overflow;
    353           w *= (base - t);
    354         }
    355 
    356         bias = adapt(i - oldi, out + 1, (oldi == 0));
    357 
    358         /* i was supposed to wrap around from out+1 to 0,   */
    359         /* incrementing n each time, so we'll fix that now: */
    360 
    361         if (i / (out + 1) > maxint - n) return punycode_overflow;
    362         n += i / (out + 1);
    363         i %= (out + 1);
    364 
    365         /* Insert n at position i of the output: */
    366 
    367         /* not needed for Punycode: */
    368         /* if (decode_digit(n) <= base) return punycode_invalid_input; */
    369         if (out >= max_out) return punycode_big_output;
    370 
    371         if (case_flags != null) {
    372           System.arraycopy(case_flags, i, case_flags,  i + 1, out - i);
    373           /* Case of last character determines uppercase flag: */
    374           case_flags[i] = (char)(flagged(input[in - 1]) ? 0 :1);
    375         }
    376 
    377         System.arraycopy(output, i, output, i + 1,  (out - i));
    378         output[i++] = n;
    379       }
    380 
    381       output_length[0] = out;
    382       return punycode_success;
    383     }
    384 
    385 }
    386