1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2003-2007, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 11 /* 12 * 13 Disclaimer and license 14 15 Regarding this entire document or any portion of it (including 16 the pseudocode and C code), the author makes no guarantees and 17 is not responsible for any damage resulting from its use. The 18 author grants irrevocable permission to anyone to use, modify, 19 and distribute it in any way that does not diminish the rights 20 of anyone else to use, modify, and distribute it, provided that 21 redistributed derivative works do not contain misleading author or 22 version information. Derivative works need not be licensed under 23 similar terms. 24 25 punycode.c 0.4.0 (2001-Nov-17-Sat) 26 http://www.cs.berkeley.edu/~amc/idn/ 27 Adam M. Costello 28 http://www.nicemice.net/amc/ 29 */ 30 31 package android.icu.dev.test.stringprep; 32 import android.icu.text.StringPrepParseException; 33 import android.icu.text.UCharacterIterator; 34 import android.icu.text.UTF16; 35 36 /** 37 * The implementation is direct port of C code in the RFC 38 */ 39 40 public final class PunycodeReference { 41 /*** punycode status codes */ 42 public static final int punycode_success=0; 43 public static final int punycode_bad_input=1; /* Input is invalid. */ 44 public static final int punycode_big_output=2; /* Output would exceed the space provided. */ 45 public static final int punycode_overflow =3; /* Input needs wider integers to process. */ 46 47 /*** Bootstring parameters for Punycode ***/ 48 private static final int base = 36; 49 private static final int tmin = 1; 50 private static final int tmax = 26; 51 private static final int skew = 38; 52 private static final int damp = 700; 53 private static final int initial_bias = 72; 54 private static final int initial_n = 0x80; 55 private static final int delimiter = 0x2D; 56 57 58 // private static final long UNSIGNED_INT_MASK = 0xffffffffL; 59 60 /* basic(cp) tests whether cp is a basic code point: */ 61 private static boolean basic(int cp){ 62 return (char)(cp) < 0x80; 63 } 64 65 /* delim(cp) tests whether cp is a delimiter: */ 66 private static boolean delim(int cp){ 67 return ((cp) == delimiter); 68 } 69 70 /* decode_digit(cp) returns the numeric value of a basic code */ 71 /* point (for use in representing integers) in the range 0 to */ 72 /* base-1, or base if cp is does not represent a value. */ 73 74 private static int decode_digit(int cp) 75 { 76 return cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 : 77 cp - 97 < 26 ? cp - 97 : base; 78 } 79 80 /* encode_digit(d,flag) returns the basic code point whose value */ 81 /* (when used for representing integers) is d, which needs to be in */ 82 /* the range 0 to base-1. The lowercase form is used unless flag is */ 83 /* nonzero, in which case the uppercase form is used. The behavior */ 84 /* is undefined if flag is nonzero and digit d has no uppercase form. */ 85 86 private static char encode_digit(int d, int flag) 87 { 88 return (char) (d + 22 + (75 * ((d < 26) ? 1 : 0) - (((flag != 0) ? 1 :0) << 5))); 89 /* 0..25 map to ASCII a..z or A..Z */ 90 /* 26..35 map to ASCII 0..9 */ 91 } 92 93 /* flagged(bcp) tests whether a basic code point is flagged */ 94 /* (uppercase). The behavior is undefined if bcp is not a */ 95 /* basic code point. */ 96 97 private static boolean flagged(int bcp){ 98 return ((bcp) - 65 < 26); 99 } 100 101 /* encode_basic(bcp,flag) forces a basic code point to lowercase */ 102 /* if flag is zero, uppercase if flag is nonzero, and returns */ 103 /* the resulting code point. The code point is unchanged if it */ 104 /* is caseless. The behavior is undefined if bcp is not a basic */ 105 /* code point. */ 106 107 private static char encode_basic(int bcp, int flag) 108 { 109 bcp -= (((bcp - 97) < 26) ? 1 :0 ) << 5; 110 boolean mybcp = (bcp - 65 < 26); 111 return (char) (bcp + (((flag==0) && mybcp ) ? 1 : 0 ) << 5); 112 } 113 114 /*** Platform-specific constants ***/ 115 116 /* maxint is the maximum value of a punycode_uint variable: */ 117 private static long maxint = 0xFFFFFFFFL; 118 /* Because maxint is unsigned, -1 becomes the maximum value. */ 119 120 /*** Bias adaptation function ***/ 121 122 private static int adapt(int delta, int numpoints, boolean firsttime ){ 123 int k; 124 125 delta = (firsttime==true) ? delta / damp : delta >> 1; 126 /* delta >> 1 is a faster way of doing delta / 2 */ 127 delta += delta / numpoints; 128 129 for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) { 130 delta /= base - tmin; 131 } 132 133 return k + (base - tmin + 1) * delta / (delta + skew); 134 } 135 136 /*** Main encode function ***/ 137 138 public static final int encode( int input_length, 139 int input[], 140 char[] case_flags, 141 int[] output_length, 142 char output[] ){ 143 int delta, h, b, out, max_out, bias, j, q, k, t; 144 long m,n; 145 /* Initialize the state: */ 146 147 n = initial_n; 148 delta = out = 0; 149 max_out = output_length[0]; 150 bias = initial_bias; 151 152 /* Handle the basic code points: */ 153 154 for (j = 0; j < input_length; ++j) { 155 if (basic(input[j])) { 156 if (max_out - out < 2) return punycode_big_output; 157 output[out++] = (char) 158 (case_flags!=null ? encode_basic(input[j], case_flags[j]) : input[j]); 159 } 160 /* else if (input[j] < n) return punycode_bad_input; */ 161 /* (not needed for Punycode with unsigned code points) */ 162 } 163 164 h = b = out; 165 166 /* h is the number of code points that have been handled, b is the */ 167 /* number of basic code points, and out is the number of characters */ 168 /* that have been output. */ 169 170 if (b > 0) output[out++] = delimiter; 171 172 /* Main encoding loop: */ 173 174 while (h < input_length) { 175 /* All non-basic code points < n have been */ 176 /* handled already. Find the next larger one: */ 177 178 for (m = maxint, j = 0; j < input_length; ++j) { 179 /* if (basic(input[j])) continue; */ 180 /* (not needed for Punycode) */ 181 if (input[j] >= n && input[j] < m) m = input[j]; 182 } 183 184 /* Increase delta enough to advance the decoder's */ 185 /* <n,i> state to <m,0>, but guard against overflow: */ 186 187 if (m - n > (maxint - delta) / (h + 1)) return punycode_overflow; 188 delta += (m - n) * (h + 1); 189 n = m; 190 191 for (j = 0; j < input_length; ++j) { 192 /* Punycode does not need to check whether input[j] is basic: */ 193 if (input[j] < n /* || basic(input[j]) */ ) { 194 if (++delta == 0) return punycode_overflow; 195 } 196 197 if (input[j] == n) { 198 /* Represent delta as a generalized variable-length integer: */ 199 200 for (q = delta, k = base; ; k += base) { 201 if (out >= max_out) return punycode_big_output; 202 t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */ 203 k >= bias + tmax ? tmax : k - bias; 204 if (q < t) break; 205 output[out++] = encode_digit(t + (q - t) % (base - t), 0); 206 q = (q - t) / (base - t); 207 } 208 209 output[out++] = encode_digit(q, (case_flags !=null) ? case_flags[j] : 0); 210 bias = adapt(delta, h + 1, (h == b)); 211 delta = 0; 212 ++h; 213 } 214 } 215 216 ++delta; 217 ++n; 218 } 219 220 output_length[0] = out; 221 return punycode_success; 222 } 223 224 public static final StringBuffer encode(StringBuffer input,char[] case_flags) 225 throws StringPrepParseException{ 226 int[] in = new int[input.length()]; 227 int inLen = 0; 228 int ch; 229 StringBuffer result = new StringBuffer(); 230 UCharacterIterator iter = UCharacterIterator.getInstance(input); 231 while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){ 232 in[inLen++]=ch; 233 } 234 235 int[] outLen = new int[1]; 236 outLen[0] = input.length()*4; 237 char[] output = new char[outLen[0]]; 238 int rc = punycode_success; 239 for(;;){ 240 rc = encode(inLen,in,case_flags, outLen, output); 241 if(rc==punycode_big_output){ 242 outLen[0] = outLen[0]*4; 243 output = new char[outLen[0]]; 244 // continue to convert 245 continue; 246 } 247 break; 248 } 249 if(rc==punycode_success){ 250 return result.append(output,0,outLen[0]); 251 } 252 getException(rc); 253 return result; 254 } 255 256 private static void getException(int rc) 257 throws StringPrepParseException{ 258 switch(rc){ 259 case punycode_big_output: 260 throw new StringPrepParseException("The output capacity was not sufficient.",StringPrepParseException.BUFFER_OVERFLOW_ERROR); 261 case punycode_bad_input: 262 throw new StringPrepParseException("Illegal char found in the input",StringPrepParseException.ILLEGAL_CHAR_FOUND); 263 case punycode_overflow: 264 throw new StringPrepParseException("Invalid char found in the input",StringPrepParseException.INVALID_CHAR_FOUND); 265 } 266 267 } 268 private static final int MAX_BUFFER_SIZE = 100; 269 270 public static final StringBuffer decode(StringBuffer input,char[] case_flags) 271 throws StringPrepParseException{ 272 char[] in = input.toString().toCharArray(); 273 int[] outLen = new int[1]; 274 outLen[0] = MAX_BUFFER_SIZE; 275 int[] output = new int[outLen[0]]; 276 int rc = punycode_success; 277 StringBuffer result = new StringBuffer(); 278 for(;;){ 279 rc = decode(input.length(),in, outLen, output,case_flags); 280 if(rc==punycode_big_output){ 281 outLen[0] = output.length * 4; 282 output = new int[outLen[0]]; 283 continue; 284 } 285 break; 286 } 287 if(rc==punycode_success){ 288 for(int i=0; i < outLen[0]; i++ ){ 289 UTF16.append(result,output[i]); 290 } 291 }else{ 292 getException(rc); 293 } 294 return result; 295 } 296 297 /*** Main decode function ***/ 298 public static final int decode(int input_length, 299 char[] input, 300 int[] output_length, 301 int[] output, 302 char[] case_flags ){ 303 int n, out, i, max_out, bias, 304 b, j, in, oldi, w, k, digit, t; 305 306 /* Initialize the state: */ 307 308 n = initial_n; 309 out = i = 0; 310 max_out = output_length[0]; 311 bias = initial_bias; 312 313 /* Handle the basic code points: Let b be the number of input code */ 314 /* points before the last delimiter, or 0 if there is none, then */ 315 /* copy the first b code points to the output. */ 316 317 for (b = j = 0; j < input_length; ++j){ 318 if (delim(input[j])==true){ 319 b = j; 320 } 321 } 322 if (b > max_out) return punycode_big_output; 323 324 for (j = 0; j < b; ++j) { 325 if (case_flags != null) case_flags[out] = (char)(flagged(input[j]) ? 1 : 0); 326 if (!basic(input[j])) return punycode_bad_input; 327 output[out++] = input[j]; 328 } 329 330 /* Main decoding loop: Start just after the last delimiter if any */ 331 /* basic code points were copied; start at the beginning otherwise. */ 332 333 for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) { 334 335 /* in is the index of the next character to be consumed, and */ 336 /* out is the number of code points in the output array. */ 337 338 /* Decode a generalized variable-length integer into delta, */ 339 /* which gets added to i. The overflow checking is easier */ 340 /* if we increase i as we go, then subtract off its starting */ 341 /* value at the end to obtain delta. */ 342 343 for (oldi = i, w = 1, k = base; ; k += base) { 344 if (in >= input_length) return punycode_bad_input; 345 digit = decode_digit(input[in++]); 346 if (digit >= base) return punycode_bad_input; 347 if (digit > (maxint - i) / w) return punycode_overflow; 348 i += digit * w; 349 t = (k <= bias) /* + tmin */ ? tmin : /* +tmin not needed */ 350 (k >= (bias + tmax)) ? tmax : k - bias; 351 if (digit < t) break; 352 if (w > maxint / (base - t)) return punycode_overflow; 353 w *= (base - t); 354 } 355 356 bias = adapt(i - oldi, out + 1, (oldi == 0)); 357 358 /* i was supposed to wrap around from out+1 to 0, */ 359 /* incrementing n each time, so we'll fix that now: */ 360 361 if (i / (out + 1) > maxint - n) return punycode_overflow; 362 n += i / (out + 1); 363 i %= (out + 1); 364 365 /* Insert n at position i of the output: */ 366 367 /* not needed for Punycode: */ 368 /* if (decode_digit(n) <= base) return punycode_invalid_input; */ 369 if (out >= max_out) return punycode_big_output; 370 371 if (case_flags != null) { 372 System.arraycopy(case_flags, i, case_flags, i + 1, out - i); 373 /* Case of last character determines uppercase flag: */ 374 case_flags[i] = (char)(flagged(input[in - 1]) ? 0 :1); 375 } 376 377 System.arraycopy(output, i, output, i + 1, (out - i)); 378 output[i++] = n; 379 } 380 381 output_length[0] = out; 382 return punycode_success; 383 } 384 385 } 386