Home | History | Annotate | Download | only in idna
      1 // Copyright 2012 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package idna
      6 
      7 // This file implements the Punycode algorithm from RFC 3492.
      8 
      9 import (
     10 	"fmt"
     11 	"math"
     12 	"strings"
     13 	"unicode/utf8"
     14 )
     15 
     16 // These parameter values are specified in section 5.
     17 //
     18 // All computation is done with int32s, so that overflow behavior is identical
     19 // regardless of whether int is 32-bit or 64-bit.
     20 const (
     21 	base        int32 = 36
     22 	damp        int32 = 700
     23 	initialBias int32 = 72
     24 	initialN    int32 = 128
     25 	skew        int32 = 38
     26 	tmax        int32 = 26
     27 	tmin        int32 = 1
     28 )
     29 
     30 // decode decodes a string as specified in section 6.2.
     31 func decode(encoded string) (string, error) {
     32 	if encoded == "" {
     33 		return "", nil
     34 	}
     35 	pos := 1 + strings.LastIndex(encoded, "-")
     36 	if pos == 1 {
     37 		return "", fmt.Errorf("idna: invalid label %q", encoded)
     38 	}
     39 	if pos == len(encoded) {
     40 		return encoded[:len(encoded)-1], nil
     41 	}
     42 	output := make([]rune, 0, len(encoded))
     43 	if pos != 0 {
     44 		for _, r := range encoded[:pos-1] {
     45 			output = append(output, r)
     46 		}
     47 	}
     48 	i, n, bias := int32(0), initialN, initialBias
     49 	for pos < len(encoded) {
     50 		oldI, w := i, int32(1)
     51 		for k := base; ; k += base {
     52 			if pos == len(encoded) {
     53 				return "", fmt.Errorf("idna: invalid label %q", encoded)
     54 			}
     55 			digit, ok := decodeDigit(encoded[pos])
     56 			if !ok {
     57 				return "", fmt.Errorf("idna: invalid label %q", encoded)
     58 			}
     59 			pos++
     60 			i += digit * w
     61 			if i < 0 {
     62 				return "", fmt.Errorf("idna: invalid label %q", encoded)
     63 			}
     64 			t := k - bias
     65 			if t < tmin {
     66 				t = tmin
     67 			} else if t > tmax {
     68 				t = tmax
     69 			}
     70 			if digit < t {
     71 				break
     72 			}
     73 			w *= base - t
     74 			if w >= math.MaxInt32/base {
     75 				return "", fmt.Errorf("idna: invalid label %q", encoded)
     76 			}
     77 		}
     78 		x := int32(len(output) + 1)
     79 		bias = adapt(i-oldI, x, oldI == 0)
     80 		n += i / x
     81 		i %= x
     82 		if n > utf8.MaxRune || len(output) >= 1024 {
     83 			return "", fmt.Errorf("idna: invalid label %q", encoded)
     84 		}
     85 		output = append(output, 0)
     86 		copy(output[i+1:], output[i:])
     87 		output[i] = n
     88 		i++
     89 	}
     90 	return string(output), nil
     91 }
     92 
     93 // encode encodes a string as specified in section 6.3 and prepends prefix to
     94 // the result.
     95 //
     96 // The "while h < length(input)" line in the specification becomes "for
     97 // remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
     98 func encode(prefix, s string) (string, error) {
     99 	output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
    100 	copy(output, prefix)
    101 	delta, n, bias := int32(0), initialN, initialBias
    102 	b, remaining := int32(0), int32(0)
    103 	for _, r := range s {
    104 		if r < 0x80 {
    105 			b++
    106 			output = append(output, byte(r))
    107 		} else {
    108 			remaining++
    109 		}
    110 	}
    111 	h := b
    112 	if b > 0 {
    113 		output = append(output, '-')
    114 	}
    115 	for remaining != 0 {
    116 		m := int32(0x7fffffff)
    117 		for _, r := range s {
    118 			if m > r && r >= n {
    119 				m = r
    120 			}
    121 		}
    122 		delta += (m - n) * (h + 1)
    123 		if delta < 0 {
    124 			return "", fmt.Errorf("idna: invalid label %q", s)
    125 		}
    126 		n = m
    127 		for _, r := range s {
    128 			if r < n {
    129 				delta++
    130 				if delta < 0 {
    131 					return "", fmt.Errorf("idna: invalid label %q", s)
    132 				}
    133 				continue
    134 			}
    135 			if r > n {
    136 				continue
    137 			}
    138 			q := delta
    139 			for k := base; ; k += base {
    140 				t := k - bias
    141 				if t < tmin {
    142 					t = tmin
    143 				} else if t > tmax {
    144 					t = tmax
    145 				}
    146 				if q < t {
    147 					break
    148 				}
    149 				output = append(output, encodeDigit(t+(q-t)%(base-t)))
    150 				q = (q - t) / (base - t)
    151 			}
    152 			output = append(output, encodeDigit(q))
    153 			bias = adapt(delta, h+1, h == b)
    154 			delta = 0
    155 			h++
    156 			remaining--
    157 		}
    158 		delta++
    159 		n++
    160 	}
    161 	return string(output), nil
    162 }
    163 
    164 func decodeDigit(x byte) (digit int32, ok bool) {
    165 	switch {
    166 	case '0' <= x && x <= '9':
    167 		return int32(x - ('0' - 26)), true
    168 	case 'A' <= x && x <= 'Z':
    169 		return int32(x - 'A'), true
    170 	case 'a' <= x && x <= 'z':
    171 		return int32(x - 'a'), true
    172 	}
    173 	return 0, false
    174 }
    175 
    176 func encodeDigit(digit int32) byte {
    177 	switch {
    178 	case 0 <= digit && digit < 26:
    179 		return byte(digit + 'a')
    180 	case 26 <= digit && digit < 36:
    181 		return byte(digit + ('0' - 26))
    182 	}
    183 	panic("idna: internal error in punycode encoding")
    184 }
    185 
    186 // adapt is the bias adaptation function specified in section 6.1.
    187 func adapt(delta, numPoints int32, firstTime bool) int32 {
    188 	if firstTime {
    189 		delta /= damp
    190 	} else {
    191 		delta /= 2
    192 	}
    193 	delta += delta / numPoints
    194 	k := int32(0)
    195 	for delta > ((base-tmin)*tmax)/2 {
    196 		delta /= base - tmin
    197 		k += base
    198 	}
    199 	return k + (base-tmin+1)*delta/(delta+skew)
    200 }
    201