Home | History | Annotate | Download | only in cookiejar
      1 // Copyright 2012 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package cookiejar
      6 
      7 // This file implements the Punycode algorithm from RFC 3492.
      8 
      9 import (
     10 	"fmt"
     11 	"strings"
     12 	"unicode/utf8"
     13 )
     14 
// These parameter values are specified in section 5.
//
// All computation is done with int32s, so that overflow behavior is identical
// regardless of whether int is 32-bit or 64-bit.
const (
	base        int32 = 36  // number of digit values; encodeDigit accepts 0 through base-1
	damp        int32 = 700 // divisor adapt applies to delta when firstTime is true
	initialBias int32 = 72  // starting value of bias in encode
	initialN    int32 = 128 // starting value of n in encode; equal to utf8.RuneSelf
	skew        int32 = 38  // added to delta in the denominator of adapt's result
	tmax        int32 = 26  // upper clamp for the threshold t in encode
	tmin        int32 = 1   // lower clamp for the threshold t in encode
)
     28 
     29 // encode encodes a string as specified in section 6.3 and prepends prefix to
     30 // the result.
     31 //
     32 // The "while h < length(input)" line in the specification becomes "for
     33 // remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
     34 func encode(prefix, s string) (string, error) {
     35 	output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
     36 	copy(output, prefix)
     37 	delta, n, bias := int32(0), initialN, initialBias
     38 	b, remaining := int32(0), int32(0)
     39 	for _, r := range s {
     40 		if r < utf8.RuneSelf {
     41 			b++
     42 			output = append(output, byte(r))
     43 		} else {
     44 			remaining++
     45 		}
     46 	}
     47 	h := b
     48 	if b > 0 {
     49 		output = append(output, '-')
     50 	}
     51 	for remaining != 0 {
     52 		m := int32(0x7fffffff)
     53 		for _, r := range s {
     54 			if m > r && r >= n {
     55 				m = r
     56 			}
     57 		}
     58 		delta += (m - n) * (h + 1)
     59 		if delta < 0 {
     60 			return "", fmt.Errorf("cookiejar: invalid label %q", s)
     61 		}
     62 		n = m
     63 		for _, r := range s {
     64 			if r < n {
     65 				delta++
     66 				if delta < 0 {
     67 					return "", fmt.Errorf("cookiejar: invalid label %q", s)
     68 				}
     69 				continue
     70 			}
     71 			if r > n {
     72 				continue
     73 			}
     74 			q := delta
     75 			for k := base; ; k += base {
     76 				t := k - bias
     77 				if t < tmin {
     78 					t = tmin
     79 				} else if t > tmax {
     80 					t = tmax
     81 				}
     82 				if q < t {
     83 					break
     84 				}
     85 				output = append(output, encodeDigit(t+(q-t)%(base-t)))
     86 				q = (q - t) / (base - t)
     87 			}
     88 			output = append(output, encodeDigit(q))
     89 			bias = adapt(delta, h+1, h == b)
     90 			delta = 0
     91 			h++
     92 			remaining--
     93 		}
     94 		delta++
     95 		n++
     96 	}
     97 	return string(output), nil
     98 }
     99 
    100 func encodeDigit(digit int32) byte {
    101 	switch {
    102 	case 0 <= digit && digit < 26:
    103 		return byte(digit + 'a')
    104 	case 26 <= digit && digit < 36:
    105 		return byte(digit + ('0' - 26))
    106 	}
    107 	panic("cookiejar: internal error in punycode encoding")
    108 }
    109 
    110 // adapt is the bias adaptation function specified in section 6.1.
    111 func adapt(delta, numPoints int32, firstTime bool) int32 {
    112 	if firstTime {
    113 		delta /= damp
    114 	} else {
    115 		delta /= 2
    116 	}
    117 	delta += delta / numPoints
    118 	k := int32(0)
    119 	for delta > ((base-tmin)*tmax)/2 {
    120 		delta /= base - tmin
    121 		k += base
    122 	}
    123 	return k + (base-tmin+1)*delta/(delta+skew)
    124 }
    125 
    126 // Strictly speaking, the remaining code below deals with IDNA (RFC 5890 and
    127 // friends) and not Punycode (RFC 3492) per se.
    128 
// acePrefix is the ASCII Compatible Encoding prefix. toASCII passes it to
// encode, which prepends it to each label that contains non-ASCII bytes.
const acePrefix = "xn--"
    131 
    132 // toASCII converts a domain or domain label to its ASCII form. For example,
    133 // toASCII("bcher.example.com") is "xn--bcher-kva.example.com", and
    134 // toASCII("golang") is "golang".
    135 func toASCII(s string) (string, error) {
    136 	if ascii(s) {
    137 		return s, nil
    138 	}
    139 	labels := strings.Split(s, ".")
    140 	for i, label := range labels {
    141 		if !ascii(label) {
    142 			a, err := encode(acePrefix, label)
    143 			if err != nil {
    144 				return "", err
    145 			}
    146 			labels[i] = a
    147 		}
    148 	}
    149 	return strings.Join(labels, "."), nil
    150 }
    151 
    152 func ascii(s string) bool {
    153 	for i := 0; i < len(s); i++ {
    154 		if s[i] >= utf8.RuneSelf {
    155 			return false
    156 		}
    157 	}
    158 	return true
    159 }
    160