Home | History | Annotate | Download | only in template
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package template
      6 
      7 import (
      8 	"bytes"
      9 	"fmt"
     10 	"unicode"
     11 	"unicode/utf8"
     12 )
     13 
     14 // endsWithCSSKeyword reports whether b ends with an ident that
     15 // case-insensitively matches the lower-case kw.
     16 func endsWithCSSKeyword(b []byte, kw string) bool {
     17 	i := len(b) - len(kw)
     18 	if i < 0 {
     19 		// Too short.
     20 		return false
     21 	}
     22 	if i != 0 {
     23 		r, _ := utf8.DecodeLastRune(b[:i])
     24 		if isCSSNmchar(r) {
     25 			// Too long.
     26 			return false
     27 		}
     28 	}
     29 	// Many CSS keywords, such as "!important" can have characters encoded,
     30 	// but the URI production does not allow that according to
     31 	// http://www.w3.org/TR/css3-syntax/#TOK-URI
     32 	// This does not attempt to recognize encoded keywords. For example,
     33 	// given "\75\72\6c" and "url" this return false.
     34 	return string(bytes.ToLower(b[i:])) == kw
     35 }
     36 
     37 // isCSSNmchar reports whether rune is allowed anywhere in a CSS identifier.
     38 func isCSSNmchar(r rune) bool {
     39 	// Based on the CSS3 nmchar production but ignores multi-rune escape
     40 	// sequences.
     41 	// http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
     42 	return 'a' <= r && r <= 'z' ||
     43 		'A' <= r && r <= 'Z' ||
     44 		'0' <= r && r <= '9' ||
     45 		r == '-' ||
     46 		r == '_' ||
     47 		// Non-ASCII cases below.
     48 		0x80 <= r && r <= 0xd7ff ||
     49 		0xe000 <= r && r <= 0xfffd ||
     50 		0x10000 <= r && r <= 0x10ffff
     51 }
     52 
     53 // decodeCSS decodes CSS3 escapes given a sequence of stringchars.
     54 // If there is no change, it returns the input, otherwise it returns a slice
     55 // backed by a new array.
     56 // http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
     57 func decodeCSS(s []byte) []byte {
     58 	i := bytes.IndexByte(s, '\\')
     59 	if i == -1 {
     60 		return s
     61 	}
     62 	// The UTF-8 sequence for a codepoint is never longer than 1 + the
     63 	// number hex digits need to represent that codepoint, so len(s) is an
     64 	// upper bound on the output length.
     65 	b := make([]byte, 0, len(s))
     66 	for len(s) != 0 {
     67 		i := bytes.IndexByte(s, '\\')
     68 		if i == -1 {
     69 			i = len(s)
     70 		}
     71 		b, s = append(b, s[:i]...), s[i:]
     72 		if len(s) < 2 {
     73 			break
     74 		}
     75 		// http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
     76 		// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
     77 		if isHex(s[1]) {
     78 			// http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
     79 			//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
     80 			j := 2
     81 			for j < len(s) && j < 7 && isHex(s[j]) {
     82 				j++
     83 			}
     84 			r := hexDecode(s[1:j])
     85 			if r > unicode.MaxRune {
     86 				r, j = r/16, j-1
     87 			}
     88 			n := utf8.EncodeRune(b[len(b):cap(b)], r)
     89 			// The optional space at the end allows a hex
     90 			// sequence to be followed by a literal hex.
     91 			// string(decodeCSS([]byte(`\A B`))) == "\nB"
     92 			b, s = b[:len(b)+n], skipCSSSpace(s[j:])
     93 		} else {
     94 			// `\\` decodes to `\` and `\"` to `"`.
     95 			_, n := utf8.DecodeRune(s[1:])
     96 			b, s = append(b, s[1:1+n]...), s[1+n:]
     97 		}
     98 	}
     99 	return b
    100 }
    101 
    102 // isHex reports whether the given character is a hex digit.
    103 func isHex(c byte) bool {
    104 	return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
    105 }
    106 
    107 // hexDecode decodes a short hex digit sequence: "10" -> 16.
    108 func hexDecode(s []byte) rune {
    109 	n := '\x00'
    110 	for _, c := range s {
    111 		n <<= 4
    112 		switch {
    113 		case '0' <= c && c <= '9':
    114 			n |= rune(c - '0')
    115 		case 'a' <= c && c <= 'f':
    116 			n |= rune(c-'a') + 10
    117 		case 'A' <= c && c <= 'F':
    118 			n |= rune(c-'A') + 10
    119 		default:
    120 			panic(fmt.Sprintf("Bad hex digit in %q", s))
    121 		}
    122 	}
    123 	return n
    124 }
    125 
    126 // skipCSSSpace returns a suffix of c, skipping over a single space.
    127 func skipCSSSpace(c []byte) []byte {
    128 	if len(c) == 0 {
    129 		return c
    130 	}
    131 	// wc ::= #x9 | #xA | #xC | #xD | #x20
    132 	switch c[0] {
    133 	case '\t', '\n', '\f', ' ':
    134 		return c[1:]
    135 	case '\r':
    136 		// This differs from CSS3's wc production because it contains a
    137 		// probable spec error whereby wc contains all the single byte
    138 		// sequences in nl (newline) but not CRLF.
    139 		if len(c) >= 2 && c[1] == '\n' {
    140 			return c[2:]
    141 		}
    142 		return c[1:]
    143 	}
    144 	return c
    145 }
    146 
    147 // isCSSSpace reports whether b is a CSS space char as defined in wc.
    148 func isCSSSpace(b byte) bool {
    149 	switch b {
    150 	case '\t', '\n', '\f', '\r', ' ':
    151 		return true
    152 	}
    153 	return false
    154 }
    155 
    156 // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
    157 func cssEscaper(args ...interface{}) string {
    158 	s, _ := stringify(args...)
    159 	var b bytes.Buffer
    160 	r, w, written := rune(0), 0, 0
    161 	for i := 0; i < len(s); i += w {
    162 		// See comment in htmlEscaper.
    163 		r, w = utf8.DecodeRuneInString(s[i:])
    164 		var repl string
    165 		switch {
    166 		case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
    167 			repl = cssReplacementTable[r]
    168 		default:
    169 			continue
    170 		}
    171 		b.WriteString(s[written:i])
    172 		b.WriteString(repl)
    173 		written = i + w
    174 		if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
    175 			b.WriteByte(' ')
    176 		}
    177 	}
    178 	if written == 0 {
    179 		return s
    180 	}
    181 	b.WriteString(s[written:])
    182 	return b.String()
    183 }
    184 
    185 var cssReplacementTable = []string{
    186 	0:    `\0`,
    187 	'\t': `\9`,
    188 	'\n': `\a`,
    189 	'\f': `\c`,
    190 	'\r': `\d`,
    191 	// Encode HTML specials as hex so the output can be embedded
    192 	// in HTML attributes without further encoding.
    193 	'"':  `\22`,
    194 	'&':  `\26`,
    195 	'\'': `\27`,
    196 	'(':  `\28`,
    197 	')':  `\29`,
    198 	'+':  `\2b`,
    199 	'/':  `\2f`,
    200 	':':  `\3a`,
    201 	';':  `\3b`,
    202 	'<':  `\3c`,
    203 	'>':  `\3e`,
    204 	'\\': `\\`,
    205 	'{':  `\7b`,
    206 	'}':  `\7d`,
    207 }
    208 
    209 var expressionBytes = []byte("expression")
    210 var mozBindingBytes = []byte("mozbinding")
    211 
    212 // cssValueFilter allows innocuous CSS values in the output including CSS
    213 // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
    214 // (inherit, blue), and colors (#888).
    215 // It filters out unsafe values, such as those that affect token boundaries,
    216 // and anything that might execute scripts.
    217 func cssValueFilter(args ...interface{}) string {
    218 	s, t := stringify(args...)
    219 	if t == contentTypeCSS {
    220 		return s
    221 	}
    222 	b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
    223 
    224 	// CSS3 error handling is specified as honoring string boundaries per
    225 	// http://www.w3.org/TR/css3-syntax/#error-handling :
    226 	//     Malformed declarations. User agents must handle unexpected
    227 	//     tokens encountered while parsing a declaration by reading until
    228 	//     the end of the declaration, while observing the rules for
    229 	//     matching pairs of (), [], {}, "", and '', and correctly handling
    230 	//     escapes. For example, a malformed declaration may be missing a
    231 	//     property, colon (:) or value.
    232 	// So we need to make sure that values do not have mismatched bracket
    233 	// or quote characters to prevent the browser from restarting parsing
    234 	// inside a string that might embed JavaScript source.
    235 	for i, c := range b {
    236 		switch c {
    237 		case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
    238 			return filterFailsafe
    239 		case '-':
    240 			// Disallow <!-- or -->.
    241 			// -- should not appear in valid identifiers.
    242 			if i != 0 && b[i-1] == '-' {
    243 				return filterFailsafe
    244 			}
    245 		default:
    246 			if c < utf8.RuneSelf && isCSSNmchar(rune(c)) {
    247 				id = append(id, c)
    248 			}
    249 		}
    250 	}
    251 	id = bytes.ToLower(id)
    252 	if bytes.Contains(id, expressionBytes) || bytes.Contains(id, mozBindingBytes) {
    253 		return filterFailsafe
    254 	}
    255 	return string(b)
    256 }
    257