// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// endsWithCSSKeyword reports whether b ends with an ident that
// case-insensitively matches the lower-case kw.
//
// The match must cover a whole identifier: if the rune immediately before
// the candidate suffix is itself an identifier character, the suffix is only
// the tail of a longer identifier and the result is false.
func endsWithCSSKeyword(b []byte, kw string) bool {
	i := len(b) - len(kw)
	if i < 0 {
		// Too short.
		return false
	}
	if i != 0 {
		// Invalid UTF-8 before the suffix decodes to U+FFFD, which
		// isCSSNmchar accepts, so such input is rejected as well.
		r, _ := utf8.DecodeLastRune(b[:i])
		if isCSSNmchar(r) {
			// Too long.
			return false
		}
	}
	// Many CSS keywords, such as "!important" can have characters encoded,
	// but the URI production does not allow that according to
	// http://www.w3.org/TR/css3-syntax/#TOK-URI
	// This does not attempt to recognize encoded keywords. For example,
	// given "\75\72\6c" and "url" this returns false.
	return string(bytes.ToLower(b[i:])) == kw
}

// isCSSNmchar reports whether rune is allowed anywhere in a CSS identifier.
func isCSSNmchar(r rune) bool {
	// Based on the CSS3 nmchar production but ignores multi-rune escape
	// sequences.
	// http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
	return 'a' <= r && r <= 'z' ||
		'A' <= r && r <= 'Z' ||
		'0' <= r && r <= '9' ||
		r == '-' ||
		r == '_' ||
		// Non-ASCII cases below. The gap between 0xd7ff and 0xe000
		// excludes the surrogate range; 0xfffe and 0xffff are excluded too.
		0x80 <= r && r <= 0xd7ff ||
		0xe000 <= r && r <= 0xfffd ||
		0x10000 <= r && r <= 0x10ffff
}

// decodeCSS decodes CSS3 escapes given a sequence of stringchars.
// If there is no change, it returns the input, otherwise it returns a slice
// backed by a new array.
// http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
57 func decodeCSS(s []byte) []byte { 58 i := bytes.IndexByte(s, '\\') 59 if i == -1 { 60 return s 61 } 62 // The UTF-8 sequence for a codepoint is never longer than 1 + the 63 // number hex digits need to represent that codepoint, so len(s) is an 64 // upper bound on the output length. 65 b := make([]byte, 0, len(s)) 66 for len(s) != 0 { 67 i := bytes.IndexByte(s, '\\') 68 if i == -1 { 69 i = len(s) 70 } 71 b, s = append(b, s[:i]...), s[i:] 72 if len(s) < 2 { 73 break 74 } 75 // http://www.w3.org/TR/css3-syntax/#SUBTOK-escape 76 // escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] 77 if isHex(s[1]) { 78 // http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode 79 // unicode ::= '\' [0-9a-fA-F]{1,6} wc? 80 j := 2 81 for j < len(s) && j < 7 && isHex(s[j]) { 82 j++ 83 } 84 r := hexDecode(s[1:j]) 85 if r > unicode.MaxRune { 86 r, j = r/16, j-1 87 } 88 n := utf8.EncodeRune(b[len(b):cap(b)], r) 89 // The optional space at the end allows a hex 90 // sequence to be followed by a literal hex. 91 // string(decodeCSS([]byte(`\A B`))) == "\nB" 92 b, s = b[:len(b)+n], skipCSSSpace(s[j:]) 93 } else { 94 // `\\` decodes to `\` and `\"` to `"`. 95 _, n := utf8.DecodeRune(s[1:]) 96 b, s = append(b, s[1:1+n]...), s[1+n:] 97 } 98 } 99 return b 100 } 101 102 // isHex reports whether the given character is a hex digit. 103 func isHex(c byte) bool { 104 return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' 105 } 106 107 // hexDecode decodes a short hex digit sequence: "10" -> 16. 108 func hexDecode(s []byte) rune { 109 n := '\x00' 110 for _, c := range s { 111 n <<= 4 112 switch { 113 case '0' <= c && c <= '9': 114 n |= rune(c - '0') 115 case 'a' <= c && c <= 'f': 116 n |= rune(c-'a') + 10 117 case 'A' <= c && c <= 'F': 118 n |= rune(c-'A') + 10 119 default: 120 panic(fmt.Sprintf("Bad hex digit in %q", s)) 121 } 122 } 123 return n 124 } 125 126 // skipCSSSpace returns a suffix of c, skipping over a single space. 
127 func skipCSSSpace(c []byte) []byte { 128 if len(c) == 0 { 129 return c 130 } 131 // wc ::= #x9 | #xA | #xC | #xD | #x20 132 switch c[0] { 133 case '\t', '\n', '\f', ' ': 134 return c[1:] 135 case '\r': 136 // This differs from CSS3's wc production because it contains a 137 // probable spec error whereby wc contains all the single byte 138 // sequences in nl (newline) but not CRLF. 139 if len(c) >= 2 && c[1] == '\n' { 140 return c[2:] 141 } 142 return c[1:] 143 } 144 return c 145 } 146 147 // isCSSSpace reports whether b is a CSS space char as defined in wc. 148 func isCSSSpace(b byte) bool { 149 switch b { 150 case '\t', '\n', '\f', '\r', ' ': 151 return true 152 } 153 return false 154 } 155 156 // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes. 157 func cssEscaper(args ...interface{}) string { 158 s, _ := stringify(args...) 159 var b bytes.Buffer 160 r, w, written := rune(0), 0, 0 161 for i := 0; i < len(s); i += w { 162 // See comment in htmlEscaper. 163 r, w = utf8.DecodeRuneInString(s[i:]) 164 var repl string 165 switch { 166 case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "": 167 repl = cssReplacementTable[r] 168 default: 169 continue 170 } 171 b.WriteString(s[written:i]) 172 b.WriteString(repl) 173 written = i + w 174 if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) { 175 b.WriteByte(' ') 176 } 177 } 178 if written == 0 { 179 return s 180 } 181 b.WriteString(s[written:]) 182 return b.String() 183 } 184 185 var cssReplacementTable = []string{ 186 0: `\0`, 187 '\t': `\9`, 188 '\n': `\a`, 189 '\f': `\c`, 190 '\r': `\d`, 191 // Encode HTML specials as hex so the output can be embedded 192 // in HTML attributes without further encoding. 
193 '"': `\22`, 194 '&': `\26`, 195 '\'': `\27`, 196 '(': `\28`, 197 ')': `\29`, 198 '+': `\2b`, 199 '/': `\2f`, 200 ':': `\3a`, 201 ';': `\3b`, 202 '<': `\3c`, 203 '>': `\3e`, 204 '\\': `\\`, 205 '{': `\7b`, 206 '}': `\7d`, 207 } 208 209 var expressionBytes = []byte("expression") 210 var mozBindingBytes = []byte("mozbinding") 211 212 // cssValueFilter allows innocuous CSS values in the output including CSS 213 // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values 214 // (inherit, blue), and colors (#888). 215 // It filters out unsafe values, such as those that affect token boundaries, 216 // and anything that might execute scripts. 217 func cssValueFilter(args ...interface{}) string { 218 s, t := stringify(args...) 219 if t == contentTypeCSS { 220 return s 221 } 222 b, id := decodeCSS([]byte(s)), make([]byte, 0, 64) 223 224 // CSS3 error handling is specified as honoring string boundaries per 225 // http://www.w3.org/TR/css3-syntax/#error-handling : 226 // Malformed declarations. User agents must handle unexpected 227 // tokens encountered while parsing a declaration by reading until 228 // the end of the declaration, while observing the rules for 229 // matching pairs of (), [], {}, "", and '', and correctly handling 230 // escapes. For example, a malformed declaration may be missing a 231 // property, colon (:) or value. 232 // So we need to make sure that values do not have mismatched bracket 233 // or quote characters to prevent the browser from restarting parsing 234 // inside a string that might embed JavaScript source. 235 for i, c := range b { 236 switch c { 237 case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}': 238 return filterFailsafe 239 case '-': 240 // Disallow <!-- or -->. 241 // -- should not appear in valid identifiers. 
242 if i != 0 && b[i-1] == '-' { 243 return filterFailsafe 244 } 245 default: 246 if c < 0x80 && isCSSNmchar(rune(c)) { 247 id = append(id, c) 248 } 249 } 250 } 251 id = bytes.ToLower(id) 252 if bytes.Index(id, expressionBytes) != -1 || bytes.Index(id, mozBindingBytes) != -1 { 253 return filterFailsafe 254 } 255 return string(b) 256 } 257