Home | History | Annotate | Download | only in template
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package template
      6 
      7 import (
      8 	"bytes"
      9 	"encoding/json"
     10 	"fmt"
     11 	"reflect"
     12 	"strings"
     13 	"unicode/utf8"
     14 )
     15 
     16 // nextJSCtx returns the context that determines whether a slash after the
     17 // given run of tokens starts a regular expression instead of a division
     18 // operator: / or /=.
     19 //
     20 // This assumes that the token run does not include any string tokens, comment
     21 // tokens, regular expression literal tokens, or division operators.
     22 //
     23 // This fails on some valid but nonsensical JavaScript programs like
     24 // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
     25 // fail on any known useful programs. It is based on the draft
     26 // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
     27 // http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
     28 func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
     29 	s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
     30 	if len(s) == 0 {
     31 		return preceding
     32 	}
     33 
     34 	// All cases below are in the single-byte UTF-8 group.
     35 	switch c, n := s[len(s)-1], len(s); c {
     36 	case '+', '-':
     37 		// ++ and -- are not regexp preceders, but + and - are whether
     38 		// they are used as infix or prefix operators.
     39 		start := n - 1
     40 		// Count the number of adjacent dashes or pluses.
     41 		for start > 0 && s[start-1] == c {
     42 			start--
     43 		}
     44 		if (n-start)&1 == 1 {
     45 			// Reached for trailing minus signs since "---" is the
     46 			// same as "-- -".
     47 			return jsCtxRegexp
     48 		}
     49 		return jsCtxDivOp
     50 	case '.':
     51 		// Handle "42."
     52 		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
     53 			return jsCtxDivOp
     54 		}
     55 		return jsCtxRegexp
     56 	// Suffixes for all punctuators from section 7.7 of the language spec
     57 	// that only end binary operators not handled above.
     58 	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
     59 		return jsCtxRegexp
     60 	// Suffixes for all punctuators from section 7.7 of the language spec
     61 	// that are prefix operators not handled above.
     62 	case '!', '~':
     63 		return jsCtxRegexp
     64 	// Matches all the punctuators from section 7.7 of the language spec
     65 	// that are open brackets not handled above.
     66 	case '(', '[':
     67 		return jsCtxRegexp
     68 	// Matches all the punctuators from section 7.7 of the language spec
     69 	// that precede expression starts.
     70 	case ':', ';', '{':
     71 		return jsCtxRegexp
     72 	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
     73 	// are handled in the default except for '}' which can precede a
     74 	// division op as in
     75 	//    ({ valueOf: function () { return 42 } } / 2
     76 	// which is valid, but, in practice, developers don't divide object
     77 	// literals, so our heuristic works well for code like
     78 	//    function () { ... }  /foo/.test(x) && sideEffect();
     79 	// The ')' punctuator can precede a regular expression as in
     80 	//     if (b) /foo/.test(x) && ...
     81 	// but this is much less likely than
     82 	//     (a + b) / c
     83 	case '}':
     84 		return jsCtxRegexp
     85 	default:
     86 		// Look for an IdentifierName and see if it is a keyword that
     87 		// can precede a regular expression.
     88 		j := n
     89 		for j > 0 && isJSIdentPart(rune(s[j-1])) {
     90 			j--
     91 		}
     92 		if regexpPrecederKeywords[string(s[j:])] {
     93 			return jsCtxRegexp
     94 		}
     95 	}
     96 	// Otherwise is a punctuator not listed above, or
     97 	// a string which precedes a div op, or an identifier
     98 	// which precedes a div op.
     99 	return jsCtxDivOp
    100 }
    101 
    102 // regexpPrecederKeywords is a set of reserved JS keywords that can precede a
    103 // regular expression in JS source.
    104 var regexpPrecederKeywords = map[string]bool{
    105 	"break":      true,
    106 	"case":       true,
    107 	"continue":   true,
    108 	"delete":     true,
    109 	"do":         true,
    110 	"else":       true,
    111 	"finally":    true,
    112 	"in":         true,
    113 	"instanceof": true,
    114 	"return":     true,
    115 	"throw":      true,
    116 	"try":        true,
    117 	"typeof":     true,
    118 	"void":       true,
    119 }
    120 
    121 var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()
    122 
    123 // indirectToJSONMarshaler returns the value, after dereferencing as many times
    124 // as necessary to reach the base type (or nil) or an implementation of json.Marshal.
    125 func indirectToJSONMarshaler(a interface{}) interface{} {
    126 	v := reflect.ValueOf(a)
    127 	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() {
    128 		v = v.Elem()
    129 	}
    130 	return v.Interface()
    131 }
    132 
    133 // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
    134 // neither side-effects nor free variables outside (NaN, Infinity).
    135 func jsValEscaper(args ...interface{}) string {
    136 	var a interface{}
    137 	if len(args) == 1 {
    138 		a = indirectToJSONMarshaler(args[0])
    139 		switch t := a.(type) {
    140 		case JS:
    141 			return string(t)
    142 		case JSStr:
    143 			// TODO: normalize quotes.
    144 			return `"` + string(t) + `"`
    145 		case json.Marshaler:
    146 			// Do not treat as a Stringer.
    147 		case fmt.Stringer:
    148 			a = t.String()
    149 		}
    150 	} else {
    151 		for i, arg := range args {
    152 			args[i] = indirectToJSONMarshaler(arg)
    153 		}
    154 		a = fmt.Sprint(args...)
    155 	}
    156 	// TODO: detect cycles before calling Marshal which loops infinitely on
    157 	// cyclic data. This may be an unacceptable DoS risk.
    158 
    159 	b, err := json.Marshal(a)
    160 	if err != nil {
    161 		// Put a space before comment so that if it is flush against
    162 		// a division operator it is not turned into a line comment:
    163 		//     x/{{y}}
    164 		// turning into
    165 		//     x//* error marshaling y:
    166 		//          second line of error message */null
    167 		return fmt.Sprintf(" /* %s */null ", strings.Replace(err.Error(), "*/", "* /", -1))
    168 	}
    169 
    170 	// TODO: maybe post-process output to prevent it from containing
    171 	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
    172 	// in case custom marshalers produce output containing those.
    173 
    174 	// TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.
    175 	if len(b) == 0 {
    176 		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
    177 		// not cause the output `x=y/*z`.
    178 		return " null "
    179 	}
    180 	first, _ := utf8.DecodeRune(b)
    181 	last, _ := utf8.DecodeLastRune(b)
    182 	var buf bytes.Buffer
    183 	// Prevent IdentifierNames and NumericLiterals from running into
    184 	// keywords: in, instanceof, typeof, void
    185 	pad := isJSIdentPart(first) || isJSIdentPart(last)
    186 	if pad {
    187 		buf.WriteByte(' ')
    188 	}
    189 	written := 0
    190 	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
    191 	// so it falls within the subset of JSON which is valid JS.
    192 	for i := 0; i < len(b); {
    193 		rune, n := utf8.DecodeRune(b[i:])
    194 		repl := ""
    195 		if rune == 0x2028 {
    196 			repl = `\u2028`
    197 		} else if rune == 0x2029 {
    198 			repl = `\u2029`
    199 		}
    200 		if repl != "" {
    201 			buf.Write(b[written:i])
    202 			buf.WriteString(repl)
    203 			written = i + n
    204 		}
    205 		i += n
    206 	}
    207 	if buf.Len() != 0 {
    208 		buf.Write(b[written:])
    209 		if pad {
    210 			buf.WriteByte(' ')
    211 		}
    212 		b = buf.Bytes()
    213 	}
    214 	return string(b)
    215 }
    216 
    217 // jsStrEscaper produces a string that can be included between quotes in
    218 // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
    219 // or in an HTML5 event handler attribute such as onclick.
    220 func jsStrEscaper(args ...interface{}) string {
    221 	s, t := stringify(args...)
    222 	if t == contentTypeJSStr {
    223 		return replace(s, jsStrNormReplacementTable)
    224 	}
    225 	return replace(s, jsStrReplacementTable)
    226 }
    227 
    228 // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
    229 // specials so the result is treated literally when included in a regular
    230 // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
    231 // the literal text of {{.X}} followed by the string "bar".
    232 func jsRegexpEscaper(args ...interface{}) string {
    233 	s, _ := stringify(args...)
    234 	s = replace(s, jsRegexpReplacementTable)
    235 	if s == "" {
    236 		// /{{.X}}/ should not produce a line comment when .X == "".
    237 		return "(?:)"
    238 	}
    239 	return s
    240 }
    241 
    242 // replace replaces each rune r of s with replacementTable[r], provided that
    243 // r < len(replacementTable). If replacementTable[r] is the empty string then
    244 // no replacement is made.
    245 // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
    246 // `\u2029`.
    247 func replace(s string, replacementTable []string) string {
    248 	var b bytes.Buffer
    249 	r, w, written := rune(0), 0, 0
    250 	for i := 0; i < len(s); i += w {
    251 		// See comment in htmlEscaper.
    252 		r, w = utf8.DecodeRuneInString(s[i:])
    253 		var repl string
    254 		switch {
    255 		case int(r) < len(replacementTable) && replacementTable[r] != "":
    256 			repl = replacementTable[r]
    257 		case r == '\u2028':
    258 			repl = `\u2028`
    259 		case r == '\u2029':
    260 			repl = `\u2029`
    261 		default:
    262 			continue
    263 		}
    264 		b.WriteString(s[written:i])
    265 		b.WriteString(repl)
    266 		written = i + w
    267 	}
    268 	if written == 0 {
    269 		return s
    270 	}
    271 	b.WriteString(s[written:])
    272 	return b.String()
    273 }
    274 
    275 var jsStrReplacementTable = []string{
    276 	0:    `\0`,
    277 	'\t': `\t`,
    278 	'\n': `\n`,
    279 	'\v': `\x0b`, // "\v" == "v" on IE 6.
    280 	'\f': `\f`,
    281 	'\r': `\r`,
    282 	// Encode HTML specials as hex so the output can be embedded
    283 	// in HTML attributes without further encoding.
    284 	'"':  `\x22`,
    285 	'&':  `\x26`,
    286 	'\'': `\x27`,
    287 	'+':  `\x2b`,
    288 	'/':  `\/`,
    289 	'<':  `\x3c`,
    290 	'>':  `\x3e`,
    291 	'\\': `\\`,
    292 }
    293 
    294 // jsStrNormReplacementTable is like jsStrReplacementTable but does not
    295 // overencode existing escapes since this table has no entry for `\`.
    296 var jsStrNormReplacementTable = []string{
    297 	0:    `\0`,
    298 	'\t': `\t`,
    299 	'\n': `\n`,
    300 	'\v': `\x0b`, // "\v" == "v" on IE 6.
    301 	'\f': `\f`,
    302 	'\r': `\r`,
    303 	// Encode HTML specials as hex so the output can be embedded
    304 	// in HTML attributes without further encoding.
    305 	'"':  `\x22`,
    306 	'&':  `\x26`,
    307 	'\'': `\x27`,
    308 	'+':  `\x2b`,
    309 	'/':  `\/`,
    310 	'<':  `\x3c`,
    311 	'>':  `\x3e`,
    312 }
    313 
    314 var jsRegexpReplacementTable = []string{
    315 	0:    `\0`,
    316 	'\t': `\t`,
    317 	'\n': `\n`,
    318 	'\v': `\x0b`, // "\v" == "v" on IE 6.
    319 	'\f': `\f`,
    320 	'\r': `\r`,
    321 	// Encode HTML specials as hex so the output can be embedded
    322 	// in HTML attributes without further encoding.
    323 	'"':  `\x22`,
    324 	'$':  `\$`,
    325 	'&':  `\x26`,
    326 	'\'': `\x27`,
    327 	'(':  `\(`,
    328 	')':  `\)`,
    329 	'*':  `\*`,
    330 	'+':  `\x2b`,
    331 	'-':  `\-`,
    332 	'.':  `\.`,
    333 	'/':  `\/`,
    334 	'<':  `\x3c`,
    335 	'>':  `\x3e`,
    336 	'?':  `\?`,
    337 	'[':  `\[`,
    338 	'\\': `\\`,
    339 	']':  `\]`,
    340 	'^':  `\^`,
    341 	'{':  `\{`,
    342 	'|':  `\|`,
    343 	'}':  `\}`,
    344 }
    345 
    346 // isJSIdentPart reports whether the given rune is a JS identifier part.
    347 // It does not handle all the non-Latin letters, joiners, and combining marks,
    348 // but it does handle every codepoint that can occur in a numeric literal or
    349 // a keyword.
    350 func isJSIdentPart(r rune) bool {
    351 	switch {
    352 	case r == '$':
    353 		return true
    354 	case '0' <= r && r <= '9':
    355 		return true
    356 	case 'A' <= r && r <= 'Z':
    357 		return true
    358 	case r == '_':
    359 		return true
    360 	case 'a' <= r && r <= 'z':
    361 		return true
    362 	}
    363 	return false
    364 }
    365 
    366 // isJSType returns true if the given MIME type should be considered JavaScript.
    367 //
    368 // It is used to determine whether a script tag with a type attribute is a javascript container.
    369 func isJSType(mimeType string) bool {
    370 	// per
    371 	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
    372 	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
    373 	//   https://tools.ietf.org/html/rfc4329#section-3
    374 	//   https://www.ietf.org/rfc/rfc4627.txt
    375 	mimeType = strings.ToLower(mimeType)
    376 	// discard parameters
    377 	if i := strings.Index(mimeType, ";"); i >= 0 {
    378 		mimeType = mimeType[:i]
    379 	}
    380 	mimeType = strings.TrimSpace(mimeType)
    381 	switch mimeType {
    382 	case
    383 		"application/ecmascript",
    384 		"application/javascript",
    385 		"application/json",
    386 		"application/x-ecmascript",
    387 		"application/x-javascript",
    388 		"text/ecmascript",
    389 		"text/javascript",
    390 		"text/javascript1.0",
    391 		"text/javascript1.1",
    392 		"text/javascript1.2",
    393 		"text/javascript1.3",
    394 		"text/javascript1.4",
    395 		"text/javascript1.5",
    396 		"text/jscript",
    397 		"text/livescript",
    398 		"text/x-ecmascript",
    399 		"text/x-javascript":
    400 		return true
    401 	default:
    402 		return false
    403 	}
    404 }
    405