Home | History | Annotate | Download | only in template
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package template
      6 
      7 import (
      8 	"fmt"
      9 )
     10 
     11 // context describes the state an HTML parser must be in when it reaches the
     12 // portion of HTML produced by evaluating a particular template node.
     13 //
     14 // The zero value of type context is the start context for a template that
     15 // produces an HTML fragment as defined at
     16 // http://www.w3.org/TR/html5/syntax.html#the-end
     17 // where the context element is null.
     18 type context struct {
     19 	state   state
     20 	delim   delim
     21 	urlPart urlPart
     22 	jsCtx   jsCtx
     23 	attr    attr
     24 	element element
     25 	err     *Error
     26 }
     27 
     28 func (c context) String() string {
     29 	return fmt.Sprintf("{%v %v %v %v %v %v %v}", c.state, c.delim, c.urlPart, c.jsCtx, c.attr, c.element, c.err)
     30 }
     31 
     32 // eq reports whether two contexts are equal.
     33 func (c context) eq(d context) bool {
     34 	return c.state == d.state &&
     35 		c.delim == d.delim &&
     36 		c.urlPart == d.urlPart &&
     37 		c.jsCtx == d.jsCtx &&
     38 		c.attr == d.attr &&
     39 		c.element == d.element &&
     40 		c.err == d.err
     41 }
     42 
     43 // mangle produces an identifier that includes a suffix that distinguishes it
     44 // from template names mangled with different contexts.
     45 func (c context) mangle(templateName string) string {
     46 	// The mangled name for the default context is the input templateName.
     47 	if c.state == stateText {
     48 		return templateName
     49 	}
     50 	s := templateName + "$htmltemplate_" + c.state.String()
     51 	if c.delim != 0 {
     52 		s += "_" + c.delim.String()
     53 	}
     54 	if c.urlPart != 0 {
     55 		s += "_" + c.urlPart.String()
     56 	}
     57 	if c.jsCtx != 0 {
     58 		s += "_" + c.jsCtx.String()
     59 	}
     60 	if c.attr != 0 {
     61 		s += "_" + c.attr.String()
     62 	}
     63 	if c.element != 0 {
     64 		s += "_" + c.element.String()
     65 	}
     66 	return s
     67 }
     68 
     69 // state describes a high-level HTML parser state.
     70 //
     71 // It bounds the top of the element stack, and by extension the HTML insertion
     72 // mode, but also contains state that does not correspond to anything in the
     73 // HTML5 parsing algorithm because a single token production in the HTML
     74 // grammar may contain embedded actions in a template. For instance, the quoted
     75 // HTML attribute produced by
     76 //     <div title="Hello {{.World}}">
     77 // is a single token in HTML's grammar but in a template spans several nodes.
     78 type state uint8
     79 
     80 const (
     81 	// stateText is parsed character data. An HTML parser is in
     82 	// this state when its parse position is outside an HTML tag,
     83 	// directive, comment, and special element body.
     84 	stateText state = iota
     85 	// stateTag occurs before an HTML attribute or the end of a tag.
     86 	stateTag
     87 	// stateAttrName occurs inside an attribute name.
     88 	// It occurs between the ^'s in ` ^name^ = value`.
     89 	stateAttrName
     90 	// stateAfterName occurs after an attr name has ended but before any
     91 	// equals sign. It occurs between the ^'s in ` name^ ^= value`.
     92 	stateAfterName
     93 	// stateBeforeValue occurs after the equals sign but before the value.
     94 	// It occurs between the ^'s in ` name =^ ^value`.
     95 	stateBeforeValue
     96 	// stateHTMLCmt occurs inside an <!-- HTML comment -->.
     97 	stateHTMLCmt
     98 	// stateRCDATA occurs inside an RCDATA element (<textarea> or <title>)
     99 	// as described at http://www.w3.org/TR/html5/syntax.html#elements-0
    100 	stateRCDATA
    101 	// stateAttr occurs inside an HTML attribute whose content is text.
    102 	stateAttr
    103 	// stateURL occurs inside an HTML attribute whose content is a URL.
    104 	stateURL
    105 	// stateSrcset occurs inside an HTML srcset attribute.
    106 	stateSrcset
    107 	// stateJS occurs inside an event handler or script element.
    108 	stateJS
    109 	// stateJSDqStr occurs inside a JavaScript double quoted string.
    110 	stateJSDqStr
    111 	// stateJSSqStr occurs inside a JavaScript single quoted string.
    112 	stateJSSqStr
    113 	// stateJSRegexp occurs inside a JavaScript regexp literal.
    114 	stateJSRegexp
    115 	// stateJSBlockCmt occurs inside a JavaScript /* block comment */.
    116 	stateJSBlockCmt
    117 	// stateJSLineCmt occurs inside a JavaScript // line comment.
    118 	stateJSLineCmt
    119 	// stateCSS occurs inside a <style> element or style attribute.
    120 	stateCSS
    121 	// stateCSSDqStr occurs inside a CSS double quoted string.
    122 	stateCSSDqStr
    123 	// stateCSSSqStr occurs inside a CSS single quoted string.
    124 	stateCSSSqStr
    125 	// stateCSSDqURL occurs inside a CSS double quoted url("...").
    126 	stateCSSDqURL
    127 	// stateCSSSqURL occurs inside a CSS single quoted url('...').
    128 	stateCSSSqURL
    129 	// stateCSSURL occurs inside a CSS unquoted url(...).
    130 	stateCSSURL
    131 	// stateCSSBlockCmt occurs inside a CSS /* block comment */.
    132 	stateCSSBlockCmt
    133 	// stateCSSLineCmt occurs inside a CSS // line comment.
    134 	stateCSSLineCmt
    135 	// stateError is an infectious error state outside any valid
    136 	// HTML/CSS/JS construct.
    137 	stateError
    138 )
    139 
    140 var stateNames = [...]string{
    141 	stateText:        "stateText",
    142 	stateTag:         "stateTag",
    143 	stateAttrName:    "stateAttrName",
    144 	stateAfterName:   "stateAfterName",
    145 	stateBeforeValue: "stateBeforeValue",
    146 	stateHTMLCmt:     "stateHTMLCmt",
    147 	stateRCDATA:      "stateRCDATA",
    148 	stateAttr:        "stateAttr",
    149 	stateURL:         "stateURL",
    150 	stateSrcset:      "stateSrcset",
    151 	stateJS:          "stateJS",
    152 	stateJSDqStr:     "stateJSDqStr",
    153 	stateJSSqStr:     "stateJSSqStr",
    154 	stateJSRegexp:    "stateJSRegexp",
    155 	stateJSBlockCmt:  "stateJSBlockCmt",
    156 	stateJSLineCmt:   "stateJSLineCmt",
    157 	stateCSS:         "stateCSS",
    158 	stateCSSDqStr:    "stateCSSDqStr",
    159 	stateCSSSqStr:    "stateCSSSqStr",
    160 	stateCSSDqURL:    "stateCSSDqURL",
    161 	stateCSSSqURL:    "stateCSSSqURL",
    162 	stateCSSURL:      "stateCSSURL",
    163 	stateCSSBlockCmt: "stateCSSBlockCmt",
    164 	stateCSSLineCmt:  "stateCSSLineCmt",
    165 	stateError:       "stateError",
    166 }
    167 
    168 func (s state) String() string {
    169 	if int(s) < len(stateNames) {
    170 		return stateNames[s]
    171 	}
    172 	return fmt.Sprintf("illegal state %d", int(s))
    173 }
    174 
    175 // isComment is true for any state that contains content meant for template
    176 // authors & maintainers, not for end-users or machines.
    177 func isComment(s state) bool {
    178 	switch s {
    179 	case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateCSSBlockCmt, stateCSSLineCmt:
    180 		return true
    181 	}
    182 	return false
    183 }
    184 
    185 // isInTag return whether s occurs solely inside an HTML tag.
    186 func isInTag(s state) bool {
    187 	switch s {
    188 	case stateTag, stateAttrName, stateAfterName, stateBeforeValue, stateAttr:
    189 		return true
    190 	}
    191 	return false
    192 }
    193 
    194 // delim is the delimiter that will end the current HTML attribute.
    195 type delim uint8
    196 
    197 const (
    198 	// delimNone occurs outside any attribute.
    199 	delimNone delim = iota
    200 	// delimDoubleQuote occurs when a double quote (") closes the attribute.
    201 	delimDoubleQuote
    202 	// delimSingleQuote occurs when a single quote (') closes the attribute.
    203 	delimSingleQuote
    204 	// delimSpaceOrTagEnd occurs when a space or right angle bracket (>)
    205 	// closes the attribute.
    206 	delimSpaceOrTagEnd
    207 )
    208 
    209 var delimNames = [...]string{
    210 	delimNone:          "delimNone",
    211 	delimDoubleQuote:   "delimDoubleQuote",
    212 	delimSingleQuote:   "delimSingleQuote",
    213 	delimSpaceOrTagEnd: "delimSpaceOrTagEnd",
    214 }
    215 
    216 func (d delim) String() string {
    217 	if int(d) < len(delimNames) {
    218 		return delimNames[d]
    219 	}
    220 	return fmt.Sprintf("illegal delim %d", int(d))
    221 }
    222 
    223 // urlPart identifies a part in an RFC 3986 hierarchical URL to allow different
    224 // encoding strategies.
    225 type urlPart uint8
    226 
    227 const (
    228 	// urlPartNone occurs when not in a URL, or possibly at the start:
    229 	// ^ in "^http://auth/path?k=v#frag".
    230 	urlPartNone urlPart = iota
    231 	// urlPartPreQuery occurs in the scheme, authority, or path; between the
    232 	// ^s in "h^ttp://auth/path^?k=v#frag".
    233 	urlPartPreQuery
    234 	// urlPartQueryOrFrag occurs in the query portion between the ^s in
    235 	// "http://auth/path?^k=v#frag^".
    236 	urlPartQueryOrFrag
    237 	// urlPartUnknown occurs due to joining of contexts both before and
    238 	// after the query separator.
    239 	urlPartUnknown
    240 )
    241 
    242 var urlPartNames = [...]string{
    243 	urlPartNone:        "urlPartNone",
    244 	urlPartPreQuery:    "urlPartPreQuery",
    245 	urlPartQueryOrFrag: "urlPartQueryOrFrag",
    246 	urlPartUnknown:     "urlPartUnknown",
    247 }
    248 
    249 func (u urlPart) String() string {
    250 	if int(u) < len(urlPartNames) {
    251 		return urlPartNames[u]
    252 	}
    253 	return fmt.Sprintf("illegal urlPart %d", int(u))
    254 }
    255 
    256 // jsCtx determines whether a '/' starts a regular expression literal or a
    257 // division operator.
    258 type jsCtx uint8
    259 
    260 const (
    261 	// jsCtxRegexp occurs where a '/' would start a regexp literal.
    262 	jsCtxRegexp jsCtx = iota
    263 	// jsCtxDivOp occurs where a '/' would start a division operator.
    264 	jsCtxDivOp
    265 	// jsCtxUnknown occurs where a '/' is ambiguous due to context joining.
    266 	jsCtxUnknown
    267 )
    268 
    269 func (c jsCtx) String() string {
    270 	switch c {
    271 	case jsCtxRegexp:
    272 		return "jsCtxRegexp"
    273 	case jsCtxDivOp:
    274 		return "jsCtxDivOp"
    275 	case jsCtxUnknown:
    276 		return "jsCtxUnknown"
    277 	}
    278 	return fmt.Sprintf("illegal jsCtx %d", int(c))
    279 }
    280 
    281 // element identifies the HTML element when inside a start tag or special body.
    282 // Certain HTML element (for example <script> and <style>) have bodies that are
    283 // treated differently from stateText so the element type is necessary to
    284 // transition into the correct context at the end of a tag and to identify the
    285 // end delimiter for the body.
    286 type element uint8
    287 
    288 const (
    289 	// elementNone occurs outside a special tag or special element body.
    290 	elementNone element = iota
    291 	// elementScript corresponds to the raw text <script> element
    292 	// with JS MIME type or no type attribute.
    293 	elementScript
    294 	// elementStyle corresponds to the raw text <style> element.
    295 	elementStyle
    296 	// elementTextarea corresponds to the RCDATA <textarea> element.
    297 	elementTextarea
    298 	// elementTitle corresponds to the RCDATA <title> element.
    299 	elementTitle
    300 )
    301 
    302 var elementNames = [...]string{
    303 	elementNone:     "elementNone",
    304 	elementScript:   "elementScript",
    305 	elementStyle:    "elementStyle",
    306 	elementTextarea: "elementTextarea",
    307 	elementTitle:    "elementTitle",
    308 }
    309 
    310 func (e element) String() string {
    311 	if int(e) < len(elementNames) {
    312 		return elementNames[e]
    313 	}
    314 	return fmt.Sprintf("illegal element %d", int(e))
    315 }
    316 
    317 // attr identifies the current HTML attribute when inside the attribute,
    318 // that is, starting from stateAttrName until stateTag/stateText (exclusive).
    319 type attr uint8
    320 
    321 const (
    322 	// attrNone corresponds to a normal attribute or no attribute.
    323 	attrNone attr = iota
    324 	// attrScript corresponds to an event handler attribute.
    325 	attrScript
    326 	// attrScriptType corresponds to the type attribute in script HTML element
    327 	attrScriptType
    328 	// attrStyle corresponds to the style attribute whose value is CSS.
    329 	attrStyle
    330 	// attrURL corresponds to an attribute whose value is a URL.
    331 	attrURL
    332 	// attrSrcset corresponds to a srcset attribute.
    333 	attrSrcset
    334 )
    335 
    336 var attrNames = [...]string{
    337 	attrNone:       "attrNone",
    338 	attrScript:     "attrScript",
    339 	attrScriptType: "attrScriptType",
    340 	attrStyle:      "attrStyle",
    341 	attrURL:        "attrURL",
    342 	attrSrcset:     "attrSrcset",
    343 }
    344 
    345 func (a attr) String() string {
    346 	if int(a) < len(attrNames) {
    347 		return attrNames[a]
    348 	}
    349 	return fmt.Sprintf("illegal attr %d", int(a))
    350 }
    351