Home | History | Annotate | Download | only in template
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package template
      6 
      7 import (
      8 	"bytes"
      9 	"fmt"
     10 	"strings"
     11 )
     12 
     13 // urlFilter returns its input unless it contains an unsafe scheme in which
     14 // case it defangs the entire URL.
     15 //
     16 // Schemes that cause unintended side effects that are irreversible without user
     17 // interaction are considered unsafe. For example, clicking on a "javascript:"
     18 // link can immediately trigger JavaScript code execution.
     19 //
     20 // This filter conservatively assumes that all schemes other than the following
     21 // are unsafe:
     22 //    * http:   Navigates to a new website, and may open a new window or tab.
     23 //              These side effects can be reversed by navigating back to the
     24 //              previous website, or closing the window or tab. No irreversible
     25 //              changes will take place without further user interaction with
     26 //              the new website.
     27 //    * https:  Same as http.
     28 //    * mailto: Opens an email program and starts a new draft. This side effect
     29 //              is not irreversible until the user explicitly clicks send; it
     30 //              can be undone by closing the email program.
     31 //
     32 // To allow URLs containing other schemes to bypass this filter, developers must
     33 // explicitly indicate that such a URL is expected and safe by encapsulating it
     34 // in a template.URL value.
     35 func urlFilter(args ...interface{}) string {
     36 	s, t := stringify(args...)
     37 	if t == contentTypeURL {
     38 		return s
     39 	}
     40 	if !isSafeUrl(s) {
     41 		return "#" + filterFailsafe
     42 	}
     43 	return s
     44 }
     45 
     46 // isSafeUrl is true if s is a relative URL or if URL has a protocol in
     47 // (http, https, mailto).
     48 func isSafeUrl(s string) bool {
     49 	if i := strings.IndexRune(s, ':'); i >= 0 && !strings.ContainsRune(s[:i], '/') {
     50 
     51 		protocol := s[:i]
     52 		if !strings.EqualFold(protocol, "http") && !strings.EqualFold(protocol, "https") && !strings.EqualFold(protocol, "mailto") {
     53 			return false
     54 		}
     55 	}
     56 	return true
     57 }
     58 
     59 // urlEscaper produces an output that can be embedded in a URL query.
     60 // The output can be embedded in an HTML attribute without further escaping.
     61 func urlEscaper(args ...interface{}) string {
     62 	return urlProcessor(false, args...)
     63 }
     64 
     65 // urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
     66 // string or parenthesis delimited url(...).
     67 // The normalizer does not encode all HTML specials. Specifically, it does not
     68 // encode '&' so correct embedding in an HTML attribute requires escaping of
     69 // '&' to '&'.
     70 func urlNormalizer(args ...interface{}) string {
     71 	return urlProcessor(true, args...)
     72 }
     73 
     74 // urlProcessor normalizes (when norm is true) or escapes its input to produce
     75 // a valid hierarchical or opaque URL part.
     76 func urlProcessor(norm bool, args ...interface{}) string {
     77 	s, t := stringify(args...)
     78 	if t == contentTypeURL {
     79 		norm = true
     80 	}
     81 	var b bytes.Buffer
     82 	if processUrlOnto(s, norm, &b) {
     83 		return b.String()
     84 	}
     85 	return s
     86 }
     87 
     88 // processUrlOnto appends a normalized URL corresponding to its input to b
     89 // and returns true if the appended content differs from s.
     90 func processUrlOnto(s string, norm bool, b *bytes.Buffer) bool {
     91 	b.Grow(b.Cap() + len(s) + 16)
     92 	written := 0
     93 	// The byte loop below assumes that all URLs use UTF-8 as the
     94 	// content-encoding. This is similar to the URI to IRI encoding scheme
     95 	// defined in section 3.1 of  RFC 3987, and behaves the same as the
     96 	// EcmaScript builtin encodeURIComponent.
     97 	// It should not cause any misencoding of URLs in pages with
     98 	// Content-type: text/html;charset=UTF-8.
     99 	for i, n := 0, len(s); i < n; i++ {
    100 		c := s[i]
    101 		switch c {
    102 		// Single quote and parens are sub-delims in RFC 3986, but we
    103 		// escape them so the output can be embedded in single
    104 		// quoted attributes and unquoted CSS url(...) constructs.
    105 		// Single quotes are reserved in URLs, but are only used in
    106 		// the obsolete "mark" rule in an appendix in RFC 3986
    107 		// so can be safely encoded.
    108 		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
    109 			if norm {
    110 				continue
    111 			}
    112 		// Unreserved according to RFC 3986 sec 2.3
    113 		// "For consistency, percent-encoded octets in the ranges of
    114 		// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
    115 		// period (%2E), underscore (%5F), or tilde (%7E) should not be
    116 		// created by URI producers
    117 		case '-', '.', '_', '~':
    118 			continue
    119 		case '%':
    120 			// When normalizing do not re-encode valid escapes.
    121 			if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
    122 				continue
    123 			}
    124 		default:
    125 			// Unreserved according to RFC 3986 sec 2.3
    126 			if 'a' <= c && c <= 'z' {
    127 				continue
    128 			}
    129 			if 'A' <= c && c <= 'Z' {
    130 				continue
    131 			}
    132 			if '0' <= c && c <= '9' {
    133 				continue
    134 			}
    135 		}
    136 		b.WriteString(s[written:i])
    137 		fmt.Fprintf(b, "%%%02x", c)
    138 		written = i + 1
    139 	}
    140 	b.WriteString(s[written:])
    141 	return written != 0
    142 }
    143 
    144 // Filters and normalizes srcset values which are comma separated
    145 // URLs followed by metadata.
    146 func srcsetFilterAndEscaper(args ...interface{}) string {
    147 	s, t := stringify(args...)
    148 	switch t {
    149 	case contentTypeSrcset:
    150 		return s
    151 	case contentTypeURL:
    152 		// Normalizing gets rid of all HTML whitespace
    153 		// which separate the image URL from its metadata.
    154 		var b bytes.Buffer
    155 		if processUrlOnto(s, true, &b) {
    156 			s = b.String()
    157 		}
    158 		// Additionally, commas separate one source from another.
    159 		return strings.Replace(s, ",", "%2c", -1)
    160 	}
    161 
    162 	var b bytes.Buffer
    163 	written := 0
    164 	for i := 0; i < len(s); i++ {
    165 		if s[i] == ',' {
    166 			filterSrcsetElement(s, written, i, &b)
    167 			b.WriteString(",")
    168 			written = i + 1
    169 		}
    170 	}
    171 	filterSrcsetElement(s, written, len(s), &b)
    172 	return b.String()
    173 }
    174 
// htmlSpaceAndAsciiAlnumBytes is a 128-bit bitmap covering the ASCII range,
// stored as a 16-byte string: the bit for byte value c is bit (c & 7) of the
// byte at index c>>3. The bits that are set correspond to tab, line feed,
// form feed, carriage return and space, plus the digits '0'-'9' and the
// letters 'A'-'Z' and 'a'-'z'.
//
// Derived from https://play.golang.org/p/Dhmj7FORT5
const htmlSpaceAndAsciiAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"
    177 
    178 // isHtmlSpace is true iff c is a whitespace character per
    179 // https://infra.spec.whatwg.org/#ascii-whitespace
    180 func isHtmlSpace(c byte) bool {
    181 	return (c <= 0x20) && 0 != (htmlSpaceAndAsciiAlnumBytes[c>>3]&(1<<uint(c&0x7)))
    182 }
    183 
    184 func isHtmlSpaceOrAsciiAlnum(c byte) bool {
    185 	return (c < 0x80) && 0 != (htmlSpaceAndAsciiAlnumBytes[c>>3]&(1<<uint(c&0x7)))
    186 }
    187 
    188 func filterSrcsetElement(s string, left int, right int, b *bytes.Buffer) {
    189 	start := left
    190 	for start < right && isHtmlSpace(s[start]) {
    191 		start += 1
    192 	}
    193 	end := right
    194 	for i := start; i < right; i++ {
    195 		if isHtmlSpace(s[i]) {
    196 			end = i
    197 			break
    198 		}
    199 	}
    200 	if url := s[start:end]; isSafeUrl(url) {
    201 		// If image metadata is only spaces or alnums then
    202 		// we don't need to URL normalize it.
    203 		metadataOk := true
    204 		for i := end; i < right; i++ {
    205 			if !isHtmlSpaceOrAsciiAlnum(s[i]) {
    206 				metadataOk = false
    207 				break
    208 			}
    209 		}
    210 		if metadataOk {
    211 			b.WriteString(s[left:start])
    212 			processUrlOnto(url, true, b)
    213 			b.WriteString(s[end:right])
    214 			return
    215 		}
    216 	}
    217 	b.WriteString("#")
    218 	b.WriteString(filterFailsafe)
    219 }
    220