1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "fmt" 10 "strings" 11 ) 12 13 // urlFilter returns its input unless it contains an unsafe protocol in which 14 // case it defangs the entire URL. 15 func urlFilter(args ...interface{}) string { 16 s, t := stringify(args...) 17 if t == contentTypeURL { 18 return s 19 } 20 if i := strings.IndexRune(s, ':'); i >= 0 && strings.IndexRune(s[:i], '/') < 0 { 21 protocol := strings.ToLower(s[:i]) 22 if protocol != "http" && protocol != "https" && protocol != "mailto" { 23 return "#" + filterFailsafe 24 } 25 } 26 return s 27 } 28 29 // urlEscaper produces an output that can be embedded in a URL query. 30 // The output can be embedded in an HTML attribute without further escaping. 31 func urlEscaper(args ...interface{}) string { 32 return urlProcessor(false, args...) 33 } 34 35 // urlEscaper normalizes URL content so it can be embedded in a quote-delimited 36 // string or parenthesis delimited url(...). 37 // The normalizer does not encode all HTML specials. Specifically, it does not 38 // encode '&' so correct embedding in an HTML attribute requires escaping of 39 // '&' to '&'. 40 func urlNormalizer(args ...interface{}) string { 41 return urlProcessor(true, args...) 42 } 43 44 // urlProcessor normalizes (when norm is true) or escapes its input to produce 45 // a valid hierarchical or opaque URL part. 46 func urlProcessor(norm bool, args ...interface{}) string { 47 s, t := stringify(args...) 48 if t == contentTypeURL { 49 norm = true 50 } 51 var b bytes.Buffer 52 written := 0 53 // The byte loop below assumes that all URLs use UTF-8 as the 54 // content-encoding. This is similar to the URI to IRI encoding scheme 55 // defined in section 3.1 of RFC 3987, and behaves the same as the 56 // EcmaScript builtin encodeURIComponent. 57 // It should not cause any misencoding of URLs in pages with 58 // Content-type: text/html;charset=UTF-8. 59 for i, n := 0, len(s); i < n; i++ { 60 c := s[i] 61 switch c { 62 // Single quote and parens are sub-delims in RFC 3986, but we 63 // escape them so the output can be embedded in single 64 // quoted attributes and unquoted CSS url(...) constructs. 65 // Single quotes are reserved in URLs, but are only used in 66 // the obsolete "mark" rule in an appendix in RFC 3986 67 // so can be safely encoded. 68 case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']': 69 if norm { 70 continue 71 } 72 // Unreserved according to RFC 3986 sec 2.3 73 // "For consistency, percent-encoded octets in the ranges of 74 // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), 75 // period (%2E), underscore (%5F), or tilde (%7E) should not be 76 // created by URI producers 77 case '-', '.', '_', '~': 78 continue 79 case '%': 80 // When normalizing do not re-encode valid escapes. 81 if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) { 82 continue 83 } 84 default: 85 // Unreserved according to RFC 3986 sec 2.3 86 if 'a' <= c && c <= 'z' { 87 continue 88 } 89 if 'A' <= c && c <= 'Z' { 90 continue 91 } 92 if '0' <= c && c <= '9' { 93 continue 94 } 95 } 96 b.WriteString(s[written:i]) 97 fmt.Fprintf(&b, "%%%02x", c) 98 written = i + 1 99 } 100 if written == 0 { 101 return s 102 } 103 b.WriteString(s[written:]) 104 return b.String() 105 } 106