1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package html 6 7 import ( 8 "strings" 9 "testing" 10 ) 11 12 type unescapeTest struct { 13 // A short description of the test case. 14 desc string 15 // The HTML text. 16 html string 17 // The unescaped text. 18 unescaped string 19 } 20 21 var unescapeTests = []unescapeTest{ 22 // Handle no entities. 23 { 24 "copy", 25 "A\ttext\nstring", 26 "A\ttext\nstring", 27 }, 28 // Handle simple named entities. 29 { 30 "simple", 31 "& > <", 32 "& > <", 33 }, 34 // Handle hitting the end of the string. 35 { 36 "stringEnd", 37 "& &", 38 "& &", 39 }, 40 // Handle entities with two codepoints. 41 { 42 "multiCodepoint", 43 "text ⋛︀ blah", 44 "text \u22db\ufe00 blah", 45 }, 46 // Handle decimal numeric entities. 47 { 48 "decimalEntity", 49 "Delta = Δ ", 50 "Delta = ", 51 }, 52 // Handle hexadecimal numeric entities. 53 { 54 "hexadecimalEntity", 55 "Lambda = λ = λ ", 56 "Lambda = = ", 57 }, 58 // Handle numeric early termination. 59 { 60 "numericEnds", 61 "&# &#x €43 © = ©f = ©", 62 "&# &#x 43 = f = ", 63 }, 64 // Handle numeric ISO-8859-1 entity replacements. 65 { 66 "numericReplacements", 67 "Footnote‡", 68 "Footnote", 69 }, 70 // Handle single ampersand. 71 { 72 "copySingleAmpersand", 73 "&", 74 "&", 75 }, 76 // Handle ampersand followed by non-entity. 77 { 78 "copyAmpersandNonEntity", 79 "text &test", 80 "text &test", 81 }, 82 // Handle "&#". 83 { 84 "copyAmpersandHash", 85 "text &#", 86 "text &#", 87 }, 88 } 89 90 func TestUnescape(t *testing.T) { 91 for _, tt := range unescapeTests { 92 unescaped := UnescapeString(tt.html) 93 if unescaped != tt.unescaped { 94 t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped) 95 } 96 } 97 } 98 99 func TestUnescapeEscape(t *testing.T) { 100 ss := []string{ 101 ``, 102 `abc def`, 103 `a & b`, 104 `a&b`, 105 `a & b`, 106 `"`, 107 `"`, 108 `"<&>"`, 109 `"<&>"`, 110 `3&5==1 && 0<1, "0<1", a+acute=á`, 111 `The special characters are: <, >, &, ' and "`, 112 } 113 for _, s := range ss { 114 if got := UnescapeString(EscapeString(s)); got != s { 115 t.Errorf("got %q want %q", got, s) 116 } 117 } 118 } 119 120 var ( 121 benchEscapeData = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100) 122 benchEscapeNone = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100) 123 ) 124 125 func BenchmarkEscape(b *testing.B) { 126 n := 0 127 for i := 0; i < b.N; i++ { 128 n += len(EscapeString(benchEscapeData)) 129 } 130 } 131 132 func BenchmarkEscapeNone(b *testing.B) { 133 n := 0 134 for i := 0; i < b.N; i++ { 135 n += len(EscapeString(benchEscapeNone)) 136 } 137 } 138 139 func BenchmarkUnescape(b *testing.B) { 140 s := EscapeString(benchEscapeData) 141 n := 0 142 for i := 0; i < b.N; i++ { 143 n += len(UnescapeString(s)) 144 } 145 } 146 147 func BenchmarkUnescapeNone(b *testing.B) { 148 s := EscapeString(benchEscapeNone) 149 n := 0 150 for i := 0; i < b.N; i++ { 151 n += len(UnescapeString(s)) 152 } 153 } 154