Home | History | Annotate | Download | only in html
      1 // Copyright 2013 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package html
      6 
      7 import (
      8 	"strings"
      9 	"testing"
     10 )
     11 
     12 type unescapeTest struct {
     13 	// A short description of the test case.
     14 	desc string
     15 	// The HTML text.
     16 	html string
     17 	// The unescaped text.
     18 	unescaped string
     19 }
     20 
     21 var unescapeTests = []unescapeTest{
     22 	// Handle no entities.
     23 	{
     24 		"copy",
     25 		"A\ttext\nstring",
     26 		"A\ttext\nstring",
     27 	},
     28 	// Handle simple named entities.
     29 	{
     30 		"simple",
     31 		"& > <",
     32 		"& > <",
     33 	},
     34 	// Handle hitting the end of the string.
     35 	{
     36 		"stringEnd",
     37 		"&amp &amp",
     38 		"& &",
     39 	},
     40 	// Handle entities with two codepoints.
     41 	{
     42 		"multiCodepoint",
     43 		"text &gesl; blah",
     44 		"text \u22db\ufe00 blah",
     45 	},
     46 	// Handle decimal numeric entities.
     47 	{
     48 		"decimalEntity",
     49 		"Delta = &#916; ",
     50 		"Delta =  ",
     51 	},
     52 	// Handle hexadecimal numeric entities.
     53 	{
     54 		"hexadecimalEntity",
     55 		"Lambda = &#x3bb; = &#X3Bb ",
     56 		"Lambda =  =  ",
     57 	},
     58 	// Handle numeric early termination.
     59 	{
     60 		"numericEnds",
     61 		"&# &#x &#128;43 &copy = &#169f = &#xa9",
     62 		"&# &#x 43  = f = ",
     63 	},
     64 	// Handle numeric ISO-8859-1 entity replacements.
     65 	{
     66 		"numericReplacements",
     67 		"Footnote&#x87;",
     68 		"Footnote",
     69 	},
     70 	// Handle single ampersand.
     71 	{
     72 		"copySingleAmpersand",
     73 		"&",
     74 		"&",
     75 	},
     76 	// Handle ampersand followed by non-entity.
     77 	{
     78 		"copyAmpersandNonEntity",
     79 		"text &test",
     80 		"text &test",
     81 	},
     82 	// Handle "&#".
     83 	{
     84 		"copyAmpersandHash",
     85 		"text &#",
     86 		"text &#",
     87 	},
     88 }
     89 
     90 func TestUnescape(t *testing.T) {
     91 	for _, tt := range unescapeTests {
     92 		unescaped := UnescapeString(tt.html)
     93 		if unescaped != tt.unescaped {
     94 			t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
     95 		}
     96 	}
     97 }
     98 
     99 func TestUnescapeEscape(t *testing.T) {
    100 	ss := []string{
    101 		``,
    102 		`abc def`,
    103 		`a & b`,
    104 		`a&amp;b`,
    105 		`a &amp b`,
    106 		`&quot;`,
    107 		`"`,
    108 		`"<&>"`,
    109 		`&quot;&lt;&amp;&gt;&quot;`,
    110 		`3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
    111 		`The special characters are: <, >, &, ' and "`,
    112 	}
    113 	for _, s := range ss {
    114 		if got := UnescapeString(EscapeString(s)); got != s {
    115 			t.Errorf("got %q want %q", got, s)
    116 		}
    117 	}
    118 }
    119 
    120 var (
    121 	benchEscapeData     = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100)
    122 	benchEscapeNone     = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100)
    123 	benchUnescapeSparse = strings.Repeat(strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 10)+"&amp;", 10)
    124 	benchUnescapeDense  = strings.Repeat("&amp;&lt; &amp; &lt;", 100)
    125 )
    126 
    127 func BenchmarkEscape(b *testing.B) {
    128 	n := 0
    129 	for i := 0; i < b.N; i++ {
    130 		n += len(EscapeString(benchEscapeData))
    131 	}
    132 }
    133 
    134 func BenchmarkEscapeNone(b *testing.B) {
    135 	n := 0
    136 	for i := 0; i < b.N; i++ {
    137 		n += len(EscapeString(benchEscapeNone))
    138 	}
    139 }
    140 
    141 func BenchmarkUnescape(b *testing.B) {
    142 	s := EscapeString(benchEscapeData)
    143 	n := 0
    144 	for i := 0; i < b.N; i++ {
    145 		n += len(UnescapeString(s))
    146 	}
    147 }
    148 
    149 func BenchmarkUnescapeNone(b *testing.B) {
    150 	s := EscapeString(benchEscapeNone)
    151 	n := 0
    152 	for i := 0; i < b.N; i++ {
    153 		n += len(UnescapeString(s))
    154 	}
    155 }
    156 
    157 func BenchmarkUnescapeSparse(b *testing.B) {
    158 	n := 0
    159 	for i := 0; i < b.N; i++ {
    160 		n += len(UnescapeString(benchUnescapeSparse))
    161 	}
    162 }
    163 
    164 func BenchmarkUnescapeDense(b *testing.B) {
    165 	n := 0
    166 	for i := 0; i < b.N; i++ {
    167 		n += len(UnescapeString(benchUnescapeDense))
    168 	}
    169 }
    170