Home | History | Annotate | Download | only in strings
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package strings_test
      6 
      7 import (
      8 	"bytes"
      9 	"fmt"
     10 	. "strings"
     11 	"testing"
     12 )
     13 
     14 var htmlEscaper = NewReplacer(
     15 	"&", "&",
     16 	"<", "&lt;",
     17 	">", "&gt;",
     18 	`"`, "&quot;",
     19 	"'", "&apos;",
     20 )
     21 
     22 var htmlUnescaper = NewReplacer(
     23 	"&amp;", "&",
     24 	"&lt;", "<",
     25 	"&gt;", ">",
     26 	"&quot;", `"`,
     27 	"&apos;", "'",
     28 )
     29 
     30 // The http package's old HTML escaping function.
     31 func oldHTMLEscape(s string) string {
     32 	s = Replace(s, "&", "&amp;", -1)
     33 	s = Replace(s, "<", "&lt;", -1)
     34 	s = Replace(s, ">", "&gt;", -1)
     35 	s = Replace(s, `"`, "&quot;", -1)
     36 	s = Replace(s, "'", "&apos;", -1)
     37 	return s
     38 }
     39 
     40 var capitalLetters = NewReplacer("a", "A", "b", "B")
     41 
     42 // TestReplacer tests the replacer implementations.
     43 func TestReplacer(t *testing.T) {
     44 	type testCase struct {
     45 		r       *Replacer
     46 		in, out string
     47 	}
     48 	var testCases []testCase
     49 
     50 	// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
     51 	str := func(b byte) string {
     52 		return string([]byte{b})
     53 	}
     54 	var s []string
     55 
     56 	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
     57 	s = nil
     58 	for i := 0; i < 256; i++ {
     59 		s = append(s, str(byte(i)), str(byte(i+1)))
     60 	}
     61 	inc := NewReplacer(s...)
     62 
     63 	// Test cases with 1-byte old strings, 1-byte new strings.
     64 	testCases = append(testCases,
     65 		testCase{capitalLetters, "brad", "BrAd"},
     66 		testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
     67 		testCase{capitalLetters, "", ""},
     68 
     69 		testCase{inc, "brad", "csbe"},
     70 		testCase{inc, "\x00\xff", "\x01\x00"},
     71 		testCase{inc, "", ""},
     72 
     73 		testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
     74 	)
     75 
     76 	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
     77 	s = nil
     78 	for i := 0; i < 256; i++ {
     79 		n := i + 1 - 'a'
     80 		if n < 1 {
     81 			n = 1
     82 		}
     83 		s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
     84 	}
     85 	repeat := NewReplacer(s...)
     86 
     87 	// Test cases with 1-byte old strings, variable length new strings.
     88 	testCases = append(testCases,
     89 		testCase{htmlEscaper, "No changes", "No changes"},
     90 		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
     91 		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
     92 		testCase{htmlEscaper, "", ""},
     93 
     94 		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
     95 		testCase{repeat, "abba", "abbbba"},
     96 		testCase{repeat, "", ""},
     97 
     98 		testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
     99 	)
    100 
    101 	// The remaining test cases have variable length old strings.
    102 
    103 	testCases = append(testCases,
    104 		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
    105 		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
    106 		testCase{htmlUnescaper, "", ""},
    107 
    108 		testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
    109 
    110 		testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
    111 
    112 		testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
    113 	)
    114 
    115 	// gen1 has multiple old strings of variable length. There is no
    116 	// overall non-empty common prefix, but some pairwise common prefixes.
    117 	gen1 := NewReplacer(
    118 		"aaa", "3[aaa]",
    119 		"aa", "2[aa]",
    120 		"a", "1[a]",
    121 		"i", "i",
    122 		"longerst", "most long",
    123 		"longer", "medium",
    124 		"long", "short",
    125 		"xx", "xx",
    126 		"x", "X",
    127 		"X", "Y",
    128 		"Y", "Z",
    129 	)
    130 	testCases = append(testCases,
    131 		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
    132 		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
    133 		testCase{gen1, "xxxxx", "xxxxX"},
    134 		testCase{gen1, "XiX", "YiY"},
    135 		testCase{gen1, "", ""},
    136 	)
    137 
    138 	// gen2 has multiple old strings with no pairwise common prefix.
    139 	gen2 := NewReplacer(
    140 		"roses", "red",
    141 		"violets", "blue",
    142 		"sugar", "sweet",
    143 	)
    144 	testCases = append(testCases,
    145 		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
    146 		testCase{gen2, "", ""},
    147 	)
    148 
    149 	// gen3 has multiple old strings with an overall common prefix.
    150 	gen3 := NewReplacer(
    151 		"abracadabra", "poof",
    152 		"abracadabrakazam", "splat",
    153 		"abraham", "lincoln",
    154 		"abrasion", "scrape",
    155 		"abraham", "isaac",
    156 	)
    157 	testCases = append(testCases,
    158 		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
    159 		testCase{gen3, "abrasion abracad", "scrape abracad"},
    160 		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
    161 		testCase{gen3, "", ""},
    162 	)
    163 
    164 	// foo{1,2,3,4} have multiple old strings with an overall common prefix
    165 	// and 1- or 2- byte extensions from the common prefix.
    166 	foo1 := NewReplacer(
    167 		"foo1", "A",
    168 		"foo2", "B",
    169 		"foo3", "C",
    170 	)
    171 	foo2 := NewReplacer(
    172 		"foo1", "A",
    173 		"foo2", "B",
    174 		"foo31", "C",
    175 		"foo32", "D",
    176 	)
    177 	foo3 := NewReplacer(
    178 		"foo11", "A",
    179 		"foo12", "B",
    180 		"foo31", "C",
    181 		"foo32", "D",
    182 	)
    183 	foo4 := NewReplacer(
    184 		"foo12", "B",
    185 		"foo32", "D",
    186 	)
    187 	testCases = append(testCases,
    188 		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
    189 		testCase{foo1, "", ""},
    190 
    191 		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
    192 		testCase{foo2, "", ""},
    193 
    194 		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
    195 		testCase{foo3, "", ""},
    196 
    197 		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
    198 		testCase{foo4, "", ""},
    199 	)
    200 
    201 	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
    202 	allBytes := make([]byte, 256)
    203 	for i := range allBytes {
    204 		allBytes[i] = byte(i)
    205 	}
    206 	allString := string(allBytes)
    207 	genAll := NewReplacer(
    208 		allString, "[all]",
    209 		"\xff", "[ff]",
    210 		"\x00", "[00]",
    211 	)
    212 	testCases = append(testCases,
    213 		testCase{genAll, allString, "[all]"},
    214 		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
    215 		testCase{genAll, "", ""},
    216 	)
    217 
    218 	// Test cases with empty old strings.
    219 
    220 	blankToX1 := NewReplacer("", "X")
    221 	blankToX2 := NewReplacer("", "X", "", "")
    222 	blankHighPriority := NewReplacer("", "X", "o", "O")
    223 	blankLowPriority := NewReplacer("o", "O", "", "X")
    224 	blankNoOp1 := NewReplacer("", "")
    225 	blankNoOp2 := NewReplacer("", "", "", "A")
    226 	blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
    227 	testCases = append(testCases,
    228 		testCase{blankToX1, "foo", "XfXoXoX"},
    229 		testCase{blankToX1, "", "X"},
    230 
    231 		testCase{blankToX2, "foo", "XfXoXoX"},
    232 		testCase{blankToX2, "", "X"},
    233 
    234 		testCase{blankHighPriority, "oo", "XOXOX"},
    235 		testCase{blankHighPriority, "ii", "XiXiX"},
    236 		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
    237 		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
    238 		testCase{blankHighPriority, "", "X"},
    239 
    240 		testCase{blankLowPriority, "oo", "OOX"},
    241 		testCase{blankLowPriority, "ii", "XiXiX"},
    242 		testCase{blankLowPriority, "oiio", "OXiXiOX"},
    243 		testCase{blankLowPriority, "iooi", "XiOOXiX"},
    244 		testCase{blankLowPriority, "", "X"},
    245 
    246 		testCase{blankNoOp1, "foo", "foo"},
    247 		testCase{blankNoOp1, "", ""},
    248 
    249 		testCase{blankNoOp2, "foo", "foo"},
    250 		testCase{blankNoOp2, "", ""},
    251 
    252 		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
    253 		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
    254 		testCase{blankFoo, "", "X"},
    255 	)
    256 
    257 	// single string replacer
    258 
    259 	abcMatcher := NewReplacer("abc", "[match]")
    260 
    261 	testCases = append(testCases,
    262 		testCase{abcMatcher, "", ""},
    263 		testCase{abcMatcher, "ab", "ab"},
    264 		testCase{abcMatcher, "abc", "[match]"},
    265 		testCase{abcMatcher, "abcd", "[match]d"},
    266 		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
    267 	)
    268 
    269 	// Issue 6659 cases (more single string replacer)
    270 
    271 	noHello := NewReplacer("Hello", "")
    272 	testCases = append(testCases,
    273 		testCase{noHello, "Hello", ""},
    274 		testCase{noHello, "Hellox", "x"},
    275 		testCase{noHello, "xHello", "x"},
    276 		testCase{noHello, "xHellox", "xx"},
    277 	)
    278 
    279 	// No-arg test cases.
    280 
    281 	nop := NewReplacer()
    282 	testCases = append(testCases,
    283 		testCase{nop, "abc", "abc"},
    284 		testCase{nop, "", ""},
    285 	)
    286 
    287 	// Run the test cases.
    288 
    289 	for i, tc := range testCases {
    290 		if s := tc.r.Replace(tc.in); s != tc.out {
    291 			t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
    292 		}
    293 		var buf bytes.Buffer
    294 		n, err := tc.r.WriteString(&buf, tc.in)
    295 		if err != nil {
    296 			t.Errorf("%d. WriteString: %v", i, err)
    297 			continue
    298 		}
    299 		got := buf.String()
    300 		if got != tc.out {
    301 			t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
    302 			continue
    303 		}
    304 		if n != len(tc.out) {
    305 			t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
    306 				i, tc.in, n, len(tc.out), tc.out)
    307 		}
    308 	}
    309 }
    310 
    311 var algorithmTestCases = []struct {
    312 	r    *Replacer
    313 	want string
    314 }{
    315 	{capitalLetters, "*strings.byteReplacer"},
    316 	{htmlEscaper, "*strings.byteStringReplacer"},
    317 	{NewReplacer("12", "123"), "*strings.singleStringReplacer"},
    318 	{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
    319 	{NewReplacer("", "X"), "*strings.genericReplacer"},
    320 	{NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
    321 }
    322 
    323 // TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
    324 func TestPickAlgorithm(t *testing.T) {
    325 	for i, tc := range algorithmTestCases {
    326 		got := fmt.Sprintf("%T", tc.r.Replacer())
    327 		if got != tc.want {
    328 			t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
    329 		}
    330 	}
    331 }
    332 
    333 type errWriter struct{}
    334 
    335 func (errWriter) Write(p []byte) (n int, err error) {
    336 	return 0, fmt.Errorf("unwritable")
    337 }
    338 
    339 // TestWriteStringError tests that WriteString returns an error
    340 // received from the underlying io.Writer.
    341 func TestWriteStringError(t *testing.T) {
    342 	for i, tc := range algorithmTestCases {
    343 		n, err := tc.r.WriteString(errWriter{}, "abc")
    344 		if n != 0 || err == nil || err.Error() != "unwritable" {
    345 			t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err)
    346 		}
    347 	}
    348 }
    349 
    350 // TestGenericTrieBuilding verifies the structure of the generated trie. There
    351 // is one node per line, and the key ending with the current line is in the
    352 // trie if it ends with a "+".
    353 func TestGenericTrieBuilding(t *testing.T) {
    354 	testCases := []struct{ in, out string }{
    355 		{"abc;abdef;abdefgh;xx;xy;z", `-
    356 			a-
    357 			.b-
    358 			..c+
    359 			..d-
    360 			...ef+
    361 			.....gh+
    362 			x-
    363 			.x+
    364 			.y+
    365 			z+
    366 			`},
    367 		{"abracadabra;abracadabrakazam;abraham;abrasion", `-
    368 			a-
    369 			.bra-
    370 			....c-
    371 			.....adabra+
    372 			...........kazam+
    373 			....h-
    374 			.....am+
    375 			....s-
    376 			.....ion+
    377 			`},
    378 		{"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
    379 			X+
    380 			Y+
    381 			a+
    382 			.a+
    383 			..a+
    384 			i+
    385 			l-
    386 			.ong+
    387 			....er+
    388 			......st+
    389 			x+
    390 			.x+
    391 			`},
    392 		{"foo;;foo;foo1", `+
    393 			f-
    394 			.oo+
    395 			...1+
    396 			`},
    397 	}
    398 
    399 	for _, tc := range testCases {
    400 		keys := Split(tc.in, ";")
    401 		args := make([]string, len(keys)*2)
    402 		for i, key := range keys {
    403 			args[i*2] = key
    404 		}
    405 
    406 		got := NewReplacer(args...).PrintTrie()
    407 		// Remove tabs from tc.out
    408 		wantbuf := make([]byte, 0, len(tc.out))
    409 		for i := 0; i < len(tc.out); i++ {
    410 			if tc.out[i] != '\t' {
    411 				wantbuf = append(wantbuf, tc.out[i])
    412 			}
    413 		}
    414 		want := string(wantbuf)
    415 
    416 		if got != want {
    417 			t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
    418 		}
    419 	}
    420 }
    421 
    422 func BenchmarkGenericNoMatch(b *testing.B) {
    423 	str := Repeat("A", 100) + Repeat("B", 100)
    424 	generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
    425 	for i := 0; i < b.N; i++ {
    426 		generic.Replace(str)
    427 	}
    428 }
    429 
    430 func BenchmarkGenericMatch1(b *testing.B) {
    431 	str := Repeat("a", 100) + Repeat("b", 100)
    432 	generic := NewReplacer("a", "A", "b", "B", "12", "123")
    433 	for i := 0; i < b.N; i++ {
    434 		generic.Replace(str)
    435 	}
    436 }
    437 
    438 func BenchmarkGenericMatch2(b *testing.B) {
    439 	str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
    440 	for i := 0; i < b.N; i++ {
    441 		htmlUnescaper.Replace(str)
    442 	}
    443 }
    444 
    445 func benchmarkSingleString(b *testing.B, pattern, text string) {
    446 	r := NewReplacer(pattern, "[match]")
    447 	b.SetBytes(int64(len(text)))
    448 	b.ResetTimer()
    449 	for i := 0; i < b.N; i++ {
    450 		r.Replace(text)
    451 	}
    452 }
    453 
    454 func BenchmarkSingleMaxSkipping(b *testing.B) {
    455 	benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
    456 }
    457 
    458 func BenchmarkSingleLongSuffixFail(b *testing.B) {
    459 	benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
    460 }
    461 
    462 func BenchmarkSingleMatch(b *testing.B) {
    463 	benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
    464 }
    465 
    466 func BenchmarkByteByteNoMatch(b *testing.B) {
    467 	str := Repeat("A", 100) + Repeat("B", 100)
    468 	for i := 0; i < b.N; i++ {
    469 		capitalLetters.Replace(str)
    470 	}
    471 }
    472 
    473 func BenchmarkByteByteMatch(b *testing.B) {
    474 	str := Repeat("a", 100) + Repeat("b", 100)
    475 	for i := 0; i < b.N; i++ {
    476 		capitalLetters.Replace(str)
    477 	}
    478 }
    479 
    480 func BenchmarkByteStringMatch(b *testing.B) {
    481 	str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
    482 	for i := 0; i < b.N; i++ {
    483 		htmlEscaper.Replace(str)
    484 	}
    485 }
    486 
    487 func BenchmarkHTMLEscapeNew(b *testing.B) {
    488 	str := "I <3 to escape HTML & other text too."
    489 	for i := 0; i < b.N; i++ {
    490 		htmlEscaper.Replace(str)
    491 	}
    492 }
    493 
    494 func BenchmarkHTMLEscapeOld(b *testing.B) {
    495 	str := "I <3 to escape HTML & other text too."
    496 	for i := 0; i < b.N; i++ {
    497 		oldHTMLEscape(str)
    498 	}
    499 }
    500 
    501 func BenchmarkByteStringReplacerWriteString(b *testing.B) {
    502 	str := Repeat("I <3 to escape HTML & other text too.", 100)
    503 	buf := new(bytes.Buffer)
    504 	for i := 0; i < b.N; i++ {
    505 		htmlEscaper.WriteString(buf, str)
    506 		buf.Reset()
    507 	}
    508 }
    509 
    510 func BenchmarkByteReplacerWriteString(b *testing.B) {
    511 	str := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
    512 	buf := new(bytes.Buffer)
    513 	for i := 0; i < b.N; i++ {
    514 		capitalLetters.WriteString(buf, str)
    515 		buf.Reset()
    516 	}
    517 }
    518 
    519 // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
    520 func BenchmarkByteByteReplaces(b *testing.B) {
    521 	str := Repeat("a", 100) + Repeat("b", 100)
    522 	for i := 0; i < b.N; i++ {
    523 		Replace(Replace(str, "a", "A", -1), "b", "B", -1)
    524 	}
    525 }
    526 
    527 // BenchmarkByteByteMap compares byteByteImpl against Map.
    528 func BenchmarkByteByteMap(b *testing.B) {
    529 	str := Repeat("a", 100) + Repeat("b", 100)
    530 	fn := func(r rune) rune {
    531 		switch r {
    532 		case 'a':
    533 			return 'A'
    534 		case 'b':
    535 			return 'B'
    536 		}
    537 		return r
    538 	}
    539 	for i := 0; i < b.N; i++ {
    540 		Map(fn, str)
    541 	}
    542 }
    543