1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strings_test 6 7 import ( 8 "bytes" 9 "fmt" 10 . "strings" 11 "testing" 12 ) 13 14 var htmlEscaper = NewReplacer( 15 "&", "&", 16 "<", "<", 17 ">", ">", 18 `"`, """, 19 "'", "'", 20 ) 21 22 var htmlUnescaper = NewReplacer( 23 "&", "&", 24 "<", "<", 25 ">", ">", 26 """, `"`, 27 "'", "'", 28 ) 29 30 // The http package's old HTML escaping function. 31 func oldHTMLEscape(s string) string { 32 s = Replace(s, "&", "&", -1) 33 s = Replace(s, "<", "<", -1) 34 s = Replace(s, ">", ">", -1) 35 s = Replace(s, `"`, """, -1) 36 s = Replace(s, "'", "'", -1) 37 return s 38 } 39 40 var capitalLetters = NewReplacer("a", "A", "b", "B") 41 42 // TestReplacer tests the replacer implementations. 43 func TestReplacer(t *testing.T) { 44 type testCase struct { 45 r *Replacer 46 in, out string 47 } 48 var testCases []testCase 49 50 // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8. 51 str := func(b byte) string { 52 return string([]byte{b}) 53 } 54 var s []string 55 56 // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00". 57 s = nil 58 for i := 0; i < 256; i++ { 59 s = append(s, str(byte(i)), str(byte(i+1))) 60 } 61 inc := NewReplacer(s...) 62 63 // Test cases with 1-byte old strings, 1-byte new strings. 64 testCases = append(testCases, 65 testCase{capitalLetters, "brad", "BrAd"}, 66 testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)}, 67 testCase{capitalLetters, "", ""}, 68 69 testCase{inc, "brad", "csbe"}, 70 testCase{inc, "\x00\xff", "\x01\x00"}, 71 testCase{inc, "", ""}, 72 73 testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"}, 74 ) 75 76 // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ... 77 s = nil 78 for i := 0; i < 256; i++ { 79 n := i + 1 - 'a' 80 if n < 1 { 81 n = 1 82 } 83 s = append(s, str(byte(i)), Repeat(str(byte(i)), n)) 84 } 85 repeat := NewReplacer(s...) 86 87 // Test cases with 1-byte old strings, variable length new strings. 88 testCases = append(testCases, 89 testCase{htmlEscaper, "No changes", "No changes"}, 90 testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"}, 91 testCase{htmlEscaper, "&&&", "&&&"}, 92 testCase{htmlEscaper, "", ""}, 93 94 testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"}, 95 testCase{repeat, "abba", "abbbba"}, 96 testCase{repeat, "", ""}, 97 98 testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"}, 99 ) 100 101 // The remaining test cases have variable length old strings. 102 103 testCases = append(testCases, 104 testCase{htmlUnescaper, "&amp;", "&"}, 105 testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"}, 106 testCase{htmlUnescaper, "", ""}, 107 108 testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"}, 109 110 testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"}, 111 112 testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"}, 113 ) 114 115 // gen1 has multiple old strings of variable length. There is no 116 // overall non-empty common prefix, but some pairwise common prefixes. 117 gen1 := NewReplacer( 118 "aaa", "3[aaa]", 119 "aa", "2[aa]", 120 "a", "1[a]", 121 "i", "i", 122 "longerst", "most long", 123 "longer", "medium", 124 "long", "short", 125 "xx", "xx", 126 "x", "X", 127 "X", "Y", 128 "Y", "Z", 129 ) 130 testCases = append(testCases, 131 testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"}, 132 testCase{gen1, "long, longerst, longer", "short, most long, medium"}, 133 testCase{gen1, "xxxxx", "xxxxX"}, 134 testCase{gen1, "XiX", "YiY"}, 135 testCase{gen1, "", ""}, 136 ) 137 138 // gen2 has multiple old strings with no pairwise common prefix. 139 gen2 := NewReplacer( 140 "roses", "red", 141 "violets", "blue", 142 "sugar", "sweet", 143 ) 144 testCases = append(testCases, 145 testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."}, 146 testCase{gen2, "", ""}, 147 ) 148 149 // gen3 has multiple old strings with an overall common prefix. 150 gen3 := NewReplacer( 151 "abracadabra", "poof", 152 "abracadabrakazam", "splat", 153 "abraham", "lincoln", 154 "abrasion", "scrape", 155 "abraham", "isaac", 156 ) 157 testCases = append(testCases, 158 testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"}, 159 testCase{gen3, "abrasion abracad", "scrape abracad"}, 160 testCase{gen3, "abba abram abrasive", "abba abram abrasive"}, 161 testCase{gen3, "", ""}, 162 ) 163 164 // foo{1,2,3,4} have multiple old strings with an overall common prefix 165 // and 1- or 2- byte extensions from the common prefix. 166 foo1 := NewReplacer( 167 "foo1", "A", 168 "foo2", "B", 169 "foo3", "C", 170 ) 171 foo2 := NewReplacer( 172 "foo1", "A", 173 "foo2", "B", 174 "foo31", "C", 175 "foo32", "D", 176 ) 177 foo3 := NewReplacer( 178 "foo11", "A", 179 "foo12", "B", 180 "foo31", "C", 181 "foo32", "D", 182 ) 183 foo4 := NewReplacer( 184 "foo12", "B", 185 "foo32", "D", 186 ) 187 testCases = append(testCases, 188 testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"}, 189 testCase{foo1, "", ""}, 190 191 testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"}, 192 testCase{foo2, "", ""}, 193 194 testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"}, 195 testCase{foo3, "", ""}, 196 197 testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"}, 198 testCase{foo4, "", ""}, 199 ) 200 201 // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things. 202 allBytes := make([]byte, 256) 203 for i := range allBytes { 204 allBytes[i] = byte(i) 205 } 206 allString := string(allBytes) 207 genAll := NewReplacer( 208 allString, "[all]", 209 "\xff", "[ff]", 210 "\x00", "[00]", 211 ) 212 testCases = append(testCases, 213 testCase{genAll, allString, "[all]"}, 214 testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"}, 215 testCase{genAll, "", ""}, 216 ) 217 218 // Test cases with empty old strings. 219 220 blankToX1 := NewReplacer("", "X") 221 blankToX2 := NewReplacer("", "X", "", "") 222 blankHighPriority := NewReplacer("", "X", "o", "O") 223 blankLowPriority := NewReplacer("o", "O", "", "X") 224 blankNoOp1 := NewReplacer("", "") 225 blankNoOp2 := NewReplacer("", "", "", "A") 226 blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z") 227 testCases = append(testCases, 228 testCase{blankToX1, "foo", "XfXoXoX"}, 229 testCase{blankToX1, "", "X"}, 230 231 testCase{blankToX2, "foo", "XfXoXoX"}, 232 testCase{blankToX2, "", "X"}, 233 234 testCase{blankHighPriority, "oo", "XOXOX"}, 235 testCase{blankHighPriority, "ii", "XiXiX"}, 236 testCase{blankHighPriority, "oiio", "XOXiXiXOX"}, 237 testCase{blankHighPriority, "iooi", "XiXOXOXiX"}, 238 testCase{blankHighPriority, "", "X"}, 239 240 testCase{blankLowPriority, "oo", "OOX"}, 241 testCase{blankLowPriority, "ii", "XiXiX"}, 242 testCase{blankLowPriority, "oiio", "OXiXiOX"}, 243 testCase{blankLowPriority, "iooi", "XiOOXiX"}, 244 testCase{blankLowPriority, "", "X"}, 245 246 testCase{blankNoOp1, "foo", "foo"}, 247 testCase{blankNoOp1, "", ""}, 248 249 testCase{blankNoOp2, "foo", "foo"}, 250 testCase{blankNoOp2, "", ""}, 251 252 testCase{blankFoo, "foobarfoobaz", "XRXZX"}, 253 testCase{blankFoo, "foobar-foobaz", "XRX-XZX"}, 254 testCase{blankFoo, "", "X"}, 255 ) 256 257 // single string replacer 258 259 abcMatcher := NewReplacer("abc", "[match]") 260 261 testCases = append(testCases, 262 testCase{abcMatcher, "", ""}, 263 testCase{abcMatcher, "ab", "ab"}, 264 testCase{abcMatcher, "abc", "[match]"}, 265 testCase{abcMatcher, "abcd", "[match]d"}, 266 testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"}, 267 ) 268 269 // Issue 6659 cases (more single string replacer) 270 271 noHello := NewReplacer("Hello", "") 272 testCases = append(testCases, 273 testCase{noHello, "Hello", ""}, 274 testCase{noHello, "Hellox", "x"}, 275 testCase{noHello, "xHello", "x"}, 276 testCase{noHello, "xHellox", "xx"}, 277 ) 278 279 // No-arg test cases. 280 281 nop := NewReplacer() 282 testCases = append(testCases, 283 testCase{nop, "abc", "abc"}, 284 testCase{nop, "", ""}, 285 ) 286 287 // Run the test cases. 288 289 for i, tc := range testCases { 290 if s := tc.r.Replace(tc.in); s != tc.out { 291 t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out) 292 } 293 var buf bytes.Buffer 294 n, err := tc.r.WriteString(&buf, tc.in) 295 if err != nil { 296 t.Errorf("%d. WriteString: %v", i, err) 297 continue 298 } 299 got := buf.String() 300 if got != tc.out { 301 t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out) 302 continue 303 } 304 if n != len(tc.out) { 305 t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)", 306 i, tc.in, n, len(tc.out), tc.out) 307 } 308 } 309 } 310 311 var algorithmTestCases = []struct { 312 r *Replacer 313 want string 314 }{ 315 {capitalLetters, "*strings.byteReplacer"}, 316 {htmlEscaper, "*strings.byteStringReplacer"}, 317 {NewReplacer("12", "123"), "*strings.singleStringReplacer"}, 318 {NewReplacer("1", "12"), "*strings.byteStringReplacer"}, 319 {NewReplacer("", "X"), "*strings.genericReplacer"}, 320 {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"}, 321 } 322 323 // TestPickAlgorithm tests that NewReplacer picks the correct algorithm. 324 func TestPickAlgorithm(t *testing.T) { 325 for i, tc := range algorithmTestCases { 326 got := fmt.Sprintf("%T", tc.r.Replacer()) 327 if got != tc.want { 328 t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want) 329 } 330 } 331 } 332 333 type errWriter struct{} 334 335 func (errWriter) Write(p []byte) (n int, err error) { 336 return 0, fmt.Errorf("unwritable") 337 } 338 339 // TestWriteStringError tests that WriteString returns an error 340 // received from the underlying io.Writer. 341 func TestWriteStringError(t *testing.T) { 342 for i, tc := range algorithmTestCases { 343 n, err := tc.r.WriteString(errWriter{}, "abc") 344 if n != 0 || err == nil || err.Error() != "unwritable" { 345 t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err) 346 } 347 } 348 } 349 350 // TestGenericTrieBuilding verifies the structure of the generated trie. There 351 // is one node per line, and the key ending with the current line is in the 352 // trie if it ends with a "+". 353 func TestGenericTrieBuilding(t *testing.T) { 354 testCases := []struct{ in, out string }{ 355 {"abc;abdef;abdefgh;xx;xy;z", `- 356 a- 357 .b- 358 ..c+ 359 ..d- 360 ...ef+ 361 .....gh+ 362 x- 363 .x+ 364 .y+ 365 z+ 366 `}, 367 {"abracadabra;abracadabrakazam;abraham;abrasion", `- 368 a- 369 .bra- 370 ....c- 371 .....adabra+ 372 ...........kazam+ 373 ....h- 374 .....am+ 375 ....s- 376 .....ion+ 377 `}, 378 {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `- 379 X+ 380 Y+ 381 a+ 382 .a+ 383 ..a+ 384 i+ 385 l- 386 .ong+ 387 ....er+ 388 ......st+ 389 x+ 390 .x+ 391 `}, 392 {"foo;;foo;foo1", `+ 393 f- 394 .oo+ 395 ...1+ 396 `}, 397 } 398 399 for _, tc := range testCases { 400 keys := Split(tc.in, ";") 401 args := make([]string, len(keys)*2) 402 for i, key := range keys { 403 args[i*2] = key 404 } 405 406 got := NewReplacer(args...).PrintTrie() 407 // Remove tabs from tc.out 408 wantbuf := make([]byte, 0, len(tc.out)) 409 for i := 0; i < len(tc.out); i++ { 410 if tc.out[i] != '\t' { 411 wantbuf = append(wantbuf, tc.out[i]) 412 } 413 } 414 want := string(wantbuf) 415 416 if got != want { 417 t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want) 418 } 419 } 420 } 421 422 func BenchmarkGenericNoMatch(b *testing.B) { 423 str := Repeat("A", 100) + Repeat("B", 100) 424 generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic 425 for i := 0; i < b.N; i++ { 426 generic.Replace(str) 427 } 428 } 429 430 func BenchmarkGenericMatch1(b *testing.B) { 431 str := Repeat("a", 100) + Repeat("b", 100) 432 generic := NewReplacer("a", "A", "b", "B", "12", "123") 433 for i := 0; i < b.N; i++ { 434 generic.Replace(str) 435 } 436 } 437 438 func BenchmarkGenericMatch2(b *testing.B) { 439 str := Repeat("It's <b>HTML</b>!", 100) 440 for i := 0; i < b.N; i++ { 441 htmlUnescaper.Replace(str) 442 } 443 } 444 445 func benchmarkSingleString(b *testing.B, pattern, text string) { 446 r := NewReplacer(pattern, "[match]") 447 b.SetBytes(int64(len(text))) 448 b.ResetTimer() 449 for i := 0; i < b.N; i++ { 450 r.Replace(text) 451 } 452 } 453 454 func BenchmarkSingleMaxSkipping(b *testing.B) { 455 benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000)) 456 } 457 458 func BenchmarkSingleLongSuffixFail(b *testing.B) { 459 benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002)) 460 } 461 462 func BenchmarkSingleMatch(b *testing.B) { 463 benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000)) 464 } 465 466 func BenchmarkByteByteNoMatch(b *testing.B) { 467 str := Repeat("A", 100) + Repeat("B", 100) 468 for i := 0; i < b.N; i++ { 469 capitalLetters.Replace(str) 470 } 471 } 472 473 func BenchmarkByteByteMatch(b *testing.B) { 474 str := Repeat("a", 100) + Repeat("b", 100) 475 for i := 0; i < b.N; i++ { 476 capitalLetters.Replace(str) 477 } 478 } 479 480 func BenchmarkByteStringMatch(b *testing.B) { 481 str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">" 482 for i := 0; i < b.N; i++ { 483 htmlEscaper.Replace(str) 484 } 485 } 486 487 func BenchmarkHTMLEscapeNew(b *testing.B) { 488 str := "I <3 to escape HTML & other text too." 489 for i := 0; i < b.N; i++ { 490 htmlEscaper.Replace(str) 491 } 492 } 493 494 func BenchmarkHTMLEscapeOld(b *testing.B) { 495 str := "I <3 to escape HTML & other text too." 496 for i := 0; i < b.N; i++ { 497 oldHTMLEscape(str) 498 } 499 } 500 501 func BenchmarkByteStringReplacerWriteString(b *testing.B) { 502 str := Repeat("I <3 to escape HTML & other text too.", 100) 503 buf := new(bytes.Buffer) 504 for i := 0; i < b.N; i++ { 505 htmlEscaper.WriteString(buf, str) 506 buf.Reset() 507 } 508 } 509 510 func BenchmarkByteReplacerWriteString(b *testing.B) { 511 str := Repeat("abcdefghijklmnopqrstuvwxyz", 100) 512 buf := new(bytes.Buffer) 513 for i := 0; i < b.N; i++ { 514 capitalLetters.WriteString(buf, str) 515 buf.Reset() 516 } 517 } 518 519 // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. 520 func BenchmarkByteByteReplaces(b *testing.B) { 521 str := Repeat("a", 100) + Repeat("b", 100) 522 for i := 0; i < b.N; i++ { 523 Replace(Replace(str, "a", "A", -1), "b", "B", -1) 524 } 525 } 526 527 // BenchmarkByteByteMap compares byteByteImpl against Map. 528 func BenchmarkByteByteMap(b *testing.B) { 529 str := Repeat("a", 100) + Repeat("b", 100) 530 fn := func(r rune) rune { 531 switch r { 532 case 'a': 533 return 'A' 534 case 'b': 535 return 'B' 536 } 537 return r 538 } 539 for i := 0; i < b.N; i++ { 540 Map(fn, str) 541 } 542 } 543