1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package regexp 6 7 import ( 8 "bufio" 9 "compress/bzip2" 10 "fmt" 11 "io" 12 "os" 13 "path/filepath" 14 "regexp/syntax" 15 "strconv" 16 "strings" 17 "testing" 18 "unicode/utf8" 19 ) 20 21 // TestRE2 tests this package's regexp API against test cases 22 // considered during RE2's exhaustive tests, which run all possible 23 // regexps over a given set of atoms and operators, up to a given 24 // complexity, over all possible strings over a given alphabet, 25 // up to a given size. Rather than try to link with RE2, we read a 26 // log file containing the test cases and the expected matches. 27 // The log file, re2-exhaustive.txt, is generated by running 'make log' 28 // in the open source RE2 distribution https://github.com/google/re2/. 29 // 30 // The test file format is a sequence of stanzas like: 31 // 32 // strings 33 // "abc" 34 // "123x" 35 // regexps 36 // "[a-z]+" 37 // 0-3;0-3 38 // -;- 39 // "([0-9])([0-9])([0-9])" 40 // -;- 41 // -;0-3 0-1 1-2 2-3 42 // 43 // The stanza begins by defining a set of strings, quoted 44 // using Go double-quote syntax, one per line. Then the 45 // regexps section gives a sequence of regexps to run on 46 // the strings. In the block that follows a regexp, each line 47 // gives the semicolon-separated match results of running 48 // the regexp on the corresponding string. 49 // Each match result is either a single -, meaning no match, or a 50 // space-separated sequence of pairs giving the match and 51 // submatch indices. An unmatched subexpression formats 52 // its pair as a single - (not illustrated above). For now 53 // each regexp run produces two match results, one for a 54 // ``full match'' that restricts the regexp to matching the entire 55 // string or nothing, and one for a ``partial match'' that gives 56 // the leftmost first match found in the string. 57 // 58 // Lines beginning with # are comments. Lines beginning with 59 // a capital letter are test names printed during RE2's test suite 60 // and are echoed into t but otherwise ignored. 61 // 62 // At time of writing, re2-exhaustive.txt is 59 MB but compresses to 385 kB, 63 // so we store re2-exhaustive.txt.bz2 in the repository and decompress it on the fly. 64 // 65 func TestRE2Search(t *testing.T) { 66 testRE2(t, "testdata/re2-search.txt") 67 } 68 69 func testRE2(t *testing.T, file string) { 70 f, err := os.Open(file) 71 if err != nil { 72 t.Fatal(err) 73 } 74 defer f.Close() 75 var txt io.Reader 76 if strings.HasSuffix(file, ".bz2") { 77 z := bzip2.NewReader(f) 78 txt = z 79 file = file[:len(file)-len(".bz2")] // for error messages 80 } else { 81 txt = f 82 } 83 lineno := 0 84 scanner := bufio.NewScanner(txt) 85 var ( 86 str []string 87 input []string 88 inStrings bool 89 re *Regexp 90 refull *Regexp 91 nfail int 92 ncase int 93 ) 94 for lineno := 1; scanner.Scan(); lineno++ { 95 line := scanner.Text() 96 switch { 97 case line == "": 98 t.Fatalf("%s:%d: unexpected blank line", file, lineno) 99 case line[0] == '#': 100 continue 101 case 'A' <= line[0] && line[0] <= 'Z': 102 // Test name. 103 t.Logf("%s\n", line) 104 continue 105 case line == "strings": 106 str = str[:0] 107 inStrings = true 108 case line == "regexps": 109 inStrings = false 110 case line[0] == '"': 111 q, err := strconv.Unquote(line) 112 if err != nil { 113 // Fatal because we'll get out of sync. 114 t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err) 115 } 116 if inStrings { 117 str = append(str, q) 118 continue 119 } 120 // Is a regexp. 121 if len(input) != 0 { 122 t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q) 123 } 124 re, err = tryCompile(q) 125 if err != nil { 126 if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" { 127 // We don't and likely never will support \C; keep going. 128 continue 129 } 130 t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err) 131 if nfail++; nfail >= 100 { 132 t.Fatalf("stopping after %d errors", nfail) 133 } 134 continue 135 } 136 full := `\A(?:` + q + `)\z` 137 refull, err = tryCompile(full) 138 if err != nil { 139 // Fatal because q worked, so this should always work. 140 t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err) 141 } 142 input = str 143 case line[0] == '-' || '0' <= line[0] && line[0] <= '9': 144 // A sequence of match results. 145 ncase++ 146 if re == nil { 147 // Failed to compile: skip results. 148 continue 149 } 150 if len(input) == 0 { 151 t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno) 152 } 153 var text string 154 text, input = input[0], input[1:] 155 if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) { 156 // RE2's \B considers every byte position, 157 // so it sees 'not word boundary' in the 158 // middle of UTF-8 sequences. This package 159 // only considers the positions between runes, 160 // so it disagrees. Skip those cases. 161 continue 162 } 163 res := strings.Split(line, ";") 164 if len(res) != len(run) { 165 t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run)) 166 } 167 for i := range res { 168 have, suffix := run[i](re, refull, text) 169 want := parseResult(t, file, lineno, res[i]) 170 if !same(have, want) { 171 t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want) 172 if nfail++; nfail >= 100 { 173 t.Fatalf("stopping after %d errors", nfail) 174 } 175 continue 176 } 177 b, suffix := match[i](re, refull, text) 178 if b != (want != nil) { 179 t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b) 180 if nfail++; nfail >= 100 { 181 t.Fatalf("stopping after %d errors", nfail) 182 } 183 continue 184 } 185 } 186 187 default: 188 t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line) 189 } 190 } 191 if err := scanner.Err(); err != nil { 192 t.Fatalf("%s:%d: %v", file, lineno, err) 193 } 194 if len(input) != 0 { 195 t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input)) 196 } 197 t.Logf("%d cases tested", ncase) 198 } 199 200 var run = []func(*Regexp, *Regexp, string) ([]int, string){ 201 runFull, 202 runPartial, 203 runFullLongest, 204 runPartialLongest, 205 } 206 207 func runFull(re, refull *Regexp, text string) ([]int, string) { 208 refull.longest = false 209 return refull.FindStringSubmatchIndex(text), "[full]" 210 } 211 212 func runPartial(re, refull *Regexp, text string) ([]int, string) { 213 re.longest = false 214 return re.FindStringSubmatchIndex(text), "" 215 } 216 217 func runFullLongest(re, refull *Regexp, text string) ([]int, string) { 218 refull.longest = true 219 return refull.FindStringSubmatchIndex(text), "[full,longest]" 220 } 221 222 func runPartialLongest(re, refull *Regexp, text string) ([]int, string) { 223 re.longest = true 224 return re.FindStringSubmatchIndex(text), "[longest]" 225 } 226 227 var match = []func(*Regexp, *Regexp, string) (bool, string){ 228 matchFull, 229 matchPartial, 230 matchFullLongest, 231 matchPartialLongest, 232 } 233 234 func matchFull(re, refull *Regexp, text string) (bool, string) { 235 refull.longest = false 236 return refull.MatchString(text), "[full]" 237 } 238 239 func matchPartial(re, refull *Regexp, text string) (bool, string) { 240 re.longest = false 241 return re.MatchString(text), "" 242 } 243 244 func matchFullLongest(re, refull *Regexp, text string) (bool, string) { 245 refull.longest = true 246 return refull.MatchString(text), "[full,longest]" 247 } 248 249 func matchPartialLongest(re, refull *Regexp, text string) (bool, string) { 250 re.longest = true 251 return re.MatchString(text), "[longest]" 252 } 253 254 func isSingleBytes(s string) bool { 255 for _, c := range s { 256 if c >= utf8.RuneSelf { 257 return false 258 } 259 } 260 return true 261 } 262 263 func tryCompile(s string) (re *Regexp, err error) { 264 // Protect against panic during Compile. 265 defer func() { 266 if r := recover(); r != nil { 267 err = fmt.Errorf("panic: %v", r) 268 } 269 }() 270 return Compile(s) 271 } 272 273 func parseResult(t *testing.T, file string, lineno int, res string) []int { 274 // A single - indicates no match. 275 if res == "-" { 276 return nil 277 } 278 // Otherwise, a space-separated list of pairs. 279 n := 1 280 for j := 0; j < len(res); j++ { 281 if res[j] == ' ' { 282 n++ 283 } 284 } 285 out := make([]int, 2*n) 286 i := 0 287 n = 0 288 for j := 0; j <= len(res); j++ { 289 if j == len(res) || res[j] == ' ' { 290 // Process a single pair. - means no submatch. 291 pair := res[i:j] 292 if pair == "-" { 293 out[n] = -1 294 out[n+1] = -1 295 } else { 296 k := strings.Index(pair, "-") 297 if k < 0 { 298 t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair) 299 } 300 lo, err1 := strconv.Atoi(pair[:k]) 301 hi, err2 := strconv.Atoi(pair[k+1:]) 302 if err1 != nil || err2 != nil || lo > hi { 303 t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair) 304 } 305 out[n] = lo 306 out[n+1] = hi 307 } 308 n += 2 309 i = j + 1 310 } 311 } 312 return out 313 } 314 315 func same(x, y []int) bool { 316 if len(x) != len(y) { 317 return false 318 } 319 for i, xi := range x { 320 if xi != y[i] { 321 return false 322 } 323 } 324 return true 325 } 326 327 // TestFowler runs this package's regexp API against the 328 // POSIX regular expression tests collected by Glenn Fowler 329 // at http://www2.research.att.com/~astopen/testregex/testregex.html. 330 func TestFowler(t *testing.T) { 331 files, err := filepath.Glob("testdata/*.dat") 332 if err != nil { 333 t.Fatal(err) 334 } 335 for _, file := range files { 336 t.Log(file) 337 testFowler(t, file) 338 } 339 } 340 341 var notab = MustCompilePOSIX(`[^\t]+`) 342 343 func testFowler(t *testing.T, file string) { 344 f, err := os.Open(file) 345 if err != nil { 346 t.Error(err) 347 return 348 } 349 defer f.Close() 350 b := bufio.NewReader(f) 351 lineno := 0 352 lastRegexp := "" 353 Reading: 354 for { 355 lineno++ 356 line, err := b.ReadString('\n') 357 if err != nil { 358 if err != io.EOF { 359 t.Errorf("%s:%d: %v", file, lineno, err) 360 } 361 break Reading 362 } 363 364 // http://www2.research.att.com/~astopen/man/man1/testregex.html 365 // 366 // INPUT FORMAT 367 // Input lines may be blank, a comment beginning with #, or a test 368 // specification. A specification is five fields separated by one 369 // or more tabs. NULL denotes the empty string and NIL denotes the 370 // 0 pointer. 371 if line[0] == '#' || line[0] == '\n' { 372 continue Reading 373 } 374 line = line[:len(line)-1] 375 field := notab.FindAllString(line, -1) 376 for i, f := range field { 377 if f == "NULL" { 378 field[i] = "" 379 } 380 if f == "NIL" { 381 t.Logf("%s:%d: skip: %s", file, lineno, line) 382 continue Reading 383 } 384 } 385 if len(field) == 0 { 386 continue Reading 387 } 388 389 // Field 1: the regex(3) flags to apply, one character per REG_feature 390 // flag. The test is skipped if REG_feature is not supported by the 391 // implementation. If the first character is not [BEASKLP] then the 392 // specification is a global control line. One or more of [BEASKLP] may be 393 // specified; the test will be repeated for each mode. 394 // 395 // B basic BRE (grep, ed, sed) 396 // E REG_EXTENDED ERE (egrep) 397 // A REG_AUGMENTED ARE (egrep with negation) 398 // S REG_SHELL SRE (sh glob) 399 // K REG_SHELL|REG_AUGMENTED KRE (ksh glob) 400 // L REG_LITERAL LRE (fgrep) 401 // 402 // a REG_LEFT|REG_RIGHT implicit ^...$ 403 // b REG_NOTBOL lhs does not match ^ 404 // c REG_COMMENT ignore space and #...\n 405 // d REG_SHELL_DOT explicit leading . match 406 // e REG_NOTEOL rhs does not match $ 407 // f REG_MULTIPLE multiple \n separated patterns 408 // g FNM_LEADING_DIR testfnmatch only -- match until / 409 // h REG_MULTIREF multiple digit backref 410 // i REG_ICASE ignore case 411 // j REG_SPAN . matches \n 412 // k REG_ESCAPE \ to ecape [...] delimiter 413 // l REG_LEFT implicit ^... 414 // m REG_MINIMAL minimal match 415 // n REG_NEWLINE explicit \n match 416 // o REG_ENCLOSED (|&) magic inside [@|&](...) 417 // p REG_SHELL_PATH explicit / match 418 // q REG_DELIMITED delimited pattern 419 // r REG_RIGHT implicit ...$ 420 // s REG_SHELL_ESCAPED \ not special 421 // t REG_MUSTDELIM all delimiters must be specified 422 // u standard unspecified behavior -- errors not counted 423 // v REG_CLASS_ESCAPE \ special inside [...] 424 // w REG_NOSUB no subexpression match array 425 // x REG_LENIENT let some errors slide 426 // y REG_LEFT regexec() implicit ^... 427 // z REG_NULL NULL subexpressions ok 428 // $ expand C \c escapes in fields 2 and 3 429 // / field 2 is a regsubcomp() expression 430 // = field 3 is a regdecomp() expression 431 // 432 // Field 1 control lines: 433 // 434 // C set LC_COLLATE and LC_CTYPE to locale in field 2 435 // 436 // ?test ... output field 5 if passed and != EXPECTED, silent otherwise 437 // &test ... output field 5 if current and previous passed 438 // |test ... output field 5 if current passed and previous failed 439 // ; ... output field 2 if previous failed 440 // {test ... skip if failed until } 441 // } end of skip 442 // 443 // : comment comment copied as output NOTE 444 // :comment:test :comment: ignored 445 // N[OTE] comment comment copied as output NOTE 446 // T[EST] comment comment 447 // 448 // number use number for nmatch (20 by default) 449 flag := field[0] 450 switch flag[0] { 451 case '?', '&', '|', ';', '{', '}': 452 // Ignore all the control operators. 453 // Just run everything. 454 flag = flag[1:] 455 if flag == "" { 456 continue Reading 457 } 458 case ':': 459 i := strings.Index(flag[1:], ":") 460 if i < 0 { 461 t.Logf("skip: %s", line) 462 continue Reading 463 } 464 flag = flag[1+i+1:] 465 case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 466 t.Logf("skip: %s", line) 467 continue Reading 468 } 469 470 // Can check field count now that we've handled the myriad comment formats. 471 if len(field) < 4 { 472 t.Errorf("%s:%d: too few fields: %s", file, lineno, line) 473 continue Reading 474 } 475 476 // Expand C escapes (a.k.a. Go escapes). 477 if strings.Contains(flag, "$") { 478 f := `"` + field[1] + `"` 479 if field[1], err = strconv.Unquote(f); err != nil { 480 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f) 481 } 482 f = `"` + field[2] + `"` 483 if field[2], err = strconv.Unquote(f); err != nil { 484 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f) 485 } 486 } 487 488 // Field 2: the regular expression pattern; SAME uses the pattern from 489 // the previous specification. 490 // 491 if field[1] == "SAME" { 492 field[1] = lastRegexp 493 } 494 lastRegexp = field[1] 495 496 // Field 3: the string to match. 497 text := field[2] 498 499 // Field 4: the test outcome... 500 ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3]) 501 if !ok { 502 t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3]) 503 continue Reading 504 } 505 506 // Field 5: optional comment appended to the report. 507 508 Testing: 509 // Run test once for each specified capital letter mode that we support. 510 for _, c := range flag { 511 pattern := field[1] 512 syn := syntax.POSIX | syntax.ClassNL 513 switch c { 514 default: 515 continue Testing 516 case 'E': 517 // extended regexp (what we support) 518 case 'L': 519 // literal 520 pattern = QuoteMeta(pattern) 521 } 522 523 for _, c := range flag { 524 switch c { 525 case 'i': 526 syn |= syntax.FoldCase 527 } 528 } 529 530 re, err := compile(pattern, syn, true) 531 if err != nil { 532 if shouldCompile { 533 t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern) 534 } 535 continue Testing 536 } 537 if !shouldCompile { 538 t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern) 539 continue Testing 540 } 541 match := re.MatchString(text) 542 if match != shouldMatch { 543 t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch) 544 continue Testing 545 } 546 have := re.FindStringSubmatchIndex(text) 547 if (len(have) > 0) != match { 548 t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have) 549 continue Testing 550 } 551 if len(have) > len(pos) { 552 have = have[:len(pos)] 553 } 554 if !same(have, pos) { 555 t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos) 556 } 557 } 558 } 559 } 560 561 func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) { 562 // Field 4: the test outcome. This is either one of the posix error 563 // codes (with REG_ omitted) or the match array, a list of (m,n) 564 // entries with m and n being first and last+1 positions in the 565 // field 3 string, or NULL if REG_NOSUB is in effect and success 566 // is expected. BADPAT is acceptable in place of any regcomp(3) 567 // error code. The match[] array is initialized to (-2,-2) before 568 // each test. All array elements from 0 to nmatch-1 must be specified 569 // in the outcome. Unspecified endpoints (offset -1) are denoted by ?. 570 // Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a 571 // matched (?{...}) expression, where x is the text enclosed by {...}, 572 // o is the expression ordinal counting from 1, and n is the length of 573 // the unmatched portion of the subject string. If x starts with a 574 // number then that is the return value of re_execf(), otherwise 0 is 575 // returned. 576 switch { 577 case s == "": 578 // Match with no position information. 579 ok = true 580 compiled = true 581 matched = true 582 return 583 case s == "NOMATCH": 584 // Match failure. 585 ok = true 586 compiled = true 587 matched = false 588 return 589 case 'A' <= s[0] && s[0] <= 'Z': 590 // All the other error codes are compile errors. 591 ok = true 592 compiled = false 593 return 594 } 595 compiled = true 596 597 var x []int 598 for s != "" { 599 var end byte = ')' 600 if len(x)%2 == 0 { 601 if s[0] != '(' { 602 ok = false 603 return 604 } 605 s = s[1:] 606 end = ',' 607 } 608 i := 0 609 for i < len(s) && s[i] != end { 610 i++ 611 } 612 if i == 0 || i == len(s) { 613 ok = false 614 return 615 } 616 var v = -1 617 var err error 618 if s[:i] != "?" { 619 v, err = strconv.Atoi(s[:i]) 620 if err != nil { 621 ok = false 622 return 623 } 624 } 625 x = append(x, v) 626 s = s[i+1:] 627 } 628 if len(x)%2 != 0 { 629 ok = false 630 return 631 } 632 ok = true 633 matched = true 634 pos = x 635 return 636 } 637 638 var text []byte 639 640 func makeText(n int) []byte { 641 if len(text) >= n { 642 return text[:n] 643 } 644 text = make([]byte, n) 645 x := ^uint32(0) 646 for i := range text { 647 x += x 648 x ^= 1 649 if int32(x) < 0 { 650 x ^= 0x88888eef 651 } 652 if x%31 == 0 { 653 text[i] = '\n' 654 } else { 655 text[i] = byte(x%(0x7E+1-0x20) + 0x20) 656 } 657 } 658 return text 659 } 660 661 func benchmark(b *testing.B, re string, n int) { 662 r := MustCompile(re) 663 t := makeText(n) 664 b.ResetTimer() 665 b.SetBytes(int64(n)) 666 for i := 0; i < b.N; i++ { 667 if r.Match(t) { 668 b.Fatal("match!") 669 } 670 } 671 } 672 673 const ( 674 easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$" 675 easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$" 676 medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$" 677 hard = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$" 678 parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" + 679 "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$" 680 ) 681 682 func BenchmarkMatchEasy0_32(b *testing.B) { benchmark(b, easy0, 32<<0) } 683 func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) } 684 func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) } 685 func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) } 686 func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) } 687 func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) } 688 func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) } 689 func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) } 690 func BenchmarkMatchEasy1_1M(b *testing.B) { benchmark(b, easy1, 1<<20) } 691 func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) } 692 func BenchmarkMatchMedium_32(b *testing.B) { benchmark(b, medium, 32<<0) } 693 func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) } 694 func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) } 695 func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) } 696 func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) } 697 func BenchmarkMatchHard_32(b *testing.B) { benchmark(b, hard, 32<<0) } 698 func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) } 699 func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) } 700 func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) } 701 func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) } 702 703 func TestLongest(t *testing.T) { 704 re, err := Compile(`a(|b)`) 705 if err != nil { 706 t.Fatal(err) 707 } 708 if g, w := re.FindString("ab"), "a"; g != w { 709 t.Errorf("first match was %q, want %q", g, w) 710 } 711 re.Longest() 712 if g, w := re.FindString("ab"), "ab"; g != w { 713 t.Errorf("longest match was %q, want %q", g, w) 714 } 715 } 716 717 // TestProgramTooLongForBacktrack tests that a regex which is too long 718 // for the backtracker still executes properly. 719 func TestProgramTooLongForBacktrack(t *testing.T) { 720 longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`) 721 if !longRegex.MatchString("two") { 722 t.Errorf("longRegex.MatchString(\"two\") was false, want true") 723 } 724 if longRegex.MatchString("xxx") { 725 t.Errorf("longRegex.MatchString(\"xxx\") was true, want false") 726 } 727 } 728