Home | History | Annotate | Download | only in regexp
      1 // Copyright 2010 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package regexp
      6 
      7 import (
      8 	"bufio"
      9 	"compress/bzip2"
     10 	"fmt"
     11 	"io"
     12 	"os"
     13 	"path/filepath"
     14 	"regexp/syntax"
     15 	"strconv"
     16 	"strings"
     17 	"testing"
     18 	"unicode/utf8"
     19 )
     20 
     21 // TestRE2 tests this package's regexp API against test cases
     22 // considered during RE2's exhaustive tests, which run all possible
     23 // regexps over a given set of atoms and operators, up to a given
     24 // complexity, over all possible strings over a given alphabet,
     25 // up to a given size.  Rather than try to link with RE2, we read a
     26 // log file containing the test cases and the expected matches.
     27 // The log file, re2-exhaustive.txt, is generated by running 'make log'
     28 // in the open source RE2 distribution https://github.com/google/re2/.
     29 //
     30 // The test file format is a sequence of stanzas like:
     31 //
     32 //	strings
     33 //	"abc"
     34 //	"123x"
     35 //	regexps
     36 //	"[a-z]+"
     37 //	0-3;0-3
     38 //	-;-
     39 //	"([0-9])([0-9])([0-9])"
     40 //	-;-
     41 //	-;0-3 0-1 1-2 2-3
     42 //
     43 // The stanza begins by defining a set of strings, quoted
     44 // using Go double-quote syntax, one per line.  Then the
     45 // regexps section gives a sequence of regexps to run on
     46 // the strings.  In the block that follows a regexp, each line
     47 // gives the semicolon-separated match results of running
     48 // the regexp on the corresponding string.
     49 // Each match result is either a single -, meaning no match, or a
     50 // space-separated sequence of pairs giving the match and
     51 // submatch indices.  An unmatched subexpression formats
     52 // its pair as a single - (not illustrated above).  For now
     53 // each regexp run produces two match results, one for a
     54 // ``full match'' that restricts the regexp to matching the entire
     55 // string or nothing, and one for a ``partial match'' that gives
     56 // the leftmost first match found in the string.
     57 //
     58 // Lines beginning with # are comments.  Lines beginning with
     59 // a capital letter are test names printed during RE2's test suite
     60 // and are echoed into t but otherwise ignored.
     61 //
     62 // At time of writing, re2-exhaustive.txt is 59 MB but compresses to 385 kB,
     63 // so we store re2-exhaustive.txt.bz2 in the repository and decompress it on the fly.
     64 //
     65 func TestRE2Search(t *testing.T) {
     66 	testRE2(t, "testdata/re2-search.txt")
     67 }
     68 
     69 func testRE2(t *testing.T, file string) {
     70 	f, err := os.Open(file)
     71 	if err != nil {
     72 		t.Fatal(err)
     73 	}
     74 	defer f.Close()
     75 	var txt io.Reader
     76 	if strings.HasSuffix(file, ".bz2") {
     77 		z := bzip2.NewReader(f)
     78 		txt = z
     79 		file = file[:len(file)-len(".bz2")] // for error messages
     80 	} else {
     81 		txt = f
     82 	}
     83 	lineno := 0
     84 	scanner := bufio.NewScanner(txt)
     85 	var (
     86 		str       []string
     87 		input     []string
     88 		inStrings bool
     89 		re        *Regexp
     90 		refull    *Regexp
     91 		nfail     int
     92 		ncase     int
     93 	)
     94 	for lineno := 1; scanner.Scan(); lineno++ {
     95 		line := scanner.Text()
     96 		switch {
     97 		case line == "":
     98 			t.Fatalf("%s:%d: unexpected blank line", file, lineno)
     99 		case line[0] == '#':
    100 			continue
    101 		case 'A' <= line[0] && line[0] <= 'Z':
    102 			// Test name.
    103 			t.Logf("%s\n", line)
    104 			continue
    105 		case line == "strings":
    106 			str = str[:0]
    107 			inStrings = true
    108 		case line == "regexps":
    109 			inStrings = false
    110 		case line[0] == '"':
    111 			q, err := strconv.Unquote(line)
    112 			if err != nil {
    113 				// Fatal because we'll get out of sync.
    114 				t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
    115 			}
    116 			if inStrings {
    117 				str = append(str, q)
    118 				continue
    119 			}
    120 			// Is a regexp.
    121 			if len(input) != 0 {
    122 				t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
    123 			}
    124 			re, err = tryCompile(q)
    125 			if err != nil {
    126 				if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
    127 					// We don't and likely never will support \C; keep going.
    128 					continue
    129 				}
    130 				t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
    131 				if nfail++; nfail >= 100 {
    132 					t.Fatalf("stopping after %d errors", nfail)
    133 				}
    134 				continue
    135 			}
    136 			full := `\A(?:` + q + `)\z`
    137 			refull, err = tryCompile(full)
    138 			if err != nil {
    139 				// Fatal because q worked, so this should always work.
    140 				t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
    141 			}
    142 			input = str
    143 		case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
    144 			// A sequence of match results.
    145 			ncase++
    146 			if re == nil {
    147 				// Failed to compile: skip results.
    148 				continue
    149 			}
    150 			if len(input) == 0 {
    151 				t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
    152 			}
    153 			var text string
    154 			text, input = input[0], input[1:]
    155 			if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
    156 				// RE2's \B considers every byte position,
    157 				// so it sees 'not word boundary' in the
    158 				// middle of UTF-8 sequences.  This package
    159 				// only considers the positions between runes,
    160 				// so it disagrees.  Skip those cases.
    161 				continue
    162 			}
    163 			res := strings.Split(line, ";")
    164 			if len(res) != len(run) {
    165 				t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
    166 			}
    167 			for i := range res {
    168 				have, suffix := run[i](re, refull, text)
    169 				want := parseResult(t, file, lineno, res[i])
    170 				if !same(have, want) {
    171 					t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
    172 					if nfail++; nfail >= 100 {
    173 						t.Fatalf("stopping after %d errors", nfail)
    174 					}
    175 					continue
    176 				}
    177 				b, suffix := match[i](re, refull, text)
    178 				if b != (want != nil) {
    179 					t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
    180 					if nfail++; nfail >= 100 {
    181 						t.Fatalf("stopping after %d errors", nfail)
    182 					}
    183 					continue
    184 				}
    185 			}
    186 
    187 		default:
    188 			t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
    189 		}
    190 	}
    191 	if err := scanner.Err(); err != nil {
    192 		t.Fatalf("%s:%d: %v", file, lineno, err)
    193 	}
    194 	if len(input) != 0 {
    195 		t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
    196 	}
    197 	t.Logf("%d cases tested", ncase)
    198 }
    199 
    200 var run = []func(*Regexp, *Regexp, string) ([]int, string){
    201 	runFull,
    202 	runPartial,
    203 	runFullLongest,
    204 	runPartialLongest,
    205 }
    206 
    207 func runFull(re, refull *Regexp, text string) ([]int, string) {
    208 	refull.longest = false
    209 	return refull.FindStringSubmatchIndex(text), "[full]"
    210 }
    211 
    212 func runPartial(re, refull *Regexp, text string) ([]int, string) {
    213 	re.longest = false
    214 	return re.FindStringSubmatchIndex(text), ""
    215 }
    216 
    217 func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
    218 	refull.longest = true
    219 	return refull.FindStringSubmatchIndex(text), "[full,longest]"
    220 }
    221 
    222 func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
    223 	re.longest = true
    224 	return re.FindStringSubmatchIndex(text), "[longest]"
    225 }
    226 
    227 var match = []func(*Regexp, *Regexp, string) (bool, string){
    228 	matchFull,
    229 	matchPartial,
    230 	matchFullLongest,
    231 	matchPartialLongest,
    232 }
    233 
    234 func matchFull(re, refull *Regexp, text string) (bool, string) {
    235 	refull.longest = false
    236 	return refull.MatchString(text), "[full]"
    237 }
    238 
    239 func matchPartial(re, refull *Regexp, text string) (bool, string) {
    240 	re.longest = false
    241 	return re.MatchString(text), ""
    242 }
    243 
    244 func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
    245 	refull.longest = true
    246 	return refull.MatchString(text), "[full,longest]"
    247 }
    248 
    249 func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
    250 	re.longest = true
    251 	return re.MatchString(text), "[longest]"
    252 }
    253 
    254 func isSingleBytes(s string) bool {
    255 	for _, c := range s {
    256 		if c >= utf8.RuneSelf {
    257 			return false
    258 		}
    259 	}
    260 	return true
    261 }
    262 
    263 func tryCompile(s string) (re *Regexp, err error) {
    264 	// Protect against panic during Compile.
    265 	defer func() {
    266 		if r := recover(); r != nil {
    267 			err = fmt.Errorf("panic: %v", r)
    268 		}
    269 	}()
    270 	return Compile(s)
    271 }
    272 
    273 func parseResult(t *testing.T, file string, lineno int, res string) []int {
    274 	// A single - indicates no match.
    275 	if res == "-" {
    276 		return nil
    277 	}
    278 	// Otherwise, a space-separated list of pairs.
    279 	n := 1
    280 	for j := 0; j < len(res); j++ {
    281 		if res[j] == ' ' {
    282 			n++
    283 		}
    284 	}
    285 	out := make([]int, 2*n)
    286 	i := 0
    287 	n = 0
    288 	for j := 0; j <= len(res); j++ {
    289 		if j == len(res) || res[j] == ' ' {
    290 			// Process a single pair.  - means no submatch.
    291 			pair := res[i:j]
    292 			if pair == "-" {
    293 				out[n] = -1
    294 				out[n+1] = -1
    295 			} else {
    296 				k := strings.Index(pair, "-")
    297 				if k < 0 {
    298 					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
    299 				}
    300 				lo, err1 := strconv.Atoi(pair[:k])
    301 				hi, err2 := strconv.Atoi(pair[k+1:])
    302 				if err1 != nil || err2 != nil || lo > hi {
    303 					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
    304 				}
    305 				out[n] = lo
    306 				out[n+1] = hi
    307 			}
    308 			n += 2
    309 			i = j + 1
    310 		}
    311 	}
    312 	return out
    313 }
    314 
    315 func same(x, y []int) bool {
    316 	if len(x) != len(y) {
    317 		return false
    318 	}
    319 	for i, xi := range x {
    320 		if xi != y[i] {
    321 			return false
    322 		}
    323 	}
    324 	return true
    325 }
    326 
    327 // TestFowler runs this package's regexp API against the
    328 // POSIX regular expression tests collected by Glenn Fowler
    329 // at http://www2.research.att.com/~astopen/testregex/testregex.html.
    330 func TestFowler(t *testing.T) {
    331 	files, err := filepath.Glob("testdata/*.dat")
    332 	if err != nil {
    333 		t.Fatal(err)
    334 	}
    335 	for _, file := range files {
    336 		t.Log(file)
    337 		testFowler(t, file)
    338 	}
    339 }
    340 
    341 var notab = MustCompilePOSIX(`[^\t]+`)
    342 
    343 func testFowler(t *testing.T, file string) {
    344 	f, err := os.Open(file)
    345 	if err != nil {
    346 		t.Error(err)
    347 		return
    348 	}
    349 	defer f.Close()
    350 	b := bufio.NewReader(f)
    351 	lineno := 0
    352 	lastRegexp := ""
    353 Reading:
    354 	for {
    355 		lineno++
    356 		line, err := b.ReadString('\n')
    357 		if err != nil {
    358 			if err != io.EOF {
    359 				t.Errorf("%s:%d: %v", file, lineno, err)
    360 			}
    361 			break Reading
    362 		}
    363 
    364 		// http://www2.research.att.com/~astopen/man/man1/testregex.html
    365 		//
    366 		// INPUT FORMAT
    367 		//   Input lines may be blank, a comment beginning with #, or a test
    368 		//   specification. A specification is five fields separated by one
    369 		//   or more tabs. NULL denotes the empty string and NIL denotes the
    370 		//   0 pointer.
    371 		if line[0] == '#' || line[0] == '\n' {
    372 			continue Reading
    373 		}
    374 		line = line[:len(line)-1]
    375 		field := notab.FindAllString(line, -1)
    376 		for i, f := range field {
    377 			if f == "NULL" {
    378 				field[i] = ""
    379 			}
    380 			if f == "NIL" {
    381 				t.Logf("%s:%d: skip: %s", file, lineno, line)
    382 				continue Reading
    383 			}
    384 		}
    385 		if len(field) == 0 {
    386 			continue Reading
    387 		}
    388 
    389 		//   Field 1: the regex(3) flags to apply, one character per REG_feature
    390 		//   flag. The test is skipped if REG_feature is not supported by the
    391 		//   implementation. If the first character is not [BEASKLP] then the
    392 		//   specification is a global control line. One or more of [BEASKLP] may be
    393 		//   specified; the test will be repeated for each mode.
    394 		//
    395 		//     B 	basic			BRE	(grep, ed, sed)
    396 		//     E 	REG_EXTENDED		ERE	(egrep)
    397 		//     A	REG_AUGMENTED		ARE	(egrep with negation)
    398 		//     S	REG_SHELL		SRE	(sh glob)
    399 		//     K	REG_SHELL|REG_AUGMENTED	KRE	(ksh glob)
    400 		//     L	REG_LITERAL		LRE	(fgrep)
    401 		//
    402 		//     a	REG_LEFT|REG_RIGHT	implicit ^...$
    403 		//     b	REG_NOTBOL		lhs does not match ^
    404 		//     c	REG_COMMENT		ignore space and #...\n
    405 		//     d	REG_SHELL_DOT		explicit leading . match
    406 		//     e	REG_NOTEOL		rhs does not match $
    407 		//     f	REG_MULTIPLE		multiple \n separated patterns
    408 		//     g	FNM_LEADING_DIR		testfnmatch only -- match until /
    409 		//     h	REG_MULTIREF		multiple digit backref
    410 		//     i	REG_ICASE		ignore case
    411 		//     j	REG_SPAN		. matches \n
    412 		//     k	REG_ESCAPE		\ to ecape [...] delimiter
    413 		//     l	REG_LEFT		implicit ^...
    414 		//     m	REG_MINIMAL		minimal match
    415 		//     n	REG_NEWLINE		explicit \n match
    416 		//     o	REG_ENCLOSED		(|&) magic inside [@|&](...)
    417 		//     p	REG_SHELL_PATH		explicit / match
    418 		//     q	REG_DELIMITED		delimited pattern
    419 		//     r	REG_RIGHT		implicit ...$
    420 		//     s	REG_SHELL_ESCAPED	\ not special
    421 		//     t	REG_MUSTDELIM		all delimiters must be specified
    422 		//     u	standard unspecified behavior -- errors not counted
    423 		//     v	REG_CLASS_ESCAPE	\ special inside [...]
    424 		//     w	REG_NOSUB		no subexpression match array
    425 		//     x	REG_LENIENT		let some errors slide
    426 		//     y	REG_LEFT		regexec() implicit ^...
    427 		//     z	REG_NULL		NULL subexpressions ok
    428 		//     $	                        expand C \c escapes in fields 2 and 3
    429 		//     /	                        field 2 is a regsubcomp() expression
    430 		//     =	                        field 3 is a regdecomp() expression
    431 		//
    432 		//   Field 1 control lines:
    433 		//
    434 		//     C		set LC_COLLATE and LC_CTYPE to locale in field 2
    435 		//
    436 		//     ?test ...	output field 5 if passed and != EXPECTED, silent otherwise
    437 		//     &test ...	output field 5 if current and previous passed
    438 		//     |test ...	output field 5 if current passed and previous failed
    439 		//     ; ...	output field 2 if previous failed
    440 		//     {test ...	skip if failed until }
    441 		//     }		end of skip
    442 		//
    443 		//     : comment		comment copied as output NOTE
    444 		//     :comment:test	:comment: ignored
    445 		//     N[OTE] comment	comment copied as output NOTE
    446 		//     T[EST] comment	comment
    447 		//
    448 		//     number		use number for nmatch (20 by default)
    449 		flag := field[0]
    450 		switch flag[0] {
    451 		case '?', '&', '|', ';', '{', '}':
    452 			// Ignore all the control operators.
    453 			// Just run everything.
    454 			flag = flag[1:]
    455 			if flag == "" {
    456 				continue Reading
    457 			}
    458 		case ':':
    459 			i := strings.Index(flag[1:], ":")
    460 			if i < 0 {
    461 				t.Logf("skip: %s", line)
    462 				continue Reading
    463 			}
    464 			flag = flag[1+i+1:]
    465 		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
    466 			t.Logf("skip: %s", line)
    467 			continue Reading
    468 		}
    469 
    470 		// Can check field count now that we've handled the myriad comment formats.
    471 		if len(field) < 4 {
    472 			t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
    473 			continue Reading
    474 		}
    475 
    476 		// Expand C escapes (a.k.a. Go escapes).
    477 		if strings.Contains(flag, "$") {
    478 			f := `"` + field[1] + `"`
    479 			if field[1], err = strconv.Unquote(f); err != nil {
    480 				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
    481 			}
    482 			f = `"` + field[2] + `"`
    483 			if field[2], err = strconv.Unquote(f); err != nil {
    484 				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
    485 			}
    486 		}
    487 
    488 		//   Field 2: the regular expression pattern; SAME uses the pattern from
    489 		//     the previous specification.
    490 		//
    491 		if field[1] == "SAME" {
    492 			field[1] = lastRegexp
    493 		}
    494 		lastRegexp = field[1]
    495 
    496 		//   Field 3: the string to match.
    497 		text := field[2]
    498 
    499 		//   Field 4: the test outcome...
    500 		ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
    501 		if !ok {
    502 			t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
    503 			continue Reading
    504 		}
    505 
    506 		//   Field 5: optional comment appended to the report.
    507 
    508 	Testing:
    509 		// Run test once for each specified capital letter mode that we support.
    510 		for _, c := range flag {
    511 			pattern := field[1]
    512 			syn := syntax.POSIX | syntax.ClassNL
    513 			switch c {
    514 			default:
    515 				continue Testing
    516 			case 'E':
    517 				// extended regexp (what we support)
    518 			case 'L':
    519 				// literal
    520 				pattern = QuoteMeta(pattern)
    521 			}
    522 
    523 			for _, c := range flag {
    524 				switch c {
    525 				case 'i':
    526 					syn |= syntax.FoldCase
    527 				}
    528 			}
    529 
    530 			re, err := compile(pattern, syn, true)
    531 			if err != nil {
    532 				if shouldCompile {
    533 					t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
    534 				}
    535 				continue Testing
    536 			}
    537 			if !shouldCompile {
    538 				t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
    539 				continue Testing
    540 			}
    541 			match := re.MatchString(text)
    542 			if match != shouldMatch {
    543 				t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
    544 				continue Testing
    545 			}
    546 			have := re.FindStringSubmatchIndex(text)
    547 			if (len(have) > 0) != match {
    548 				t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
    549 				continue Testing
    550 			}
    551 			if len(have) > len(pos) {
    552 				have = have[:len(pos)]
    553 			}
    554 			if !same(have, pos) {
    555 				t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
    556 			}
    557 		}
    558 	}
    559 }
    560 
    561 func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
    562 	//   Field 4: the test outcome. This is either one of the posix error
    563 	//     codes (with REG_ omitted) or the match array, a list of (m,n)
    564 	//     entries with m and n being first and last+1 positions in the
    565 	//     field 3 string, or NULL if REG_NOSUB is in effect and success
    566 	//     is expected. BADPAT is acceptable in place of any regcomp(3)
    567 	//     error code. The match[] array is initialized to (-2,-2) before
    568 	//     each test. All array elements from 0 to nmatch-1 must be specified
    569 	//     in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
    570 	//     Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
    571 	//     matched (?{...}) expression, where x is the text enclosed by {...},
    572 	//     o is the expression ordinal counting from 1, and n is the length of
    573 	//     the unmatched portion of the subject string. If x starts with a
    574 	//     number then that is the return value of re_execf(), otherwise 0 is
    575 	//     returned.
    576 	switch {
    577 	case s == "":
    578 		// Match with no position information.
    579 		ok = true
    580 		compiled = true
    581 		matched = true
    582 		return
    583 	case s == "NOMATCH":
    584 		// Match failure.
    585 		ok = true
    586 		compiled = true
    587 		matched = false
    588 		return
    589 	case 'A' <= s[0] && s[0] <= 'Z':
    590 		// All the other error codes are compile errors.
    591 		ok = true
    592 		compiled = false
    593 		return
    594 	}
    595 	compiled = true
    596 
    597 	var x []int
    598 	for s != "" {
    599 		var end byte = ')'
    600 		if len(x)%2 == 0 {
    601 			if s[0] != '(' {
    602 				ok = false
    603 				return
    604 			}
    605 			s = s[1:]
    606 			end = ','
    607 		}
    608 		i := 0
    609 		for i < len(s) && s[i] != end {
    610 			i++
    611 		}
    612 		if i == 0 || i == len(s) {
    613 			ok = false
    614 			return
    615 		}
    616 		var v = -1
    617 		var err error
    618 		if s[:i] != "?" {
    619 			v, err = strconv.Atoi(s[:i])
    620 			if err != nil {
    621 				ok = false
    622 				return
    623 			}
    624 		}
    625 		x = append(x, v)
    626 		s = s[i+1:]
    627 	}
    628 	if len(x)%2 != 0 {
    629 		ok = false
    630 		return
    631 	}
    632 	ok = true
    633 	matched = true
    634 	pos = x
    635 	return
    636 }
    637 
    638 var text []byte
    639 
    640 func makeText(n int) []byte {
    641 	if len(text) >= n {
    642 		return text[:n]
    643 	}
    644 	text = make([]byte, n)
    645 	x := ^uint32(0)
    646 	for i := range text {
    647 		x += x
    648 		x ^= 1
    649 		if int32(x) < 0 {
    650 			x ^= 0x88888eef
    651 		}
    652 		if x%31 == 0 {
    653 			text[i] = '\n'
    654 		} else {
    655 			text[i] = byte(x%(0x7E+1-0x20) + 0x20)
    656 		}
    657 	}
    658 	return text
    659 }
    660 
    661 func benchmark(b *testing.B, re string, n int) {
    662 	r := MustCompile(re)
    663 	t := makeText(n)
    664 	b.ResetTimer()
    665 	b.SetBytes(int64(n))
    666 	for i := 0; i < b.N; i++ {
    667 		if r.Match(t) {
    668 			b.Fatal("match!")
    669 		}
    670 	}
    671 }
    672 
    673 const (
    674 	easy0  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
    675 	easy1  = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
    676 	medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
    677 	hard   = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
    678 	parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
    679 		"(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
    680 )
    681 
    682 func BenchmarkMatchEasy0_32(b *testing.B)   { benchmark(b, easy0, 32<<0) }
    683 func BenchmarkMatchEasy0_1K(b *testing.B)   { benchmark(b, easy0, 1<<10) }
    684 func BenchmarkMatchEasy0_32K(b *testing.B)  { benchmark(b, easy0, 32<<10) }
    685 func BenchmarkMatchEasy0_1M(b *testing.B)   { benchmark(b, easy0, 1<<20) }
    686 func BenchmarkMatchEasy0_32M(b *testing.B)  { benchmark(b, easy0, 32<<20) }
    687 func BenchmarkMatchEasy1_32(b *testing.B)   { benchmark(b, easy1, 32<<0) }
    688 func BenchmarkMatchEasy1_1K(b *testing.B)   { benchmark(b, easy1, 1<<10) }
    689 func BenchmarkMatchEasy1_32K(b *testing.B)  { benchmark(b, easy1, 32<<10) }
    690 func BenchmarkMatchEasy1_1M(b *testing.B)   { benchmark(b, easy1, 1<<20) }
    691 func BenchmarkMatchEasy1_32M(b *testing.B)  { benchmark(b, easy1, 32<<20) }
    692 func BenchmarkMatchMedium_32(b *testing.B)  { benchmark(b, medium, 32<<0) }
    693 func BenchmarkMatchMedium_1K(b *testing.B)  { benchmark(b, medium, 1<<10) }
    694 func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
    695 func BenchmarkMatchMedium_1M(b *testing.B)  { benchmark(b, medium, 1<<20) }
    696 func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
    697 func BenchmarkMatchHard_32(b *testing.B)    { benchmark(b, hard, 32<<0) }
    698 func BenchmarkMatchHard_1K(b *testing.B)    { benchmark(b, hard, 1<<10) }
    699 func BenchmarkMatchHard_32K(b *testing.B)   { benchmark(b, hard, 32<<10) }
    700 func BenchmarkMatchHard_1M(b *testing.B)    { benchmark(b, hard, 1<<20) }
    701 func BenchmarkMatchHard_32M(b *testing.B)   { benchmark(b, hard, 32<<20) }
    702 
    703 func TestLongest(t *testing.T) {
    704 	re, err := Compile(`a(|b)`)
    705 	if err != nil {
    706 		t.Fatal(err)
    707 	}
    708 	if g, w := re.FindString("ab"), "a"; g != w {
    709 		t.Errorf("first match was %q, want %q", g, w)
    710 	}
    711 	re.Longest()
    712 	if g, w := re.FindString("ab"), "ab"; g != w {
    713 		t.Errorf("longest match was %q, want %q", g, w)
    714 	}
    715 }
    716 
    717 // TestProgramTooLongForBacktrack tests that a regex which is too long
    718 // for the backtracker still executes properly.
    719 func TestProgramTooLongForBacktrack(t *testing.T) {
    720 	longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
    721 	if !longRegex.MatchString("two") {
    722 		t.Errorf("longRegex.MatchString(\"two\") was false, want true")
    723 	}
    724 	if longRegex.MatchString("xxx") {
    725 		t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
    726 	}
    727 }
    728