Home | History | Annotate | Download | only in regexp
      1 // Copyright 2010 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package regexp
      6 
      7 import (
      8 	"fmt"
      9 	"strings"
     10 	"testing"
     11 )
     12 
     13 // For each pattern/text pair, what is the expected output of each function?
     14 // We can derive the textual results from the indexed results, the non-submatch
     15 // results from the submatched results, the single results from the 'all' results,
     16 // and the byte results from the string results. Therefore the table includes
     17 // only the FindAllStringSubmatchIndex result.
     18 type FindTest struct {
     19 	pat     string
     20 	text    string
     21 	matches [][]int
     22 }
     23 
     24 func (t FindTest) String() string {
     25 	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
     26 }
     27 
     28 var findTests = []FindTest{
     29 	{``, ``, build(1, 0, 0)},
     30 	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
     31 	{`a+`, "baaab", build(1, 1, 4)},
     32 	{"abcd..", "abcdef", build(1, 0, 6)},
     33 	{`a`, "a", build(1, 0, 1)},
     34 	{`x`, "y", nil},
     35 	{`b`, "abc", build(1, 1, 2)},
     36 	{`.`, "a", build(1, 0, 1)},
     37 	{`.*`, "abcdef", build(1, 0, 6)},
     38 	{`^`, "abcde", build(1, 0, 0)},
     39 	{`$`, "abcde", build(1, 5, 5)},
     40 	{`^abcd$`, "abcd", build(1, 0, 4)},
     41 	{`^bcd'`, "abcdef", nil},
     42 	{`^abcd$`, "abcde", nil},
     43 	{`a+`, "baaab", build(1, 1, 4)},
     44 	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
     45 	{`[a-z]+`, "abcd", build(1, 0, 4)},
     46 	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
     47 	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
     48 	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
     49 	{`[]+`, "", build(1, 0, 18)},
     50 	{`+`, "", build(1, 0, 9)},
     51 	{`+`, "", build(1, 0, 18)},
     52 	{`()`, "", build(1, 0, 0, 0, 0)},
     53 	{`(a)`, "a", build(1, 0, 1, 0, 1)},
     54 	{`(.)(.)`, "a", build(1, 0, 4, 0, 3, 3, 4)},
     55 	{`(.*)`, "", build(1, 0, 0, 0, 0)},
     56 	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
     57 	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
     58 	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
     59 	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
     60 	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
     61 	{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
     62 	{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
     63 
     64 	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
     65 	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
     66 	{`[.]`, ".", build(1, 0, 1)},
     67 	{`/$`, "/abc/", build(1, 4, 5)},
     68 	{`/$`, "/abc", nil},
     69 
     70 	// multiple matches
     71 	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
     72 	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
     73 	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
     74 	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
     75 	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
     76 
     77 	// fixed bugs
     78 	{`ab$`, "cab", build(1, 1, 3)},
     79 	{`axxb$`, "axxcb", nil},
     80 	{`data`, "daXY data", build(1, 5, 9)},
     81 	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
     82 	{`zx+`, "zzx", build(1, 1, 3)},
     83 	{`ab$`, "abcab", build(1, 3, 5)},
     84 	{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
     85 	{`(?:.|(?:.a))`, "", nil},
     86 	{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
     87 	{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
     88 	{`(a){0}`, "", build(1, 0, 0, -1, -1)},
     89 	{`(?-s)(?:(?:^).)`, "\n", nil},
     90 	{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
     91 	{`(?:(?:^).)`, "\n", nil},
     92 	{`\b`, "x", build(2, 0, 0, 1, 1)},
     93 	{`\b`, "xx", build(2, 0, 0, 2, 2)},
     94 	{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
     95 	{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
     96 	{`\B`, "x", nil},
     97 	{`\B`, "xx", build(1, 1, 1)},
     98 	{`\B`, "x y", nil},
     99 	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
    100 
    101 	// RE2 tests
    102 	{`[^\S\s]`, "abcd", nil},
    103 	{`[^\S[:space:]]`, "abcd", nil},
    104 	{`[^\D\d]`, "abcd", nil},
    105 	{`[^\D[:digit:]]`, "abcd", nil},
    106 	{`(?i)\W`, "x", nil},
    107 	{`(?i)\W`, "k", nil},
    108 	{`(?i)\W`, "s", nil},
    109 
    110 	// can backslash-escape any punctuation
    111 	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
    112 		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
    113 	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
    114 		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
    115 	{"\\`", "`", build(1, 0, 1)},
    116 	{"[\\`]+", "`", build(1, 0, 1)},
    117 
    118 	// long set of matches (longer than startSize)
    119 	{
    120 		".",
    121 		"qwertyuiopasdfghjklzxcvbnm1234567890",
    122 		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
    123 			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
    124 			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
    125 			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
    126 	},
    127 }
    128 
    129 // build is a helper to construct a [][]int by extracting n sequences from x.
    130 // This represents n matches with len(x)/n submatches each.
    131 func build(n int, x ...int) [][]int {
    132 	ret := make([][]int, n)
    133 	runLength := len(x) / n
    134 	j := 0
    135 	for i := range ret {
    136 		ret[i] = make([]int, runLength)
    137 		copy(ret[i], x[j:])
    138 		j += runLength
    139 		if j > len(x) {
    140 			panic("invalid build entry")
    141 		}
    142 	}
    143 	return ret
    144 }
    145 
    146 // First the simple cases.
    147 
    148 func TestFind(t *testing.T) {
    149 	for _, test := range findTests {
    150 		re := MustCompile(test.pat)
    151 		if re.String() != test.pat {
    152 			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
    153 		}
    154 		result := re.Find([]byte(test.text))
    155 		switch {
    156 		case len(test.matches) == 0 && len(result) == 0:
    157 			// ok
    158 		case test.matches == nil && result != nil:
    159 			t.Errorf("expected no match; got one: %s", test)
    160 		case test.matches != nil && result == nil:
    161 			t.Errorf("expected match; got none: %s", test)
    162 		case test.matches != nil && result != nil:
    163 			expect := test.text[test.matches[0][0]:test.matches[0][1]]
    164 			if expect != string(result) {
    165 				t.Errorf("expected %q got %q: %s", expect, result, test)
    166 			}
    167 		}
    168 	}
    169 }
    170 
    171 func TestFindString(t *testing.T) {
    172 	for _, test := range findTests {
    173 		result := MustCompile(test.pat).FindString(test.text)
    174 		switch {
    175 		case len(test.matches) == 0 && len(result) == 0:
    176 			// ok
    177 		case test.matches == nil && result != "":
    178 			t.Errorf("expected no match; got one: %s", test)
    179 		case test.matches != nil && result == "":
    180 			// Tricky because an empty result has two meanings: no match or empty match.
    181 			if test.matches[0][0] != test.matches[0][1] {
    182 				t.Errorf("expected match; got none: %s", test)
    183 			}
    184 		case test.matches != nil && result != "":
    185 			expect := test.text[test.matches[0][0]:test.matches[0][1]]
    186 			if expect != result {
    187 				t.Errorf("expected %q got %q: %s", expect, result, test)
    188 			}
    189 		}
    190 	}
    191 }
    192 
    193 func testFindIndex(test *FindTest, result []int, t *testing.T) {
    194 	switch {
    195 	case len(test.matches) == 0 && len(result) == 0:
    196 		// ok
    197 	case test.matches == nil && result != nil:
    198 		t.Errorf("expected no match; got one: %s", test)
    199 	case test.matches != nil && result == nil:
    200 		t.Errorf("expected match; got none: %s", test)
    201 	case test.matches != nil && result != nil:
    202 		expect := test.matches[0]
    203 		if expect[0] != result[0] || expect[1] != result[1] {
    204 			t.Errorf("expected %v got %v: %s", expect, result, test)
    205 		}
    206 	}
    207 }
    208 
    209 func TestFindIndex(t *testing.T) {
    210 	for _, test := range findTests {
    211 		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
    212 	}
    213 }
    214 
    215 func TestFindStringIndex(t *testing.T) {
    216 	for _, test := range findTests {
    217 		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
    218 	}
    219 }
    220 
    221 func TestFindReaderIndex(t *testing.T) {
    222 	for _, test := range findTests {
    223 		testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
    224 	}
    225 }
    226 
    227 // Now come the simple All cases.
    228 
    229 func TestFindAll(t *testing.T) {
    230 	for _, test := range findTests {
    231 		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
    232 		switch {
    233 		case test.matches == nil && result == nil:
    234 			// ok
    235 		case test.matches == nil && result != nil:
    236 			t.Errorf("expected no match; got one: %s", test)
    237 		case test.matches != nil && result == nil:
    238 			t.Fatalf("expected match; got none: %s", test)
    239 		case test.matches != nil && result != nil:
    240 			if len(test.matches) != len(result) {
    241 				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
    242 				continue
    243 			}
    244 			for k, e := range test.matches {
    245 				expect := test.text[e[0]:e[1]]
    246 				if expect != string(result[k]) {
    247 					t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test)
    248 				}
    249 			}
    250 		}
    251 	}
    252 }
    253 
    254 func TestFindAllString(t *testing.T) {
    255 	for _, test := range findTests {
    256 		result := MustCompile(test.pat).FindAllString(test.text, -1)
    257 		switch {
    258 		case test.matches == nil && result == nil:
    259 			// ok
    260 		case test.matches == nil && result != nil:
    261 			t.Errorf("expected no match; got one: %s", test)
    262 		case test.matches != nil && result == nil:
    263 			t.Errorf("expected match; got none: %s", test)
    264 		case test.matches != nil && result != nil:
    265 			if len(test.matches) != len(result) {
    266 				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
    267 				continue
    268 			}
    269 			for k, e := range test.matches {
    270 				expect := test.text[e[0]:e[1]]
    271 				if expect != result[k] {
    272 					t.Errorf("expected %q got %q: %s", expect, result, test)
    273 				}
    274 			}
    275 		}
    276 	}
    277 }
    278 
    279 func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
    280 	switch {
    281 	case test.matches == nil && result == nil:
    282 		// ok
    283 	case test.matches == nil && result != nil:
    284 		t.Errorf("expected no match; got one: %s", test)
    285 	case test.matches != nil && result == nil:
    286 		t.Errorf("expected match; got none: %s", test)
    287 	case test.matches != nil && result != nil:
    288 		if len(test.matches) != len(result) {
    289 			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
    290 			return
    291 		}
    292 		for k, e := range test.matches {
    293 			if e[0] != result[k][0] || e[1] != result[k][1] {
    294 				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
    295 			}
    296 		}
    297 	}
    298 }
    299 
    300 func TestFindAllIndex(t *testing.T) {
    301 	for _, test := range findTests {
    302 		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
    303 	}
    304 }
    305 
    306 func TestFindAllStringIndex(t *testing.T) {
    307 	for _, test := range findTests {
    308 		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
    309 	}
    310 }
    311 
    312 // Now come the Submatch cases.
    313 
    314 func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
    315 	if len(submatches) != len(result)*2 {
    316 		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
    317 		return
    318 	}
    319 	for k := 0; k < len(submatches); k += 2 {
    320 		if submatches[k] == -1 {
    321 			if result[k/2] != nil {
    322 				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
    323 			}
    324 			continue
    325 		}
    326 		expect := test.text[submatches[k]:submatches[k+1]]
    327 		if expect != string(result[k/2]) {
    328 			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
    329 			return
    330 		}
    331 	}
    332 }
    333 
    334 func TestFindSubmatch(t *testing.T) {
    335 	for _, test := range findTests {
    336 		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
    337 		switch {
    338 		case test.matches == nil && result == nil:
    339 			// ok
    340 		case test.matches == nil && result != nil:
    341 			t.Errorf("expected no match; got one: %s", test)
    342 		case test.matches != nil && result == nil:
    343 			t.Errorf("expected match; got none: %s", test)
    344 		case test.matches != nil && result != nil:
    345 			testSubmatchBytes(&test, 0, test.matches[0], result, t)
    346 		}
    347 	}
    348 }
    349 
    350 func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
    351 	if len(submatches) != len(result)*2 {
    352 		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
    353 		return
    354 	}
    355 	for k := 0; k < len(submatches); k += 2 {
    356 		if submatches[k] == -1 {
    357 			if result[k/2] != "" {
    358 				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
    359 			}
    360 			continue
    361 		}
    362 		expect := test.text[submatches[k]:submatches[k+1]]
    363 		if expect != result[k/2] {
    364 			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
    365 			return
    366 		}
    367 	}
    368 }
    369 
    370 func TestFindStringSubmatch(t *testing.T) {
    371 	for _, test := range findTests {
    372 		result := MustCompile(test.pat).FindStringSubmatch(test.text)
    373 		switch {
    374 		case test.matches == nil && result == nil:
    375 			// ok
    376 		case test.matches == nil && result != nil:
    377 			t.Errorf("expected no match; got one: %s", test)
    378 		case test.matches != nil && result == nil:
    379 			t.Errorf("expected match; got none: %s", test)
    380 		case test.matches != nil && result != nil:
    381 			testSubmatchString(&test, 0, test.matches[0], result, t)
    382 		}
    383 	}
    384 }
    385 
    386 func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
    387 	if len(expect) != len(result) {
    388 		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
    389 		return
    390 	}
    391 	for k, e := range expect {
    392 		if e != result[k] {
    393 			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
    394 		}
    395 	}
    396 }
    397 
    398 func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
    399 	switch {
    400 	case test.matches == nil && result == nil:
    401 		// ok
    402 	case test.matches == nil && result != nil:
    403 		t.Errorf("expected no match; got one: %s", test)
    404 	case test.matches != nil && result == nil:
    405 		t.Errorf("expected match; got none: %s", test)
    406 	case test.matches != nil && result != nil:
    407 		testSubmatchIndices(test, 0, test.matches[0], result, t)
    408 	}
    409 }
    410 
    411 func TestFindSubmatchIndex(t *testing.T) {
    412 	for _, test := range findTests {
    413 		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
    414 	}
    415 }
    416 
    417 func TestFindStringSubmatchIndex(t *testing.T) {
    418 	for _, test := range findTests {
    419 		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
    420 	}
    421 }
    422 
    423 func TestFindReaderSubmatchIndex(t *testing.T) {
    424 	for _, test := range findTests {
    425 		testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
    426 	}
    427 }
    428 
    429 // Now come the monster AllSubmatch cases.
    430 
    431 func TestFindAllSubmatch(t *testing.T) {
    432 	for _, test := range findTests {
    433 		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
    434 		switch {
    435 		case test.matches == nil && result == nil:
    436 			// ok
    437 		case test.matches == nil && result != nil:
    438 			t.Errorf("expected no match; got one: %s", test)
    439 		case test.matches != nil && result == nil:
    440 			t.Errorf("expected match; got none: %s", test)
    441 		case len(test.matches) != len(result):
    442 			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
    443 		case test.matches != nil && result != nil:
    444 			for k, match := range test.matches {
    445 				testSubmatchBytes(&test, k, match, result[k], t)
    446 			}
    447 		}
    448 	}
    449 }
    450 
    451 func TestFindAllStringSubmatch(t *testing.T) {
    452 	for _, test := range findTests {
    453 		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
    454 		switch {
    455 		case test.matches == nil && result == nil:
    456 			// ok
    457 		case test.matches == nil && result != nil:
    458 			t.Errorf("expected no match; got one: %s", test)
    459 		case test.matches != nil && result == nil:
    460 			t.Errorf("expected match; got none: %s", test)
    461 		case len(test.matches) != len(result):
    462 			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
    463 		case test.matches != nil && result != nil:
    464 			for k, match := range test.matches {
    465 				testSubmatchString(&test, k, match, result[k], t)
    466 			}
    467 		}
    468 	}
    469 }
    470 
    471 func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
    472 	switch {
    473 	case test.matches == nil && result == nil:
    474 		// ok
    475 	case test.matches == nil && result != nil:
    476 		t.Errorf("expected no match; got one: %s", test)
    477 	case test.matches != nil && result == nil:
    478 		t.Errorf("expected match; got none: %s", test)
    479 	case len(test.matches) != len(result):
    480 		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
    481 	case test.matches != nil && result != nil:
    482 		for k, match := range test.matches {
    483 			testSubmatchIndices(test, k, match, result[k], t)
    484 		}
    485 	}
    486 }
    487 
    488 func TestFindAllSubmatchIndex(t *testing.T) {
    489 	for _, test := range findTests {
    490 		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
    491 	}
    492 }
    493 
    494 func TestFindAllStringSubmatchIndex(t *testing.T) {
    495 	for _, test := range findTests {
    496 		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
    497 	}
    498 }
    499