1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package regexp 6 7 import ( 8 "fmt" 9 "strings" 10 "testing" 11 ) 12 13 // For each pattern/text pair, what is the expected output of each function? 14 // We can derive the textual results from the indexed results, the non-submatch 15 // results from the submatched results, the single results from the 'all' results, 16 // and the byte results from the string results. Therefore the table includes 17 // only the FindAllStringSubmatchIndex result. 18 type FindTest struct { 19 pat string 20 text string 21 matches [][]int 22 } 23 24 func (t FindTest) String() string { 25 return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text) 26 } 27 28 var findTests = []FindTest{ 29 {``, ``, build(1, 0, 0)}, 30 {`^abcdefg`, "abcdefg", build(1, 0, 7)}, 31 {`a+`, "baaab", build(1, 1, 4)}, 32 {"abcd..", "abcdef", build(1, 0, 6)}, 33 {`a`, "a", build(1, 0, 1)}, 34 {`x`, "y", nil}, 35 {`b`, "abc", build(1, 1, 2)}, 36 {`.`, "a", build(1, 0, 1)}, 37 {`.*`, "abcdef", build(1, 0, 6)}, 38 {`^`, "abcde", build(1, 0, 0)}, 39 {`$`, "abcde", build(1, 5, 5)}, 40 {`^abcd$`, "abcd", build(1, 0, 4)}, 41 {`^bcd'`, "abcdef", nil}, 42 {`^abcd$`, "abcde", nil}, 43 {`a+`, "baaab", build(1, 1, 4)}, 44 {`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)}, 45 {`[a-z]+`, "abcd", build(1, 0, 4)}, 46 {`[^a-z]+`, "ab1234cd", build(1, 2, 6)}, 47 {`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)}, 48 {`[^\n]+`, "abcd\n", build(1, 0, 4)}, 49 {`[]+`, "", build(1, 0, 18)}, 50 {`+`, "", build(1, 0, 9)}, 51 {`+`, "", build(1, 0, 18)}, 52 {`()`, "", build(1, 0, 0, 0, 0)}, 53 {`(a)`, "a", build(1, 0, 1, 0, 1)}, 54 {`(.)(.)`, "a", build(1, 0, 4, 0, 3, 3, 4)}, 55 {`(.*)`, "", build(1, 0, 0, 0, 0)}, 56 {`(.*)`, "abcd", build(1, 0, 4, 0, 4)}, 57 {`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)}, 58 {`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)}, 59 {`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)}, 60 {`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)}, 61 {`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)}, 62 {`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)}, 63 64 {`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)}, 65 {`(.*).*`, "ab", build(1, 0, 2, 0, 2)}, 66 {`[.]`, ".", build(1, 0, 1)}, 67 {`/$`, "/abc/", build(1, 4, 5)}, 68 {`/$`, "/abc", nil}, 69 70 // multiple matches 71 {`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)}, 72 {`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)}, 73 {`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)}, 74 {`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)}, 75 {`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)}, 76 77 // fixed bugs 78 {`ab$`, "cab", build(1, 1, 3)}, 79 {`axxb$`, "axxcb", nil}, 80 {`data`, "daXY data", build(1, 5, 9)}, 81 {`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)}, 82 {`zx+`, "zzx", build(1, 1, 3)}, 83 {`ab$`, "abcab", build(1, 3, 5)}, 84 {`(aa)*$`, "a", build(1, 1, 1, -1, -1)}, 85 {`(?:.|(?:.a))`, "", nil}, 86 {`(?:A(?:A|a))`, "Aa", build(1, 0, 2)}, 87 {`(?:A|(?:A|a))`, "a", build(1, 0, 1)}, 88 {`(a){0}`, "", build(1, 0, 0, -1, -1)}, 89 {`(?-s)(?:(?:^).)`, "\n", nil}, 90 {`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)}, 91 {`(?:(?:^).)`, "\n", nil}, 92 {`\b`, "x", build(2, 0, 0, 1, 1)}, 93 {`\b`, "xx", build(2, 0, 0, 2, 2)}, 94 {`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)}, 95 {`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)}, 96 {`\B`, "x", nil}, 97 {`\B`, "xx", build(1, 1, 1)}, 98 {`\B`, "x y", nil}, 99 {`\B`, "xx yy", build(2, 1, 1, 4, 4)}, 100 101 // RE2 tests 102 {`[^\S\s]`, "abcd", nil}, 103 {`[^\S[:space:]]`, "abcd", nil}, 104 {`[^\D\d]`, "abcd", nil}, 105 {`[^\D[:digit:]]`, "abcd", nil}, 106 {`(?i)\W`, "x", nil}, 107 {`(?i)\W`, "k", nil}, 108 {`(?i)\W`, "s", nil}, 109 110 // can backslash-escape any punctuation 111 {`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`, 112 `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, 113 {`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`, 114 `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, 115 {"\\`", "`", build(1, 0, 1)}, 116 {"[\\`]+", "`", build(1, 0, 1)}, 117 118 // long set of matches (longer than startSize) 119 { 120 ".", 121 "qwertyuiopasdfghjklzxcvbnm1234567890", 122 build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 123 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 124 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 125 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36), 126 }, 127 } 128 129 // build is a helper to construct a [][]int by extracting n sequences from x. 130 // This represents n matches with len(x)/n submatches each. 131 func build(n int, x ...int) [][]int { 132 ret := make([][]int, n) 133 runLength := len(x) / n 134 j := 0 135 for i := range ret { 136 ret[i] = make([]int, runLength) 137 copy(ret[i], x[j:]) 138 j += runLength 139 if j > len(x) { 140 panic("invalid build entry") 141 } 142 } 143 return ret 144 } 145 146 // First the simple cases. 147 148 func TestFind(t *testing.T) { 149 for _, test := range findTests { 150 re := MustCompile(test.pat) 151 if re.String() != test.pat { 152 t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat) 153 } 154 result := re.Find([]byte(test.text)) 155 switch { 156 case len(test.matches) == 0 && len(result) == 0: 157 // ok 158 case test.matches == nil && result != nil: 159 t.Errorf("expected no match; got one: %s", test) 160 case test.matches != nil && result == nil: 161 t.Errorf("expected match; got none: %s", test) 162 case test.matches != nil && result != nil: 163 expect := test.text[test.matches[0][0]:test.matches[0][1]] 164 if expect != string(result) { 165 t.Errorf("expected %q got %q: %s", expect, result, test) 166 } 167 } 168 } 169 } 170 171 func TestFindString(t *testing.T) { 172 for _, test := range findTests { 173 result := MustCompile(test.pat).FindString(test.text) 174 switch { 175 case len(test.matches) == 0 && len(result) == 0: 176 // ok 177 case test.matches == nil && result != "": 178 t.Errorf("expected no match; got one: %s", test) 179 case test.matches != nil && result == "": 180 // Tricky because an empty result has two meanings: no match or empty match. 181 if test.matches[0][0] != test.matches[0][1] { 182 t.Errorf("expected match; got none: %s", test) 183 } 184 case test.matches != nil && result != "": 185 expect := test.text[test.matches[0][0]:test.matches[0][1]] 186 if expect != result { 187 t.Errorf("expected %q got %q: %s", expect, result, test) 188 } 189 } 190 } 191 } 192 193 func testFindIndex(test *FindTest, result []int, t *testing.T) { 194 switch { 195 case len(test.matches) == 0 && len(result) == 0: 196 // ok 197 case test.matches == nil && result != nil: 198 t.Errorf("expected no match; got one: %s", test) 199 case test.matches != nil && result == nil: 200 t.Errorf("expected match; got none: %s", test) 201 case test.matches != nil && result != nil: 202 expect := test.matches[0] 203 if expect[0] != result[0] || expect[1] != result[1] { 204 t.Errorf("expected %v got %v: %s", expect, result, test) 205 } 206 } 207 } 208 209 func TestFindIndex(t *testing.T) { 210 for _, test := range findTests { 211 testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t) 212 } 213 } 214 215 func TestFindStringIndex(t *testing.T) { 216 for _, test := range findTests { 217 testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t) 218 } 219 } 220 221 func TestFindReaderIndex(t *testing.T) { 222 for _, test := range findTests { 223 testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t) 224 } 225 } 226 227 // Now come the simple All cases. 228 229 func TestFindAll(t *testing.T) { 230 for _, test := range findTests { 231 result := MustCompile(test.pat).FindAll([]byte(test.text), -1) 232 switch { 233 case test.matches == nil && result == nil: 234 // ok 235 case test.matches == nil && result != nil: 236 t.Errorf("expected no match; got one: %s", test) 237 case test.matches != nil && result == nil: 238 t.Fatalf("expected match; got none: %s", test) 239 case test.matches != nil && result != nil: 240 if len(test.matches) != len(result) { 241 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 242 continue 243 } 244 for k, e := range test.matches { 245 expect := test.text[e[0]:e[1]] 246 if expect != string(result[k]) { 247 t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test) 248 } 249 } 250 } 251 } 252 } 253 254 func TestFindAllString(t *testing.T) { 255 for _, test := range findTests { 256 result := MustCompile(test.pat).FindAllString(test.text, -1) 257 switch { 258 case test.matches == nil && result == nil: 259 // ok 260 case test.matches == nil && result != nil: 261 t.Errorf("expected no match; got one: %s", test) 262 case test.matches != nil && result == nil: 263 t.Errorf("expected match; got none: %s", test) 264 case test.matches != nil && result != nil: 265 if len(test.matches) != len(result) { 266 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 267 continue 268 } 269 for k, e := range test.matches { 270 expect := test.text[e[0]:e[1]] 271 if expect != result[k] { 272 t.Errorf("expected %q got %q: %s", expect, result, test) 273 } 274 } 275 } 276 } 277 } 278 279 func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) { 280 switch { 281 case test.matches == nil && result == nil: 282 // ok 283 case test.matches == nil && result != nil: 284 t.Errorf("expected no match; got one: %s", test) 285 case test.matches != nil && result == nil: 286 t.Errorf("expected match; got none: %s", test) 287 case test.matches != nil && result != nil: 288 if len(test.matches) != len(result) { 289 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 290 return 291 } 292 for k, e := range test.matches { 293 if e[0] != result[k][0] || e[1] != result[k][1] { 294 t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test) 295 } 296 } 297 } 298 } 299 300 func TestFindAllIndex(t *testing.T) { 301 for _, test := range findTests { 302 testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t) 303 } 304 } 305 306 func TestFindAllStringIndex(t *testing.T) { 307 for _, test := range findTests { 308 testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t) 309 } 310 } 311 312 // Now come the Submatch cases. 313 314 func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) { 315 if len(submatches) != len(result)*2 { 316 t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) 317 return 318 } 319 for k := 0; k < len(submatches); k += 2 { 320 if submatches[k] == -1 { 321 if result[k/2] != nil { 322 t.Errorf("match %d: expected nil got %q: %s", n, result, test) 323 } 324 continue 325 } 326 expect := test.text[submatches[k]:submatches[k+1]] 327 if expect != string(result[k/2]) { 328 t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) 329 return 330 } 331 } 332 } 333 334 func TestFindSubmatch(t *testing.T) { 335 for _, test := range findTests { 336 result := MustCompile(test.pat).FindSubmatch([]byte(test.text)) 337 switch { 338 case test.matches == nil && result == nil: 339 // ok 340 case test.matches == nil && result != nil: 341 t.Errorf("expected no match; got one: %s", test) 342 case test.matches != nil && result == nil: 343 t.Errorf("expected match; got none: %s", test) 344 case test.matches != nil && result != nil: 345 testSubmatchBytes(&test, 0, test.matches[0], result, t) 346 } 347 } 348 } 349 350 func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) { 351 if len(submatches) != len(result)*2 { 352 t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) 353 return 354 } 355 for k := 0; k < len(submatches); k += 2 { 356 if submatches[k] == -1 { 357 if result[k/2] != "" { 358 t.Errorf("match %d: expected nil got %q: %s", n, result, test) 359 } 360 continue 361 } 362 expect := test.text[submatches[k]:submatches[k+1]] 363 if expect != result[k/2] { 364 t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) 365 return 366 } 367 } 368 } 369 370 func TestFindStringSubmatch(t *testing.T) { 371 for _, test := range findTests { 372 result := MustCompile(test.pat).FindStringSubmatch(test.text) 373 switch { 374 case test.matches == nil && result == nil: 375 // ok 376 case test.matches == nil && result != nil: 377 t.Errorf("expected no match; got one: %s", test) 378 case test.matches != nil && result == nil: 379 t.Errorf("expected match; got none: %s", test) 380 case test.matches != nil && result != nil: 381 testSubmatchString(&test, 0, test.matches[0], result, t) 382 } 383 } 384 } 385 386 func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) { 387 if len(expect) != len(result) { 388 t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test) 389 return 390 } 391 for k, e := range expect { 392 if e != result[k] { 393 t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test) 394 } 395 } 396 } 397 398 func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) { 399 switch { 400 case test.matches == nil && result == nil: 401 // ok 402 case test.matches == nil && result != nil: 403 t.Errorf("expected no match; got one: %s", test) 404 case test.matches != nil && result == nil: 405 t.Errorf("expected match; got none: %s", test) 406 case test.matches != nil && result != nil: 407 testSubmatchIndices(test, 0, test.matches[0], result, t) 408 } 409 } 410 411 func TestFindSubmatchIndex(t *testing.T) { 412 for _, test := range findTests { 413 testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t) 414 } 415 } 416 417 func TestFindStringSubmatchIndex(t *testing.T) { 418 for _, test := range findTests { 419 testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t) 420 } 421 } 422 423 func TestFindReaderSubmatchIndex(t *testing.T) { 424 for _, test := range findTests { 425 testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t) 426 } 427 } 428 429 // Now come the monster AllSubmatch cases. 430 431 func TestFindAllSubmatch(t *testing.T) { 432 for _, test := range findTests { 433 result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1) 434 switch { 435 case test.matches == nil && result == nil: 436 // ok 437 case test.matches == nil && result != nil: 438 t.Errorf("expected no match; got one: %s", test) 439 case test.matches != nil && result == nil: 440 t.Errorf("expected match; got none: %s", test) 441 case len(test.matches) != len(result): 442 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 443 case test.matches != nil && result != nil: 444 for k, match := range test.matches { 445 testSubmatchBytes(&test, k, match, result[k], t) 446 } 447 } 448 } 449 } 450 451 func TestFindAllStringSubmatch(t *testing.T) { 452 for _, test := range findTests { 453 result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1) 454 switch { 455 case test.matches == nil && result == nil: 456 // ok 457 case test.matches == nil && result != nil: 458 t.Errorf("expected no match; got one: %s", test) 459 case test.matches != nil && result == nil: 460 t.Errorf("expected match; got none: %s", test) 461 case len(test.matches) != len(result): 462 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 463 case test.matches != nil && result != nil: 464 for k, match := range test.matches { 465 testSubmatchString(&test, k, match, result[k], t) 466 } 467 } 468 } 469 } 470 471 func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) { 472 switch { 473 case test.matches == nil && result == nil: 474 // ok 475 case test.matches == nil && result != nil: 476 t.Errorf("expected no match; got one: %s", test) 477 case test.matches != nil && result == nil: 478 t.Errorf("expected match; got none: %s", test) 479 case len(test.matches) != len(result): 480 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 481 case test.matches != nil && result != nil: 482 for k, match := range test.matches { 483 testSubmatchIndices(test, k, match, result[k], t) 484 } 485 } 486 } 487 488 func TestFindAllSubmatchIndex(t *testing.T) { 489 for _, test := range findTests { 490 testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t) 491 } 492 } 493 494 func TestFindAllStringSubmatchIndex(t *testing.T) { 495 for _, test := range findTests { 496 testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t) 497 } 498 } 499