1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 import "testing" 8 9 var simplifyTests = []struct { 10 Regexp string 11 Simple string 12 }{ 13 // Already-simple constructs 14 {`a`, `a`}, 15 {`ab`, `ab`}, 16 {`a|b`, `[a-b]`}, 17 {`ab|cd`, `ab|cd`}, 18 {`(ab)*`, `(ab)*`}, 19 {`(ab)+`, `(ab)+`}, 20 {`(ab)?`, `(ab)?`}, 21 {`.`, `(?s:.)`}, 22 {`^`, `^`}, 23 {`$`, `$`}, 24 {`[ac]`, `[ac]`}, 25 {`[^ac]`, `[^ac]`}, 26 27 // Posix character classes 28 {`[[:alnum:]]`, `[0-9A-Za-z]`}, 29 {`[[:alpha:]]`, `[A-Za-z]`}, 30 {`[[:blank:]]`, `[\t ]`}, 31 {`[[:cntrl:]]`, `[\x00-\x1f\x7f]`}, 32 {`[[:digit:]]`, `[0-9]`}, 33 {`[[:graph:]]`, `[!-~]`}, 34 {`[[:lower:]]`, `[a-z]`}, 35 {`[[:print:]]`, `[ -~]`}, 36 {`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"}, 37 {`[[:space:]]`, `[\t-\r ]`}, 38 {`[[:upper:]]`, `[A-Z]`}, 39 {`[[:xdigit:]]`, `[0-9A-Fa-f]`}, 40 41 // Perl character classes 42 {`\d`, `[0-9]`}, 43 {`\s`, `[\t-\n\f-\r ]`}, 44 {`\w`, `[0-9A-Z_a-z]`}, 45 {`\D`, `[^0-9]`}, 46 {`\S`, `[^\t-\n\f-\r ]`}, 47 {`\W`, `[^0-9A-Z_a-z]`}, 48 {`[\d]`, `[0-9]`}, 49 {`[\s]`, `[\t-\n\f-\r ]`}, 50 {`[\w]`, `[0-9A-Z_a-z]`}, 51 {`[\D]`, `[^0-9]`}, 52 {`[\S]`, `[^\t-\n\f-\r ]`}, 53 {`[\W]`, `[^0-9A-Z_a-z]`}, 54 55 // Posix repetitions 56 {`a{1}`, `a`}, 57 {`a{2}`, `aa`}, 58 {`a{5}`, `aaaaa`}, 59 {`a{0,1}`, `a?`}, 60 // The next three are illegible because Simplify inserts (?:) 61 // parens instead of () parens to avoid creating extra 62 // captured subexpressions. The comments show a version with fewer parens. 63 {`(a){0,2}`, `(?:(a)(a)?)?`}, // (aa?)? 64 {`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // (a(a(aa?)?)?)? 65 {`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)? 66 {`a{0,2}`, `(?:aa?)?`}, // (aa?)? 67 {`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`}, // (a(a(aa?)?)?)? 68 {`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`}, // aa(a(a(aa?)?)?)? 69 {`a{0,}`, `a*`}, 70 {`a{1,}`, `a+`}, 71 {`a{2,}`, `aa+`}, 72 {`a{5,}`, `aaaaa+`}, 73 74 // Test that operators simplify their arguments. 75 {`(?:a{1,}){1,}`, `a+`}, 76 {`(a{1,}b{1,})`, `(a+b+)`}, 77 {`a{1,}|b{1,}`, `a+|b+`}, 78 {`(?:a{1,})*`, `(?:a+)*`}, 79 {`(?:a{1,})+`, `a+`}, 80 {`(?:a{1,})?`, `(?:a+)?`}, 81 {``, `(?:)`}, 82 {`a{0}`, `(?:)`}, 83 84 // Character class simplification 85 {`[ab]`, `[a-b]`}, 86 {`[a-za-za-z]`, `[a-z]`}, 87 {`[A-Za-zA-Za-z]`, `[A-Za-z]`}, 88 {`[ABCDEFGH]`, `[A-H]`}, 89 {`[AB-CD-EF-GH]`, `[A-H]`}, 90 {`[W-ZP-XE-R]`, `[E-Z]`}, 91 {`[a-ee-gg-m]`, `[a-m]`}, 92 {`[a-ea-ha-m]`, `[a-m]`}, 93 {`[a-ma-ha-e]`, `[a-m]`}, 94 {`[a-zA-Z0-9 -~]`, `[ -~]`}, 95 96 // Empty character classes 97 {`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`}, 98 99 // Full character classes 100 {`[[:cntrl:][:^cntrl:]]`, `(?s:.)`}, 101 102 // Unicode case folding. 103 {`(?i)A`, `(?i:A)`}, 104 {`(?i)a`, `(?i:A)`}, 105 {`(?i)[A]`, `(?i:A)`}, 106 {`(?i)[a]`, `(?i:A)`}, 107 {`(?i)K`, `(?i:K)`}, 108 {`(?i)k`, `(?i:K)`}, 109 {`(?i)\x{212a}`, "(?i:K)"}, 110 {`(?i)[K]`, "[Kk\u212A]"}, 111 {`(?i)[k]`, "[Kk\u212A]"}, 112 {`(?i)[\x{212a}]`, "[Kk\u212A]"}, 113 {`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"}, 114 {`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"}, 115 {`(?i)[\x00-\x{10FFFF}]`, `(?s:.)`}, 116 117 // Empty string as a regular expression. 118 // The empty string must be preserved inside parens in order 119 // to make submatches work right, so these tests are less 120 // interesting than they might otherwise be. String inserts 121 // explicit (?:) in place of non-parenthesized empty strings, 122 // to make them easier to spot for other parsers. 123 {`(a|b|)`, `([a-b]|(?:))`}, 124 {`(|)`, `()`}, 125 {`a()`, `a()`}, 126 {`(()|())`, `(()|())`}, 127 {`(a|)`, `(a|(?:))`}, 128 {`ab()cd()`, `ab()cd()`}, 129 {`()`, `()`}, 130 {`()*`, `()*`}, 131 {`()+`, `()+`}, 132 {`()?`, `()?`}, 133 {`(){0}`, `(?:)`}, 134 {`(){1}`, `()`}, 135 {`(){1,}`, `()+`}, 136 {`(){0,2}`, `(?:()()?)?`}, 137 } 138 139 func TestSimplify(t *testing.T) { 140 for _, tt := range simplifyTests { 141 re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine) 142 if err != nil { 143 t.Errorf("Parse(%#q) = error %v", tt.Regexp, err) 144 continue 145 } 146 s := re.Simplify().String() 147 if s != tt.Simple { 148 t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple) 149 } 150 } 151 } 152