#!/usr/bin/env python
# -*- mode: python -*-

# Re test suite and benchmark suite v1.5
#
# This module only defines data: three outcome constants, the
# "benchmarks" list and the "tests" list.  Nothing is executed against
# a regex engine here.

# The 3 possible outcomes for each pattern
[SUCCEED, FAIL, SYNTAX_ERROR] = range(3)

# Benchmark suite (needs expansion)
#
# The benchmark suite does not test correctness, just speed.  The
# first element of each tuple is the regex pattern; the second is a
# string to match it against.  The benchmarking code will embed the
# second string inside several sizes of padding, to test how regex
# matching performs on large strings.

benchmarks = [

    # test common prefix
    ('Python|Perl', 'Perl'),            # Alternation
    ('(Python|Perl)', 'Perl'),          # Grouped alternation

    ('Python|Perl|Tcl', 'Perl'),        # Alternation
    ('(Python|Perl|Tcl)', 'Perl'),      # Grouped alternation

    ('(Python)\\1', 'PythonPython'),    # Backreference
    ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'),  # Disable the fastmap optimization
    ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'),   # A few sets

    ('Python', 'Python'),               # Simple text literal
    ('.*Python', 'Python'),             # Bad text literal
    ('.*Python.*', 'Python'),           # Worse text literal
    ('.*(Python)', 'Python'),           # Bad text literal with grouping

]

# Test suite (for verifying correctness)
#
# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
# complete tuple are:
# element 0: a string containing the pattern
#         1: the string to match against the pattern
#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
#         3: a string that will be eval()'ed to produce a test string.
#            This is an arbitrary Python expression; the available
#            variables are "found" (the whole match), and "g1", "g2", ...
#            up to "g99" contain the contents of each group, or the
#            string 'None' if the group wasn't given a value, or the
#            string 'Error' if the group index was out of range;
#            also "groups", the return value of m.group() (a tuple).
#            NOTE(review): m.group() without arguments returns the whole
#            match as a string; the tuple of all groups is m.groups().
#            Confirm against the harness which of the two is bound to
#            "groups".
#         4: The expected result of evaluating the expression.
#            If the two don't match, an error is reported.
#
# If the regex isn't expected to work, the latter two elements can be omitted.
# A few FAIL entries below nevertheless carry elements 3 and 4
# (presumably ignored by the harness -- TODO confirm).

tests = [
    # Test ?P< and ?P= extensions
    ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
    ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
    ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
    ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Contains an illegal char

    # Same tests, for the ?P= form
    ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
    # The reference is properly terminated so that the error comes from
    # the unknown name, not from the missing parenthesis (that case is
    # already covered by the first entry of this group).
    ('(?P<foo_123>a)(?P=foo_124)', 'aa', SYNTAX_ERROR),  # Backref to undefined group

    ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
    ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),

    # Test octal escapes
    ('\\1', 'a', SYNTAX_ERROR),    # Backreference
    ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
    ('\\141', 'a', SUCCEED, 'found', 'a'),
    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),

    # Test \0 is handled everywhere
    (r'\0', '\0', SUCCEED, 'found', '\0'),
    (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
    (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
    (r'[^a\0]', '\0', FAIL),

    # Test various letter escapes
    (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    # NOTE: not an error under PCRE/PRE:
    # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
    (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    # new \x semantics
    (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
    (r'\x00f', '\017', FAIL, 'found', chr(15)),
    (r'\x00fe', '\376', FAIL, 'found', chr(254)),
    # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
    # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
    # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),

    (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
     SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),

    # Test that . only matches \n in DOTALL mode
    ('a.b', 'acb', SUCCEED, 'found', 'acb'),
    ('a.b', 'a\nb', FAIL),
    ('a.*b', 'acc\nccb', FAIL),
    ('a.{4,5}b', 'acc\nccb', FAIL),
    ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
    ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
    ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
    ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),

    (')', '', SYNTAX_ERROR),           # Unmatched right bracket
    ('', '', SUCCEED, 'found', ''),    # Empty pattern
    ('abc', 'abc', SUCCEED, 'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
    ('^', 'abc', SUCCEED, 'found+"-"', '-'),
    ('$', 'abc', SUCCEED, 'found+"-"', '-'),
    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
    # NOTE: not an error under PCRE/PRE:
    # ('a[b-]', 'a-', SYNTAX_ERROR),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a\\', '-', SYNTAX_ERROR),
    ('abc)', '-', SYNTAX_ERROR),
    ('(abc', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED, 'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[\\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
    ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
    ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
    ('\\by\\b', 'xy', FAIL),
    ('\\by\\b', 'yz', FAIL),
    ('\\by\\b', 'xyz', FAIL),
    ('x\\b', 'xyz', FAIL),
    ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
    ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
    ('z\\B', 'xyz', FAIL),
    ('\\Bx', 'xyz', FAIL),
    ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
    ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
    ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
    ('\\By\\B', 'xy', FAIL),
    ('\\By\\B', 'yz', FAIL),
    ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
    ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
    ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('$b', 'b', FAIL),
    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    (')(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED, 'found', ''),
    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
    ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
    ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
    ('^(a+).\\1$', 'aaaa', FAIL),
    ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
    ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
    ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
    ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
    ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
    ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
    ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
    ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
    ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
    ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
    ('([abc]*)x', 'abc', FAIL),
    ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
    ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),

    # Test symbolic groups

    ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
    ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
    ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
    ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),

    # Test octal escapes/memory references

    ('\\1', 'a', SYNTAX_ERROR),
    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
    ('\\141', 'a', SUCCEED, 'found', 'a'),
    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),

    # All tests from Perl

    ('abc', 'abc', SUCCEED, 'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab{1,}bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{4,5}bc', 'abbbbc', FAIL),
    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
    ('^', 'abc', SUCCEED, 'found', ''),
    ('$', 'abc', SUCCEED, 'found', ''),
    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[b-a]', '-', SYNTAX_ERROR),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED, 'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('*a', '-', SYNTAX_ERROR),
    ('(*)b', '-', SYNTAX_ERROR),
    ('$b', 'b', FAIL),
    ('a\\', '-', SYNTAX_ERROR),
    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
    ('abc)', '-', SYNTAX_ERROR),
    ('(abc', '-', SYNTAX_ERROR),
    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('a**', '-', SYNTAX_ERROR),
    ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    (')(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED, 'found', ''),
    ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
    ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
    ('^(ab|cd)e', 'abcde', FAIL),
    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
    ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
    # Python does not have the same rules for \\41 so this is a syntax error
    # ('((((((((((a))))))))))\\41', 'aa', FAIL),
    # ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
    ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)abc', 'XBC', FAIL),
    ('(?i)abc', 'AXC', FAIL),
    ('(?i)abc', 'ABX', FAIL),
    ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
    ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab+bc', 'ABC', FAIL),
    ('(?i)ab+bc', 'ABQ', FAIL),
    ('(?i)ab{1,}bc', 'ABQ', FAIL),
    ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
    ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab??bc', 'ABBBBC', FAIL),
    ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'ABCC', FAIL),
    ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'AABC', FAIL),
    ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^', 'ABC', SUCCEED, 'found', ''),
    ('(?i)$', 'ABC', SUCCEED, 'found', ''),
    ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
    ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
    ('(?i)a.*c', 'AXYZD', FAIL),
    ('(?i)a[bc]d', 'ABC', FAIL),
    ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
    ('(?i)a[b-d]e', 'ABD', FAIL),
    ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
    ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
    ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
    ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
    ('(?i)a[b-a]', '-', SYNTAX_ERROR),
    ('(?i)a[]b', '-', SYNTAX_ERROR),
    ('(?i)a[', '-', SYNTAX_ERROR),
    ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
    ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
    ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
    ('(?i)a[^bc]d', 'ABD', FAIL),
    ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
    ('(?i)a[^-b]c', 'A-C', FAIL),
    ('(?i)a[^]b]c', 'A]C', FAIL),
    ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
    ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
    ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
    ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
    ('(?i)*a', '-', SYNTAX_ERROR),
    ('(?i)(*)b', '-', SYNTAX_ERROR),
    ('(?i)$b', 'B', FAIL),
    ('(?i)a\\', '-', SYNTAX_ERROR),
    ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
    ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
    ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
    ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
    ('(?i)abc)', '-', SYNTAX_ERROR),
    ('(?i)(abc', '-', SYNTAX_ERROR),
    ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
    ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
    ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a**', '-', SYNTAX_ERROR),
    ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
    ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
    ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
    ('(?i))(', '-', SYNTAX_ERROR),
    ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
    ('(?i)abc', '', FAIL),
    ('(?i)a*', '', SUCCEED, 'found', ''),
    ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
    ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
    ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
    ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
    ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
    ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
    ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
    ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
    ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
    ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
    ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
    ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
    ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
    ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
    ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
    ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
    ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
    ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
    ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
    ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
    ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
    ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
    ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
    ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
    ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
    #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
    #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
    ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
    ('(?i)multiple words of text', 'UH-UH', FAIL),
    ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
    ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
    ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
    ('(?i)[k]', 'AB', FAIL),
    # ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
    # ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
    ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
    ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
    ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
    ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),

    # lookbehind: split by : but not if it is escaped by -.
    ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de'),
    # escaping with \ as we know it
    ('(?<!\\\\):(.*?)(?<!\\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de'),
    # terminating with ' and escaping with ? as in edifact
    ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de"),

    # Comments using the (?#...) syntax

    ('w(?# comment', 'w', SYNTAX_ERROR),
    ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),

    # Check odd placement of embedded pattern modifiers

    # not an error under PCRE/PRE:
    ('w(?i)', 'W', SUCCEED, 'found', 'W'),
    # ('w(?i)', 'W', SYNTAX_ERROR),

    # Comments using the x embedded pattern modifier

    ("""(?x)w# comment 1
        x y
        # comment 2
        z""", 'wxyz', SUCCEED, 'found', 'wxyz'),

    # using the m embedded pattern modifier

    ('^abc', """jkl
abc
xyz""", FAIL),
    ('(?m)^abc', """jkl
abc
xyz""", SUCCEED, 'found', 'abc'),

    ('(?m)abc$', """jkl
xyzabc
123""", SUCCEED, 'found', 'abc'),

    # using the s embedded pattern modifier

    ('a.b', 'a\nb', FAIL),
    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),

    # test \w, etc. both inside and outside character classes

    ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
    # not an error under PCRE/PRE:
    # ('[\\d-x]', '-', SYNTAX_ERROR),
    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),

    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    # new \x semantics
    (r'\x00ff', '\377', FAIL),
    # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
    (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
    # Same pattern, but with the control characters embedded literally;
    # only the final "\g" reaches the regex engine as an escape.
    ('\t\n\v\r\f\a\\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
    (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),

    #
    # post-1.5.2 additions

    # xmllib problem
    (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
    # bug 110866: reference to undefined group
    (r'((.)\1+)', '', SYNTAX_ERROR),
    # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
    (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
    # bug 112468: various expected syntax errors
    (r'(', '', SYNTAX_ERROR),
    (r'[\41]', '!', SUCCEED, 'found', '!'),
    # bug 114033: nothing to repeat
    (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
    # bug 115040: rescan if flags are modified inside pattern
    (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
    # bug 115618: negative lookbehind
    (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
    # bug 116251: character class bug
    (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
    # bug 123769+127259: non-greedy backtracking bug
    (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
    # NOTE(review): the next two entries are identical as they stand;
    # presumably they once differed in the amount of whitespace in the
    # subject string -- confirm against the upstream copy.
    (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
    (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
    # bug 127259: \Z shouldn't depend on multiline mode
    (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
    # bug 128899: uppercase literals under the ignorecase flag
    (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
    # bug 130748: ^* should be an error (nothing to repeat)
    (r'^*', '', SYNTAX_ERROR),
    # bug 133283: minimizing repeat problem
    (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
    # bug 477728: minimizing repeat problem
    (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
    # bug 483789: minimizing repeat problem
    (r'a[^>]*?b', 'a>b', FAIL),
    # bug 490573: minimizing repeat problem
    (r'^a*?$', 'foo', FAIL),
    # bug 470582: nested groups problem
    (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
    # another minimizing repeat problem (capturing groups in assertions)
    ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
    ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
    ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
]

# The non-ASCII test character is produced through eval() of a constant
# literal so that an interpreter rejecting the literal raises a
# SyntaxError at run time, which is caught here; the tests that need the
# character are only appended when the literal was accepted.
try:
    u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
except SyntaxError:
    pass
else:
    tests.extend([
        # bug 410271: \b broken under locales
        (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
        (r'(?u)\b.\b', u, SUCCEED, 'found', u),
        (r'(?u)\w', u, SUCCEED, 'found', u),
    ])