Home | History | Annotate | Download | only in test
      1 from test.test_support import verbose, run_unittest, import_module
      2 from test.test_support import precisionbigmemtest, _2G, cpython_only
      3 import re
      4 from re import Scanner
      5 import sre_constants
      6 import sys
      7 import string
      8 import traceback
      9 from weakref import proxy
     10 
     11 
     12 # Misc tests from Tim Peters' re.doc
     13 
     14 # WARNING: Don't change details in these tests if you don't know
     15 # what you're doing. Some of these tests were carefully modeled to
     16 # cover most of the code.
     17 
     18 import unittest
     19 
     20 class ReTests(unittest.TestCase):
     21 
     22     def test_weakref(self):
     23         s = 'QabbbcR'
     24         x = re.compile('ab+c')
     25         y = proxy(x)
     26         self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
     27 
     28     def test_search_star_plus(self):
     29         self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
     30         self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
     31         self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
     32         self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
     33         self.assertEqual(re.search('x', 'aaa'), None)
     34         self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
     35         self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
     36         self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
     37         self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
     38         self.assertEqual(re.match('a+', 'xxx'), None)
     39 
     40     def bump_num(self, matchobj):
     41         int_value = int(matchobj.group(0))
     42         return str(int_value + 1)
     43 
    def test_basic_re_sub(self):
        # Core re.sub() behaviour: string and callable replacements, the
        # optional count argument, numbered/named group references and
        # escape processing inside the replacement template.
        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
        # bump_num is called once per match; the fourth argument (3) caps
        # the number of substitutions.
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                         '9.3 -3 24x100y')
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                         '9.3 -3 23x99y')

        # A callable's return value is inserted as-is (backslash + 'n'),
        # whereas a string template has its escapes expanded (newline).
        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

        # s is a template holding two references to group 1.  Used directly
        # it is expanded; escaped or returned from a callable it is kept
        # literally.
        s = r"\1\1"
        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

        # \g<name> and \g<number> reference the same group interchangeably.
        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')

        # In a template, the character escapes (\t \n \v \r \f \a \b) are
        # expanded, while the other escapes listed here (\B \Z \A \w ...)
        # are left in the output as backslash + letter.
        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
        # Replacement strings that already contain the control characters
        # pass through unchanged.
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

        # An empty match at the start of the string is still substituted.
        self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
     71 
    def test_bug_449964(self):
        # fails for group followed by other escape
        # The template is two \g<1> references followed by an escaped \b,
        # so every matched 'x' becomes 'xx' plus a backspace character.
        self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
                         'xx\bxx\b')
     76 
    def test_bug_449000(self):
        # Test for sub() on escaped characters
        # All four combinations of raw / non-raw pattern and replacement
        # must convert CR-LF line endings to a bare LF.
        self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')
        self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')
        self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')
        self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')
     87 
    def test_bug_1140(self):
        # Checks the result *type* of re.sub() for every str/unicode
        # combination of pattern (x) and replacement (y).
        #
        # re.sub(x, y, u'') should return u'', not '', and
        # re.sub(x, y, '') should return '', not u''.
        # Also:
        # re.sub(x, y, unicode(x)) should return unicode(y), and
        # re.sub(x, y, str(x)) should return
        #     str(y) if isinstance(y, str) else unicode(y).
        for x in 'x', u'x':
            for y in 'y', u'y':
                # Empty unicode subject -> empty unicode result.
                z = re.sub(x, y, u'')
                self.assertEqual(z, u'')
                self.assertEqual(type(z), unicode)
                # Empty str subject -> empty str result.
                z = re.sub(x, y, '')
                self.assertEqual(z, '')
                self.assertEqual(type(z), str)
                # Unicode subject -> result is always unicode.
                z = re.sub(x, y, unicode(x))
                self.assertEqual(z, y)
                self.assertEqual(type(z), unicode)
                # str subject -> result takes the replacement's type.
                z = re.sub(x, y, str(x))
                self.assertEqual(z, y)
                self.assertEqual(type(z), type(y))
    112 
    def test_bug_1661(self):
        # Verify that flags do not get silently ignored with compiled patterns
        # Passing a flags argument together with an already-compiled
        # pattern must raise ValueError in each module-level function.
        pattern = re.compile('.')
        self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
        self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
        self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
        self.assertRaises(ValueError, re.compile, pattern, re.I)
    120 
    def test_bug_3629(self):
        # A regex that triggered a bug in the sre-code validator
        # The test passes as long as compilation does not raise.
        re.compile("(?P<quote>)(?(quote))")
    124 
    def test_sub_template_numeric_escape(self):
        # bug 776311 and friends
        # Numeric escapes in a replacement template are either octal
        # character escapes or group references; this pins down which.

        # A leading zero, or three octal digits, gives an octal escape.  A
        # digit that cannot continue the octal number ('8', '9') is a
        # literal character following the escape.
        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')

        # At most three octal digits are consumed; a fourth is literal.
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')

        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')

        # Octal values above \377 keep only their low 8 bits, as the
        # expected results show (\400 -> \0, \777 -> \377).
        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')

        # Everything else is a group reference; the pattern 'x' has no
        # groups, so each of these must raise re.error.
        self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
        self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
        self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'

        # in python2.3 (etc), these loop endlessly in sre_parser.py
        # With eleven groups in the pattern, \11 is a valid two-digit group
        # reference and any following character is literal.
        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
                         'xz8')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
                         'xza')
    166 
    def test_qualified_re_sub(self):
        # Without a count every match is replaced; with count=1 only the
        # first one is.
        self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
        self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
    170 
    def test_bug_114660(self):
        # Collapse a run of whitespace between two non-space characters to
        # a single space, using \1 and \2 in the replacement.
        self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
                         'hello there')
    174 
    def test_bug_462270(self):
        # Test for empty sub() behaviour, see SF bug #462270
        # 'x*' also matches the empty string, so a '-' is inserted at each
        # empty-match position as well as in place of the 'x'; 'x+' only
        # replaces the actual 'x'.
        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
    179 
    def test_symbolic_groups(self):
        # Named-group syntax: (?P<name>...), (?P=name) back-references and
        # (?(name)...) conditionals.

        # Well-formed uses must compile without raising.
        re.compile('(?P<a>x)(?P=a)(?(a)y)')
        re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
        # Duplicate group name.
        self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
        # Unknown (?P extension.
        self.assertRaises(re.error, re.compile, '(?Px)')
        # (?P=...) with an empty, numeric, undefined or malformed name.
        self.assertRaises(re.error, re.compile, '(?P=)')
        self.assertRaises(re.error, re.compile, '(?P=1)')
        self.assertRaises(re.error, re.compile, '(?P=a)')
        self.assertRaises(re.error, re.compile, '(?P=a1)')
        self.assertRaises(re.error, re.compile, '(?P=a.)')
        # (?P<...>) with an unterminated, empty, numeric or malformed name.
        self.assertRaises(re.error, re.compile, '(?P<)')
        self.assertRaises(re.error, re.compile, '(?P<>)')
        self.assertRaises(re.error, re.compile, '(?P<1>)')
        self.assertRaises(re.error, re.compile, '(?P<a.>)')
        # (?(...)) with an empty, undefined or malformed group reference.
        self.assertRaises(re.error, re.compile, '(?())')
        self.assertRaises(re.error, re.compile, '(?(a))')
        self.assertRaises(re.error, re.compile, '(?(1a))')
        self.assertRaises(re.error, re.compile, '(?(a.))')
    198 
    def test_symbolic_refs(self):
        # Invalid \g<...> references in a replacement template must raise.

        # Malformed reference syntax -> re.error.
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
        # A well-formed but unknown group name raises IndexError instead.
        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
        # Group 'b' (group 2) exists but does not take part in the match.
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
        # Negative group numbers are rejected.
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
    210 
    def test_re_subn(self):
        # subn() returns a (new_string, number_of_substitutions) tuple.
        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
        self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
        self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
        # 'b*' matches the empty string at every position, and the optional
        # count argument (2) limits how many of those are replaced.
        self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
        self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
    217 
    def test_re_split(self):
        # split() with plain, repeated, capturing and non-capturing
        # separator patterns.
        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
        # A capturing group makes the matched separator text part of the
        # result list; a non-capturing group does not.
        self.assertEqual(re.split("(:*)", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', '::', 'c'])
        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
        # With the repeat outside the group, only the last repetition's
        # capture is reported (':' rather than '::').
        self.assertEqual(re.split("(:)*", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', ':', 'c'])
        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                         ['', ':', 'a', ':b::', 'c'])
        # With two alternative groups, the one that did not participate in
        # a given split contributes None.
        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
                          None, '::', 'c'])
        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                         ['', 'a', '', '', 'c'])
    233 
    def test_qualified_re_split(self):
        # The third argument (maxsplit=2) stops after two splits; the
        # unsplit remainder is returned as the final element.
        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
        # Captured separators are still included when maxsplit is given.
        self.assertEqual(re.split("(:)", ":a:b::c", 2),
                         ['', ':', 'a', ':', 'b::c'])
        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
                         ['', ':', 'a', ':', 'b::c'])
    241 
    def test_re_findall(self):
        # findall() result shape: whole matches when the pattern has no
        # group, the group text for one group, tuples for several groups.
        self.assertEqual(re.findall(":+", "abc"), [])
        self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
        self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
        self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
                                                               (":", ":"),
                                                               (":", "::")])
    249 
    def test_bug_117612(self):
        # In findall() tuples, a group that did not participate in the
        # match is reported as '' (here the nested group when 'a' matched).
        self.assertEqual(re.findall(r"(a|(b))", "aba"),
                         [("a", ""),("b", "b"),("a", "")])
    253 
    254     def test_re_match(self):
    255         self.assertEqual(re.match('a', 'a').groups(), ())
    256         self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
    257         self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
    258         self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
    259         self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
    260 
    261         pat = re.compile('((a)|(b))(c)?')
    262         self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
    263         self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
    264         self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
    265         self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
    266         self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
    267 
    268         # A single group
    269         m = re.match('(a)', 'a')
    270         self.assertEqual(m.group(0), 'a')
    271         self.assertEqual(m.group(0), 'a')
    272         self.assertEqual(m.group(1), 'a')
    273         self.assertEqual(m.group(1, 1), ('a', 'a'))
    274 
    275         pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
    276         self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
    277         self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
    278                          (None, 'b', None))
    279         self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
    280 
    281     def test_re_groupref_exists(self):
    282         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
    283                          ('(', 'a'))
    284         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
    285                          (None, 'a'))
    286         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
    287         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
    288         self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
    289                          ('a', 'b'))
    290         self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
    291                          (None, 'd'))
    292         self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
    293                          (None, 'd'))
    294         self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
    295                          ('a', ''))
    296 
    297         # Tests for bug #1177831: exercise groups other than the first group
    298         p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
    299         self.assertEqual(p.match('abc').groups(),
    300                          ('a', 'b', 'c'))
    301         self.assertEqual(p.match('ad').groups(),
    302                          ('a', None, 'd'))
    303         self.assertEqual(p.match('abd'), None)
    304         self.assertEqual(p.match('ac'), None)
    305 
    306 
    307     def test_re_groupref(self):
    308         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
    309                          ('|', 'a'))
    310         self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
    311                          (None, 'a'))
    312         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
    313         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
    314         self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
    315                          ('a', 'a'))
    316         self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
    317                          (None, None))
    318 
    def test_groupdict(self):
        # groupdict() maps each named group to the text it matched.
        self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
                                  'first second').groupdict(),
                         {'first':'first', 'second':'second'})
    323 
    def test_expand(self):
        # Match.expand() substitutes both numeric (\1, \2) and named
        # (\g<name>) group references in the template.
        self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
                                  "first second")
                                  .expand(r"\2 \1 \g<second> \g<first>"),
                         "second first second first")
    329 
    330     def test_repeat_minmax(self):
    331         self.assertEqual(re.match("^(\w){1}$", "abc"), None)
    332         self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
    333         self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
    334         self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
    335 
    336         self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
    337         self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
    338         self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
    339         self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
    340         self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
    341         self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
    342         self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
    343         self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
    344 
    345         self.assertEqual(re.match("^x{1}$", "xxx"), None)
    346         self.assertEqual(re.match("^x{1}?$", "xxx"), None)
    347         self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
    348         self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
    349 
    350         self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
    351         self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
    352         self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
    353         self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
    354         self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
    355         self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
    356         self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
    357         self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
    358 
    359         self.assertEqual(re.match("^x{}$", "xxx"), None)
    360         self.assertNotEqual(re.match("^x{}$", "x{}"), None)
    361 
    362     def test_getattr(self):
    363         self.assertEqual(re.match("(a)", "a").pos, 0)
    364         self.assertEqual(re.match("(a)", "a").endpos, 1)
    365         self.assertEqual(re.match("(a)", "a").string, "a")
    366         self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
    367         self.assertNotEqual(re.match("(a)", "a").re, None)
    368 
    369     def test_special_escapes(self):
    370         self.assertEqual(re.search(r"\b(b.)\b",
    371                                    "abcd abc bcd bx").group(1), "bx")
    372         self.assertEqual(re.search(r"\B(b.)\B",
    373                                    "abc bcd bc abxd").group(1), "bx")
    374         self.assertEqual(re.search(r"\b(b.)\b",
    375                                    "abcd abc bcd bx", re.LOCALE).group(1), "bx")
    376         self.assertEqual(re.search(r"\B(b.)\B",
    377                                    "abc bcd bc abxd", re.LOCALE).group(1), "bx")
    378         self.assertEqual(re.search(r"\b(b.)\b",
    379                                    "abcd abc bcd bx", re.UNICODE).group(1), "bx")
    380         self.assertEqual(re.search(r"\B(b.)\B",
    381                                    "abc bcd bc abxd", re.UNICODE).group(1), "bx")
    382         self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    383         self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    384         self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
    385         self.assertEqual(re.search(r"\b(b.)\b",
    386                                    u"abcd abc bcd bx").group(1), "bx")
    387         self.assertEqual(re.search(r"\B(b.)\B",
    388                                    u"abc bcd bc abxd").group(1), "bx")
    389         self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
    390         self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
    391         self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
    392         self.assertEqual(re.search(r"\d\D\w\W\s\S",
    393                                    "1aa! a").group(0), "1aa! a")
    394         self.assertEqual(re.search(r"\d\D\w\W\s\S",
    395                                    "1aa! a", re.LOCALE).group(0), "1aa! a")
    396         self.assertEqual(re.search(r"\d\D\w\W\s\S",
    397                                    "1aa! a", re.UNICODE).group(0), "1aa! a")
    398 
    399     def test_string_boundaries(self):
    400         # See http://bugs.python.org/issue10713
    401         self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
    402                          "abc")
    403         # There's a word boundary at the start of a string.
    404         self.assertTrue(re.match(r"\b", "abc"))
    405         # A non-empty string includes a non-boundary zero-length match.
    406         self.assertTrue(re.search(r"\B", "abc"))
    407         # There is no non-boundary match at the start of a string.
    408         self.assertFalse(re.match(r"\B", "abc"))
    409         # However, an empty string contains no word boundaries, and also no
    410         # non-boundaries.
    411         self.assertEqual(re.search(r"\B", ""), None)
    412         # This one is questionable and different from the perlre behaviour,
    413         # but describes current behavior.
    414         self.assertEqual(re.search(r"\b", ""), None)
    415         # A single word-character string has two boundaries, but no
    416         # non-boundary gaps.
    417         self.assertEqual(len(re.findall(r"\b", "a")), 2)
    418         self.assertEqual(len(re.findall(r"\B", "a")), 0)
    419         # If there are no words, there are no boundaries
    420         self.assertEqual(len(re.findall(r"\b", " ")), 0)
    421         self.assertEqual(len(re.findall(r"\b", "   ")), 0)
    422         # Can match around the whitespace.
    423         self.assertEqual(len(re.findall(r"\B", " ")), 2)
    424 
    def test_bigcharset(self):
        # A character class made of code points above the 8-bit range must
        # match, with and without re.UNICODE.
        self.assertEqual(re.match(u"([\u2222\u2223])",
                                  u"\u2222").group(1), u"\u2222")
        self.assertEqual(re.match(u"([\u2222\u2223])",
                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
    430 
    def test_big_codesize(self):
        # Issue #1160
        # Compile one alternation of the decimal numbers 0..9999 and check
        # that the resulting pattern still matches.
        r = re.compile('|'.join(('%d'%x for x in range(10000))))
        self.assertIsNotNone(r.match('1000'))
        self.assertIsNotNone(r.match('9999'))
    436 
    def test_anyall(self):
        # With re.DOTALL, '.' also matches newline characters.
        self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
                         "a\nb")
        self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
                         "a\n\nb")
    442 
    def test_non_consuming(self):
        # Lookahead assertions do not consume text: in every case group 1
        # is just "a", whatever the assertion had to look at.

        # Positive lookahead (?=...) with negated classes, classes,
        # back-references and alternations.
        self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
        self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
        self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
        self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
        self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
        self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
        self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")

        # Negative lookahead (?!...).
        self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
        self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
        self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
        self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
    456 
    def test_ignore_case(self):
        # re.I matching on str and unicode subjects, combined with
        # character classes, back-references and alternations.
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
    468 
    def test_category(self):
        # The \s category escape matches a single space character.
        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
    471 
    def test_getlower(self):
        # _sre.getlower(code, flags) must map 'A' to 'a' with no flag,
        # re.LOCALE and re.UNICODE.
        # Imported locally: _sre is the engine's internal extension module
        # and only this test uses it.
        import _sre
        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

        # End-to-end check: case-insensitive matching on str and unicode.
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
    480 
    def test_not_literal(self):
        # A negated single-character class, alone and under '*'.
        self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
        self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
    484 
    def test_search_coverage(self):
        # search() with \s leading the pattern and with \s ending it.
        self.assertEqual(re.search("\s(b)", " b").group(1), "b")
        self.assertEqual(re.search("a\s", "a ").group(0), "a ")
    488 
    489     def assertMatch(self, pattern, text, match=None, span=None,
    490                     matcher=re.match):
    491         if match is None and span is None:
    492             # the pattern matches the whole text
    493             match = text
    494             span = (0, len(text))
    495         elif match is None or span is None:
    496             raise ValueError('If match is not None, span should be specified '
    497                              '(and vice versa).')
    498         m = matcher(pattern, text)
    499         self.assertTrue(m)
    500         self.assertEqual(m.group(), match)
    501         self.assertEqual(m.span(), span)
    502 
    def test_re_escape(self):
        # re.escape() on unicode input, for every code point 0..255.
        alnum_chars = string.ascii_letters + string.digits
        p = u''.join(unichr(i) for i in range(256))
        for c in p:
            if c in alnum_chars:
                # ASCII letters and digits are returned unchanged.
                self.assertEqual(re.escape(c), c)
            elif c == u'\x00':
                # NUL is written as the octal escape \000.
                self.assertEqual(re.escape(c), u'\\000')
            else:
                # Anything else gets a backslash prefix.
                self.assertEqual(re.escape(c), u'\\' + c)
            # The escaped form must match the original character exactly.
            self.assertMatch(re.escape(c), c)
        self.assertMatch(re.escape(p), p)
    515 
    def test_re_escape_byte(self):
        # Byte-string counterpart of test_re_escape: re.escape() on each of
        # the 256 single-character strings built with chr().
        alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
        p = ''.join(chr(i) for i in range(256))
        for b in p:
            if b in alnum_chars:
                # ASCII letters and digits are returned unchanged.
                self.assertEqual(re.escape(b), b)
            elif b == b'\x00':
                # NUL is written as the octal escape \000.
                self.assertEqual(re.escape(b), b'\\000')
            else:
                # Anything else gets a backslash prefix.
                self.assertEqual(re.escape(b), b'\\' + b)
            # The escaped form must match the original character exactly.
            self.assertMatch(re.escape(b), b)
        self.assertMatch(re.escape(p), p)
    528 
    def test_re_escape_non_ascii(self):
        # Non-ASCII unicode characters are backslash-escaped too, and the
        # escaped text still matches the original string.
        s = u'xxx\u2620\u2620\u2620xxx'
        s_escaped = re.escape(s)
        self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
        self.assertMatch(s_escaped, s)
        # An escaped character can be embedded in a larger pattern: expect
        # search() to find the given substring at span (2, 7).
        self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
                         u'x\u2620\u2620\u2620x', (2, 7), re.search)
    536 
    def test_re_escape_non_ascii_bytes(self):
        # On UTF-8 encoded input every non-ASCII byte is escaped
        # individually, and the result still matches the original bytes.
        b = u'y\u2620y\u2620y'.encode('utf-8')
        b_escaped = re.escape(b)
        self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
        self.assertMatch(b_escaped, b)
        # The escaped encoding of U+2620 occurs twice in b.
        res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
        self.assertEqual(len(res), 2)
    544 
    def test_pickling(self):
        # Run the pickle round-trip check against both pickle
        # implementations.
        import pickle
        self.pickle_test(pickle)
        import cPickle
        self.pickle_test(cPickle)
        # old pickles expect the _compile() reconstructor in sre module
        # import_module is called with deprecated=True for the legacy
        # module.  The name imported below is deliberately unused: the
        # import succeeding is the check.
        import_module("sre", deprecated=True)
        from sre import _compile
    553 
    554     def pickle_test(self, pickle):
    555         oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    556         s = pickle.dumps(oldpat)
    557         newpat = pickle.loads(s)
    558         self.assertEqual(oldpat, newpat)
    559 
    def test_constants(self):
        # Each one-letter flag is an alias of its long-named flag.
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE)
    566 
    567     def test_flags(self):
    568         for flag in [re.I, re.M, re.X, re.S, re.L]:
    569             self.assertNotEqual(re.compile('^pattern$', flag), None)
    570 
    571     def test_sre_character_literals(self):
    572         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
    573             self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
    574             self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
    575             self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
    576             self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
    577             self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
    578             self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
    579         self.assertRaises(re.error, re.match, "\911", "")
    580 
    581     def test_sre_character_class_literals(self):
    582         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
    583             self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
    584             self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
    585             self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
    586             self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
    587             self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
    588             self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
    589         self.assertRaises(re.error, re.match, "[\911]", "")
    590 
    591     def test_bug_113254(self):
    592         self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
    593         self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
    594         self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
    595 
    596     def test_bug_527371(self):
    597         # bug described in patches 527371/672491
    598         self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
    599         self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
    600         self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
    601         self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
    602         self.assertEqual(re.match("((a))", "a").lastindex, 1)
    603 
    604     def test_bug_545855(self):
    605         # bug 545855 -- This pattern failed to cause a compile error as it
    606         # should, instead provoking a TypeError.
    607         self.assertRaises(re.error, re.compile, 'foo[a-')
    608 
    609     def test_bug_418626(self):
    610         # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
    611         # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
    612         # pattern '*?' on a long string.
    613         self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
    614         self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
    615                          20003)
    616         self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
    617         # non-simple '*?' still used to hit the recursion limit, before the
    618         # non-recursive scheme was implemented.
    619         self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
    620 
    621     def test_bug_612074(self):
    622         pat=u"["+re.escape(u"\u2039")+u"]"
    623         self.assertEqual(re.compile(pat) and 1, 1)
    624 
    625     def test_stack_overflow(self):
    626         # nasty cases that used to overflow the straightforward recursive
    627         # implementation of repeated groups.
    628         self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
    629         self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
    630         self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
    631 
    632     def test_unlimited_zero_width_repeat(self):
    633         # Issue #9669
    634         self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
    635         self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
    636         self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
    637         self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
    638         self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
    639         self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
    640 
    641     def test_scanner(self):
    642         def s_ident(scanner, token): return token
    643         def s_operator(scanner, token): return "op%s" % token
    644         def s_float(scanner, token): return float(token)
    645         def s_int(scanner, token): return int(token)
    646 
    647         scanner = Scanner([
    648             (r"[a-zA-Z_]\w*", s_ident),
    649             (r"\d+\.\d*", s_float),
    650             (r"\d+", s_int),
    651             (r"=|\+|-|\*|/", s_operator),
    652             (r"\s+", None),
    653             ])
    654 
    655         self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
    656 
    657         self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
    658                          (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
    659                            'op+', 'bar'], ''))
    660 
    661     def test_bug_448951(self):
    662         # bug 448951 (similar to 429357, but with single char match)
    663         # (Also test greedy matches.)
    664         for op in '','?','*':
    665             self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
    666                              (None, None))
    667             self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
    668                              ('a:', 'a'))
    669 
    670     def test_bug_725106(self):
    671         # capturing groups in alternatives in repeats
    672         self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
    673                          ('b', 'a'))
    674         self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
    675                          ('c', 'b'))
    676         self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
    677                          ('b', None))
    678         self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
    679                          ('b', None))
    680         self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
    681                          ('b', 'a'))
    682         self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
    683                          ('c', 'b'))
    684         self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
    685                          ('b', None))
    686         self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
    687                          ('b', None))
    688 
    689     def test_bug_725149(self):
    690         # mark_stack_base restoring before restoring marks
    691         self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
    692                          ('a', None))
    693         self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
    694                          ('a', None, None))
    695 
    696     def test_bug_764548(self):
    697         # bug 764548, re.compile() barfs on str/unicode subclasses
    698         try:
    699             unicode
    700         except NameError:
    701             return  # no problem if we have no unicode
    702         class my_unicode(unicode): pass
    703         pat = re.compile(my_unicode("abc"))
    704         self.assertEqual(pat.match("xyz"), None)
    705 
    706     def test_finditer(self):
    707         iter = re.finditer(r":+", "a:b::c:::d")
    708         self.assertEqual([item.group(0) for item in iter],
    709                          [":", "::", ":::"])
    710 
    711     def test_bug_926075(self):
    712         try:
    713             unicode
    714         except NameError:
    715             return # no problem if we have no unicode
    716         self.assertTrue(re.compile('bug_926075') is not
    717                      re.compile(eval("u'bug_926075'")))
    718 
    719     def test_bug_931848(self):
    720         try:
    721             unicode
    722         except NameError:
    723             pass
    724         pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
    725         self.assertEqual(re.compile(pattern).split("a.b.c"),
    726                          ['a','b','c'])
    727 
    728     def test_bug_581080(self):
    729         iter = re.finditer(r"\s", "a b")
    730         self.assertEqual(iter.next().span(), (1,2))
    731         self.assertRaises(StopIteration, iter.next)
    732 
    733         scanner = re.compile(r"\s").scanner("a b")
    734         self.assertEqual(scanner.search().span(), (1, 2))
    735         self.assertEqual(scanner.search(), None)
    736 
    737     def test_bug_817234(self):
    738         iter = re.finditer(r".*", "asdf")
    739         self.assertEqual(iter.next().span(), (0, 4))
    740         self.assertEqual(iter.next().span(), (4, 4))
    741         self.assertRaises(StopIteration, iter.next)
    742 
    743     def test_bug_6561(self):
    744         # '\d' should match characters in Unicode category 'Nd'
    745         # (Number, Decimal Digit), but not those in 'Nl' (Number,
    746         # Letter) or 'No' (Number, Other).
    747         decimal_digits = [
    748             u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
    749             u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
    750             u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
    751             ]
    752         for x in decimal_digits:
    753             self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
    754 
    755         not_decimal_digits = [
    756             u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
    757             u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
    758             u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
    759             u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
    760             ]
    761         for x in not_decimal_digits:
    762             self.assertIsNone(re.match('^\d$', x, re.UNICODE))
    763 
    764     def test_empty_array(self):
    765         # SF buf 1647541
    766         import array
    767         for typecode in 'cbBuhHiIlLfd':
    768             a = array.array(typecode)
    769             self.assertEqual(re.compile("bla").match(a), None)
    770             self.assertEqual(re.compile("").match(a).groups(), ())
    771 
    772     def test_inline_flags(self):
    773         # Bug #1700
    774         upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
    775         lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
    776 
    777         p = re.compile(upper_char, re.I | re.U)
    778         q = p.match(lower_char)
    779         self.assertNotEqual(q, None)
    780 
    781         p = re.compile(lower_char, re.I | re.U)
    782         q = p.match(upper_char)
    783         self.assertNotEqual(q, None)
    784 
    785         p = re.compile('(?i)' + upper_char, re.U)
    786         q = p.match(lower_char)
    787         self.assertNotEqual(q, None)
    788 
    789         p = re.compile('(?i)' + lower_char, re.U)
    790         q = p.match(upper_char)
    791         self.assertNotEqual(q, None)
    792 
    793         p = re.compile('(?iu)' + upper_char)
    794         q = p.match(lower_char)
    795         self.assertNotEqual(q, None)
    796 
    797         p = re.compile('(?iu)' + lower_char)
    798         q = p.match(upper_char)
    799         self.assertNotEqual(q, None)
    800 
    801     def test_dollar_matches_twice(self):
    802         "$ matches the end of string, and just before the terminating \n"
    803         pattern = re.compile('$')
    804         self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
    805         self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
    806         self.assertEqual(pattern.sub('#', '\n'), '#\n#')
    807 
    808         pattern = re.compile('$', re.MULTILINE)
    809         self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
    810         self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
    811         self.assertEqual(pattern.sub('#', '\n'), '#\n#')
    812 
    813     def test_dealloc(self):
    814         # issue 3299: check for segfault in debug build
    815         import _sre
    816         # the overflow limit is different on wide and narrow builds and it
    817         # depends on the definition of SRE_CODE (see sre.h).
    818         # 2**128 should be big enough to overflow on both. For smaller values
    819         # a RuntimeError is raised instead of OverflowError.
    820         long_overflow = 2**128
    821         self.assertRaises(TypeError, re.finditer, "a", {})
    822         self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
    823 
    824     def test_compile(self):
    825         # Test return value when given string and pattern as parameter
    826         pattern = re.compile('random pattern')
    827         self.assertIsInstance(pattern, re._pattern_type)
    828         same_pattern = re.compile(pattern)
    829         self.assertIsInstance(same_pattern, re._pattern_type)
    830         self.assertIs(same_pattern, pattern)
    831         # Test behaviour when not given a string or pattern as parameter
    832         self.assertRaises(TypeError, re.compile, 0)
    833 
    834     def test_bug_13899(self):
    835         # Issue #13899: re pattern r"[\A]" should work like "A" but matches
    836         # nothing. Ditto B and Z.
    837         self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
    838                          ['A', 'B', '\b', 'C', 'Z'])
    839 
    @precisionbigmemtest(size=_2G, memuse=1)
    def test_large_search(self, size):
        # Issue #10182: indices were 32-bit-truncated.
        # '$' can only match at the very end, i.e. at offset `size`.
        subject = 'a' * size
        found = re.search('$', subject)
        self.assertIsNotNone(found)
        for offset in (found.start(), found.end()):
            self.assertEqual(offset, size)
    848 
    # memuse is this large because re.sub() collects the pieces of the
    # replacement result in a list and join()s them.
    @precisionbigmemtest(size=_2G, memuse=16 + 2)
    def test_large_subn(self, size):
        # Issue #10182: indices were 32-bit-truncated.
        # The empty pattern matches at each of the size + 1 positions.
        subject = 'a' * size
        replaced, count = re.subn('', '', subject)
        self.assertEqual(replaced, subject)
        self.assertEqual(count, size + 1)
    858 
    859 
    860     def test_repeat_minmax_overflow(self):
    861         # Issue #13169
    862         string = "x" * 100000
    863         self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
    864         self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
    865         self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
    866         self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
    867         self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
    868         self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
    869         # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
    870         self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
    871         self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
    872         self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
    873         self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
    874 
    @cpython_only
    def test_repeat_minmax_overflow_maxrepeat(self):
        # Counts just below _sre.MAXREPEAT must still compile; MAXREPEAT
        # itself must raise OverflowError.
        try:
            from _sre import MAXREPEAT
        except ImportError:
            self.skipTest('requires _sre.MAXREPEAT constant')
        text = "x" * 100000
        below_limit = MAXREPEAT - 1
        self.assertIsNone(re.match(r".{%d}" % below_limit, text))
        bounded = re.match(r".{,%d}" % below_limit, text)
        self.assertEqual(bounded.span(), (0, 100000))
        self.assertIsNone(re.match(r".{%d,}?" % below_limit, text))
        for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
            self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)
    889 
    890     def test_backref_group_name_in_exception(self):
    891         # Issue 17341: Poor error message when compiling invalid regex
    892         with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
    893             re.compile('(?P=<foo>)')
    894 
    895     def test_group_name_in_exception(self):
    896         # Issue 17341: Poor error message when compiling invalid regex
    897         with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
    898             re.compile('(?P<?foo>)')
    899 
    900 
    901 def run_re_tests():
    902     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    903     if verbose:
    904         print 'Running re_tests test suite'
    905     else:
    906         # To save time, only run the first and last 10 tests
    907         #tests = tests[:10] + tests[-10:]
    908         pass
    909 
    910     for t in tests:
    911         sys.stdout.flush()
    912         pattern = s = outcome = repl = expected = None
    913         if len(t) == 5:
    914             pattern, s, outcome, repl, expected = t
    915         elif len(t) == 3:
    916             pattern, s, outcome = t
    917         else:
    918             raise ValueError, ('Test tuples should have 3 or 5 fields', t)
    919 
    920         try:
    921             obj = re.compile(pattern)
    922         except re.error:
    923             if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
    924             else:
    925                 print '=== Syntax error:', t
    926         except KeyboardInterrupt: raise KeyboardInterrupt
    927         except:
    928             print '*** Unexpected error ***', t
    929             if verbose:
    930                 traceback.print_exc(file=sys.stdout)
    931         else:
    932             try:
    933                 result = obj.search(s)
    934             except re.error, msg:
    935                 print '=== Unexpected exception', t, repr(msg)
    936             if outcome == SYNTAX_ERROR:
    937                 # This should have been a syntax error; forget it.
    938                 pass
    939             elif outcome == FAIL:
    940                 if result is None: pass   # No match, as expected
    941                 else: print '=== Succeeded incorrectly', t
    942             elif outcome == SUCCEED:
    943                 if result is not None:
    944                     # Matched, as expected, so now we compute the
    945                     # result string and compare it to our expected result.
    946                     start, end = result.span(0)
    947                     vardict={'found': result.group(0),
    948                              'groups': result.group(),
    949                              'flags': result.re.flags}
    950                     for i in range(1, 100):
    951                         try:
    952                             gi = result.group(i)
    953                             # Special hack because else the string concat fails:
    954                             if gi is None:
    955                                 gi = "None"
    956                         except IndexError:
    957                             gi = "Error"
    958                         vardict['g%d' % i] = gi
    959                     for i in result.re.groupindex.keys():
    960                         try:
    961                             gi = result.group(i)
    962                             if gi is None:
    963                                 gi = "None"
    964                         except IndexError:
    965                             gi = "Error"
    966                         vardict[i] = gi
    967                     repl = eval(repl, vardict)
    968                     if repl != expected:
    969                         print '=== grouping error', t,
    970                         print repr(repl) + ' should be ' + repr(expected)
    971                 else:
    972                     print '=== Failed incorrectly', t
    973 
    974                 # Try the match on a unicode string, and check that it
    975                 # still succeeds.
    976                 try:
    977                     result = obj.search(unicode(s, "latin-1"))
    978                     if result is None:
    979                         print '=== Fails on unicode match', t
    980                 except NameError:
    981                     continue # 1.5.2
    982                 except TypeError:
    983                     continue # unicode test case
    984 
    985                 # Try the match on a unicode pattern, and check that it
    986                 # still succeeds.
    987                 obj=re.compile(unicode(pattern, "latin-1"))
    988                 result = obj.search(s)
    989                 if result is None:
    990                     print '=== Fails on unicode pattern match', t
    991 
    992                 # Try the match with the search area limited to the extent
    993                 # of the match and see if it still succeeds.  \B will
    994                 # break (because it won't match at the end or start of a
    995                 # string), so we'll ignore patterns that feature it.
    996 
    997                 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
    998                                and result is not None:
    999                     obj = re.compile(pattern)
   1000                     result = obj.search(s, result.start(0), result.end(0) + 1)
   1001                     if result is None:
   1002                         print '=== Failed on range-limited match', t
   1003 
   1004                 # Try the match with IGNORECASE enabled, and check that it
   1005                 # still succeeds.
   1006                 obj = re.compile(pattern, re.IGNORECASE)
   1007                 result = obj.search(s)
   1008                 if result is None:
   1009                     print '=== Fails on case-insensitive match', t
   1010 
   1011                 # Try the match with LOCALE enabled, and check that it
   1012                 # still succeeds.
   1013                 obj = re.compile(pattern, re.LOCALE)
   1014                 result = obj.search(s)
   1015                 if result is None:
   1016                     print '=== Fails on locale-sensitive match', t
   1017 
   1018                 # Try the match with UNICODE locale enabled, and check
   1019                 # that it still succeeds.
   1020                 obj = re.compile(pattern, re.UNICODE)
   1021                 result = obj.search(s)
   1022                 if result is None:
   1023                     print '=== Fails on unicode-sensitive match', t
   1024 
def test_main():
    # Run the ReTests unittest cases first, then the table-driven checks in
    # run_re_tests(), which report problems by printing to stdout.
    run_unittest(ReTests)
    run_re_tests()

# Allow running this test file directly as a script.
if __name__ == "__main__":
    test_main()
   1031