Home | History | Annotate | Download | only in test
      1 from test.test_support import verbose, run_unittest, import_module
      2 import re
      3 from re import Scanner
      4 import sys
      5 import string
      6 import traceback
      7 from weakref import proxy
      8 
      9 # Misc tests from Tim Peters' re.doc

     10 
     11 # WARNING: Don't change details in these tests if you don't know

     12 # what you're doing. Some of these tests were carefully modeled to

     13 # cover most of the code.

     14 
     15 import unittest
     16 
     17 class ReTests(unittest.TestCase):
     18 
     19     def test_weakref(self):
     20         s = 'QabbbcR'
     21         x = re.compile('ab+c')
     22         y = proxy(x)
     23         self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
     24 
     25     def test_search_star_plus(self):
     26         self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
     27         self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
     28         self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
     29         self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
     30         self.assertEqual(re.search('x', 'aaa'), None)
     31         self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
     32         self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
     33         self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
     34         self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
     35         self.assertEqual(re.match('a+', 'xxx'), None)
     36 
     37     def bump_num(self, matchobj):
     38         int_value = int(matchobj.group(0))
     39         return str(int_value + 1)
     40 
     41     def test_basic_re_sub(self):
     42         self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
     43         self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
     44                          '9.3 -3 24x100y')
     45         self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
     46                          '9.3 -3 23x99y')
     47 
     48         self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
     49         self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
     50 
     51         s = r"\1\1"
     52         self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
     53         self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
     54         self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
     55 
     56         self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
     57         self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
     58         self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
     59         self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
     60 
     61         self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
     62                          '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
     63         self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
     64         self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
     65                          (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
     66 
     67         self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
     68 
     69     def test_bug_449964(self):
     70         # fails for group followed by other escape

     71         self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
     72                          'xx\bxx\b')
     73 
     74     def test_bug_449000(self):
     75         # Test for sub() on escaped characters

     76         self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
     77                          'abc\ndef\n')
     78         self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
     79                          'abc\ndef\n')
     80         self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
     81                          'abc\ndef\n')
     82         self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
     83                          'abc\ndef\n')
     84 
    def test_bug_1140(self):
        # re.sub(x, y, u'') should return u'', not '', and
        # re.sub(x, y, '') should return '', not u''.
        # Also:
        # re.sub(x, y, unicode(x)) should return unicode(y), and
        # re.sub(x, y, str(x)) should return
        #     str(y) if isinstance(y, str) else unicode(y).
        #
        # The nested loops cover every str/unicode combination of the
        # pattern (x) and the replacement (y).
        for x in 'x', u'x':
            for y in 'y', u'y':
                # Empty unicode subject: the result keeps the unicode type.
                z = re.sub(x, y, u'')
                self.assertEqual(z, u'')
                self.assertEqual(type(z), unicode)
                #
                # Empty byte-string subject: the result keeps the str type.
                z = re.sub(x, y, '')
                self.assertEqual(z, '')
                self.assertEqual(type(z), str)
                #
                # Unicode subject that matches: the result is always unicode.
                z = re.sub(x, y, unicode(x))
                self.assertEqual(z, y)
                self.assertEqual(type(z), unicode)
                #
                # str subject that matches: the result takes the type of y.
                z = re.sub(x, y, str(x))
                self.assertEqual(z, y)
                self.assertEqual(type(z), type(y))
    109 
    110     def test_bug_1661(self):
    111         # Verify that flags do not get silently ignored with compiled patterns

    112         pattern = re.compile('.')
    113         self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
    114         self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
    115         self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
    116         self.assertRaises(ValueError, re.compile, pattern, re.I)
    117 
    def test_bug_3629(self):
        # A regex that triggered a bug in the sre-code validator
        # Compiling without raising is the whole test; the compiled
        # pattern is deliberately discarded.
        re.compile("(?P<quote>)(?(quote))")
    121 
    def test_sub_template_numeric_escape(self):
        # bug 776311 and friends
        # A leading zero or three octal digits form an octal character
        # escape; a following non-octal digit is kept as a literal.
        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')

        # At most three digits are consumed; the fourth is a literal.
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')

        # Two-character escapes starting with zero.
        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')

        # Octal values above 0377 are expected to wrap to a single byte
        # (0400 -> 0, 0777 -> 0377).
        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')

        # The pattern 'x' has no groups, so anything parsed as a group
        # reference must raise re.error.
        self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
        self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
        self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
        self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'

        # in python2.3 (etc), these loop endlessly in sre_parser.py
        # With eleven groups in the pattern, \11 is a valid group
        # reference and any trailing character is a literal.
        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
                         'xz8')
        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
                         'xza')
    163 
    def test_qualified_re_sub(self):
        # Without a count every 'a' is replaced; a count of 1 limits the
        # substitution to the first occurrence only.
        self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
        self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
    167 
    def test_bug_114660(self):
        # The \1 and \2 references in the template must be expanded while
        # the run of whitespace between the two captured characters is
        # collapsed to a single space.
        self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
                         'hello there')
    171 
    def test_bug_462270(self):
        # Test for empty sub() behaviour, see SF bug #462270
        # 'x*' also matches the empty string, so a '-' is expected around
        # every remaining character; 'x+' only replaces the literal 'x'.
        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
    176 
    def test_symbolic_refs(self):
        # Malformed or unresolvable group references in a replacement
        # template must be rejected.
        # NOTE(review): the '\g...' templates are non-raw literals and rely
        # on '\g' not being a recognised string escape; consider raw strings.
        #
        # Unterminated, empty, or syntactically invalid group names.
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
        # A well-formed name the pattern does not define is expected to
        # raise IndexError rather than re.error.
        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
        # Group 'b' (group 2) exists in the pattern but cannot take part in
        # a match against 'xx'.
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
        # Negative group numbers are not accepted.
        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
    187 
    def test_re_subn(self):
        # subn() returns a (new_string, number_of_substitutions) pair.
        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
        self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
        # No match: the string is unchanged and the count is zero.
        self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
        # 'b*' matches the empty string at each position; the last call
        # caps the number of substitutions at 2.
        self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
        self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
    194 
    def test_re_split(self):
        # Splitting on a separator pattern; capturing groups in the pattern
        # put the captured text into the result list, non-capturing groups
        # do not.
        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
        self.assertEqual(re.split("(:*)", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', '::', 'c'])
        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
        # A repeated group reports only its last iteration (':' for '::').
        self.assertEqual(re.split("(:)*", ":a:b::c"),
                         ['', ':', 'a', ':', 'b', ':', 'c'])
        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                         ['', ':', 'a', ':b::', 'c'])
        # With two alternative groups, the one that did not participate in
        # a given split shows up as None.
        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
                          None, '::', 'c'])
        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                         ['', 'a', '', '', 'c'])
    210 
    def test_qualified_re_split(self):
        # A maxsplit of 2 performs at most two splits; the rest of the
        # string is returned unsplit as the final element.
        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
        # Same limit with a capturing separator.
        self.assertEqual(re.split("(:)", ":a:b::c", 2),
                         ['', ':', 'a', ':', 'b::c'])
        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
                         ['', ':', 'a', ':', 'b::c'])
    218 
    def test_re_findall(self):
        # No match gives an empty list.
        self.assertEqual(re.findall(":+", "abc"), [])
        # Zero or one group: a list of strings.
        self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
        self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
        # Several groups: a list of tuples, one entry per group.
        self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
                                                               (":", ":"),
                                                               (":", "::")])
    226 
    def test_bug_117612(self):
        # In findall() results, a group that did not participate in a
        # match (the inner '(b)' when 'a' matched) is reported as ''.
        self.assertEqual(re.findall(r"(a|(b))", "aba"),
                         [("a", ""),("b", "b"),("a", "")])
    230 
    231     def test_re_match(self):
    232         self.assertEqual(re.match('a', 'a').groups(), ())
    233         self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
    234         self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
    235         self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
    236         self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
    237 
    238         pat = re.compile('((a)|(b))(c)?')
    239         self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
    240         self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
    241         self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
    242         self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
    243         self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
    244 
    245         # A single group

    246         m = re.match('(a)', 'a')
    247         self.assertEqual(m.group(0), 'a')
    248         self.assertEqual(m.group(0), 'a')
    249         self.assertEqual(m.group(1), 'a')
    250         self.assertEqual(m.group(1, 1), ('a', 'a'))
    251 
    252         pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
    253         self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
    254         self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
    255                          (None, 'b', None))
    256         self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
    257 
    258     def test_re_groupref_exists(self):
    259         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
    260                          ('(', 'a'))
    261         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
    262                          (None, 'a'))
    263         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
    264         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
    265         self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
    266                          ('a', 'b'))
    267         self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
    268                          (None, 'd'))
    269         self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
    270                          (None, 'd'))
    271         self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
    272                          ('a', ''))
    273 
    274         # Tests for bug #1177831: exercise groups other than the first group

    275         p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
    276         self.assertEqual(p.match('abc').groups(),
    277                          ('a', 'b', 'c'))
    278         self.assertEqual(p.match('ad').groups(),
    279                          ('a', None, 'd'))
    280         self.assertEqual(p.match('abd'), None)
    281         self.assertEqual(p.match('ac'), None)
    282 
    283 
    def test_re_groupref(self):
        # Backreferences (\1) inside a pattern, including references to a
        # group that is optional or sits in an alternative that was not taken.
        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
                         ('|', 'a'))
        self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
                         (None, 'a'))
        # A mandatory backreference cannot be satisfied here.
        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
        self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
                         ('a', 'a'))
        self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
                         (None, None))
    295 
    def test_groupdict(self):
        # groupdict() maps each group name to the text that group matched.
        self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
                                  'first second').groupdict(),
                         {'first':'first', 'second':'second'})
    300 
    def test_expand(self):
        # expand() substitutes both numeric (\1, \2) and named (\g<name>)
        # references in the template with the matched group text.
        self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
                                  "first second")
                                  .expand(r"\2 \1 \g<second> \g<first>"),
                         "second first second first")
    306 
    307     def test_repeat_minmax(self):
    308         self.assertEqual(re.match("^(\w){1}$", "abc"), None)
    309         self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
    310         self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
    311         self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
    312 
    313         self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
    314         self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
    315         self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
    316         self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
    317         self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
    318         self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
    319         self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
    320         self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
    321 
    322         self.assertEqual(re.match("^x{1}$", "xxx"), None)
    323         self.assertEqual(re.match("^x{1}?$", "xxx"), None)
    324         self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
    325         self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
    326 
    327         self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
    328         self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
    329         self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
    330         self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
    331         self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
    332         self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
    333         self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
    334         self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
    335 
    336         self.assertEqual(re.match("^x{}$", "xxx"), None)
    337         self.assertNotEqual(re.match("^x{}$", "x{}"), None)
    338 
    def test_getattr(self):
        # Attributes exposed by a match object: search window (pos/endpos),
        # the subject string, the span of every group (regs), and the
        # pattern object that produced the match (re).
        self.assertEqual(re.match("(a)", "a").pos, 0)
        self.assertEqual(re.match("(a)", "a").endpos, 1)
        self.assertEqual(re.match("(a)", "a").string, "a")
        self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
        self.assertNotEqual(re.match("(a)", "a").re, None)
    345 
    def test_special_escapes(self):
        # Word-boundary (\b) and non-boundary (\B) assertions with default
        # flags, then with re.LOCALE and re.UNICODE.
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx").group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd").group(1), "bx")
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
        self.assertEqual(re.search(r"\b(b.)\b",
                                   "abcd abc bcd bx", re.UNICODE).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", re.UNICODE).group(1), "bx")
        # In MULTILINE mode ^ and $ match at line boundaries, while \A and
        # \Z still only match at the very start and end of the string.
        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
        # The same checks against unicode subject strings.
        self.assertEqual(re.search(r"\b(b.)\b",
                                   u"abcd abc bcd bx").group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   u"abc bcd bc abxd").group(1), "bx")
        self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
        # Character-category escapes under each flag setting.
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a").group(0), "1aa! a")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", re.UNICODE).group(0), "1aa! a")
    375 
    def test_bigcharset(self):
        # A character class containing code points above the 8-bit range,
        # matched with and without re.UNICODE.
        self.assertEqual(re.match(u"([\u2222\u2223])",
                                  u"\u2222").group(1), u"\u2222")
        self.assertEqual(re.match(u"([\u2222\u2223])",
                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
    381 
    def test_anyall(self):
        # With re.DOTALL, '.' also matches newline characters, both as a
        # single character and under a '*' repeat.
        self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
                         "a\nb")
        self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
                         "a\n\nb")
    387 
    388     def test_non_consuming(self):
    389         self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
    390         self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
    391         self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
    392         self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
    393         self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
    394         self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
    395         self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
    396 
    397         self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
    398         self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
    399         self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
    400         self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
    401 
    def test_ignore_case(self):
        # re.I against str and unicode subjects.
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
        # re.I combined with character classes, negated classes,
        # backreferences and alternations, each with and without a repeat.
        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
    413 
    def test_category(self):
        # A lone category escape (\s) inside a group matches a space.
        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
    416 
    def test_getlower(self):
        # Imported locally: only this test calls the _sre module directly.
        import _sre
        # getlower() must map 'A' to 'a' under each flag variant.
        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

        # Case-insensitive matching through the public API, str and unicode.
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
    425 
    426     def test_not_literal(self):
    427         self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
    428         self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
    429 
    430     def test_search_coverage(self):
    431         self.assertEqual(re.search("\s(b)", " b").group(1), "b")
    432         self.assertEqual(re.search("a\s", "a ").group(0), "a ")
    433 
    434     def assertMatch(self, pattern, text, match=None, span=None,
    435                     matcher=re.match):
    436         if match is None and span is None:
    437             # the pattern matches the whole text

    438             match = text
    439             span = (0, len(text))
    440         elif match is None or span is None:
    441             raise ValueError('If match is not None, span should be specified '
    442                              '(and vice versa).')
    443         m = matcher(pattern, text)
    444         self.assertTrue(m)
    445         self.assertEqual(m.group(), match)
    446         self.assertEqual(m.span(), span)
    447 
    def test_re_escape(self):
        # re.escape() on every unicode character in range(256).
        alnum_chars = string.ascii_letters + string.digits
        p = u''.join(unichr(i) for i in range(256))
        for c in p:
            if c in alnum_chars:
                # ASCII letters and digits are returned unchanged.
                self.assertEqual(re.escape(c), c)
            elif c == u'\x00':
                # NUL is written as an octal escape.
                self.assertEqual(re.escape(c), u'\\000')
            else:
                # Everything else gets a backslash prefix.
                self.assertEqual(re.escape(c), u'\\' + c)
            # The escaped form must match the original character.
            self.assertMatch(re.escape(c), c)
        # The same must hold for the whole string at once.
        self.assertMatch(re.escape(p), p)
    460 
    def test_re_escape_byte(self):
        # Byte-string counterpart of test_re_escape: every chr(i) for i in
        # range(256).
        alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
        p = ''.join(chr(i) for i in range(256))
        for b in p:
            if b in alnum_chars:
                # ASCII letters and digits are returned unchanged.
                self.assertEqual(re.escape(b), b)
            elif b == b'\x00':
                # NUL is written as an octal escape.
                self.assertEqual(re.escape(b), b'\\000')
            else:
                # Everything else gets a backslash prefix.
                self.assertEqual(re.escape(b), b'\\' + b)
            # The escaped form must match the original byte.
            self.assertMatch(re.escape(b), b)
        self.assertMatch(re.escape(p), p)
    473 
    def test_re_escape_non_ascii(self):
        # Non-ASCII unicode characters are backslash-escaped, and the
        # escaped text still matches the original string.
        s = u'xxx\u2620\u2620\u2620xxx'
        s_escaped = re.escape(s)
        self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
        self.assertMatch(s_escaped, s)
        # An escaped character can be embedded in a larger pattern; the
        # expected match is the middle of s, span (2, 7), found via search().
        self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
                         u'x\u2620\u2620\u2620x', (2, 7), re.search)
    481 
    def test_re_escape_non_ascii_bytes(self):
        # UTF-8 encoded input: each non-ASCII byte is escaped individually.
        b = u'y\u2620y\u2620y'.encode('utf-8')
        b_escaped = re.escape(b)
        self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
        self.assertMatch(b_escaped, b)
        # The escaped encoded character is found at both of its occurrences.
        res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
        self.assertEqual(len(res), 2)
    489 
    def test_pickling(self):
        # Run the shared round-trip check with both pickle implementations.
        import pickle
        self.pickle_test(pickle)
        import cPickle
        self.pickle_test(cPickle)
        # old pickles expect the _compile() reconstructor in sre module
        # The import itself is the check: it raises if _compile is missing.
        import_module("sre", deprecated=True)
        from sre import _compile
    498 
    499     def pickle_test(self, pickle):
    500         oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    501         s = pickle.dumps(oldpat)
    502         newpat = pickle.loads(s)
    503         self.assertEqual(oldpat, newpat)
    504 
    505     def test_constants(self):
    506         self.assertEqual(re.I, re.IGNORECASE)
    507         self.assertEqual(re.L, re.LOCALE)
    508         self.assertEqual(re.M, re.MULTILINE)
    509         self.assertEqual(re.S, re.DOTALL)
    510         self.assertEqual(re.X, re.VERBOSE)
    511 
    def test_flags(self):
        # Every listed flag must be accepted by re.compile() on its own.
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertNotEqual(re.compile('^pattern$', flag), None)
    515 
    516     def test_sre_character_literals(self):
    517         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
    518             self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
    519             self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
    520             self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
    521             self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
    522             self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
    523             self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
    524         self.assertRaises(re.error, re.match, "\911", "")
    525 
    526     def test_sre_character_class_literals(self):
    527         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
    528             self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
    529             self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
    530             self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
    531             self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
    532             self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
    533             self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
    534         self.assertRaises(re.error, re.match, "[\911]", "")
    535 
    def test_bug_113254(self):
        # For a group that did not participate in the match, start(), end()
        # and span() report -1.
        self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
        self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
        self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
    540 
    def test_bug_527371(self):
        # bug described in patches 527371/672491
        # lastindex / lastgroup must ignore optional groups that did not
        # take part in the match.
        self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
        self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
        self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
        # With nested groups, the outer group is the one reported.
        self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
        self.assertEqual(re.match("((a))", "a").lastindex, 1)
    548 
    def test_bug_545855(self):
        # bug 545855 -- This pattern failed to cause a compile error as it
        # should, instead provoking a TypeError.
        # The pattern ends in the middle of a character-class range.
        self.assertRaises(re.error, re.compile, 'foo[a-')
    553 
    def test_bug_418626(self):
        # bugs 418626 et al. -- Testing Greg Chapman's addition of op code
        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
        # pattern '*?' on a long string.
        # Each expected end offset is the length of the filler text plus the
        # terminating literal.
        self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
        self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
                         20003)
        self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
        # non-simple '*?' still used to hit the recursion limit, before the
        # non-recursive scheme was implemented.
        self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
    565 
    def test_bug_612074(self):
        # An escaped non-ASCII unicode character inside a character class
        # must compile.  "compiled and 1" evaluates to 1 whenever the
        # compiled pattern object is truthy.
        pat=u"["+re.escape(u"\u2039")+u"]"
        self.assertEqual(re.compile(pat) and 1, 1)
    569 
    570     def test_stack_overflow(self):
    571         # nasty cases that used to overflow the straightforward recursive

    572         # implementation of repeated groups.

    573         self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
    574         self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
    575         self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
    576 
    577     def test_scanner(self):
    578         def s_ident(scanner, token): return token
    579         def s_operator(scanner, token): return "op%s" % token
    580         def s_float(scanner, token): return float(token)
    581         def s_int(scanner, token): return int(token)
    582 
    583         scanner = Scanner([
    584             (r"[a-zA-Z_]\w*", s_ident),
    585             (r"\d+\.\d*", s_float),
    586             (r"\d+", s_int),
    587             (r"=|\+|-|\*|/", s_operator),
    588             (r"\s+", None),
    589             ])
    590 
    591         self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
    592 
    593         self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
    594                          (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
    595                            'op+', 'bar'], ''))
    596 
    597     def test_bug_448951(self):
    598         # bug 448951 (similar to 429357, but with single char match)

    599         # (Also test greedy matches.)

    600         for op in '','?','*':
    601             self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
    602                              (None, None))
    603             self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
    604                              ('a:', 'a'))
    605 
    606     def test_bug_725106(self):
    607         # capturing groups in alternatives in repeats

    608         self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
    609                          ('b', 'a'))
    610         self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
    611                          ('c', 'b'))
    612         self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
    613                          ('b', None))
    614         self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
    615                          ('b', None))
    616         self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
    617                          ('b', 'a'))
    618         self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
    619                          ('c', 'b'))
    620         self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
    621                          ('b', None))
    622         self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
    623                          ('b', None))
    624 
    625     def test_bug_725149(self):
    626         # mark_stack_base restoring before restoring marks

    627         self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
    628                          ('a', None))
    629         self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
    630                          ('a', None, None))
    631 
    632     def test_bug_764548(self):
    633         # bug 764548, re.compile() barfs on str/unicode subclasses

    634         try:
    635             unicode
    636         except NameError:
    637             return  # no problem if we have no unicode

    638         class my_unicode(unicode): pass
    639         pat = re.compile(my_unicode("abc"))
    640         self.assertEqual(pat.match("xyz"), None)
    641 
    642     def test_finditer(self):
    643         iter = re.finditer(r":+", "a:b::c:::d")
    644         self.assertEqual([item.group(0) for item in iter],
    645                          [":", "::", ":::"])
    646 
    647     def test_bug_926075(self):
    648         try:
    649             unicode
    650         except NameError:
    651             return # no problem if we have no unicode

    652         self.assertTrue(re.compile('bug_926075') is not
    653                      re.compile(eval("u'bug_926075'")))
    654 
    655     def test_bug_931848(self):
    656         try:
    657             unicode
    658         except NameError:
    659             pass
    660         pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
    661         self.assertEqual(re.compile(pattern).split("a.b.c"),
    662                          ['a','b','c'])
    663 
    664     def test_bug_581080(self):
    665         iter = re.finditer(r"\s", "a b")
    666         self.assertEqual(iter.next().span(), (1,2))
    667         self.assertRaises(StopIteration, iter.next)
    668 
    669         scanner = re.compile(r"\s").scanner("a b")
    670         self.assertEqual(scanner.search().span(), (1, 2))
    671         self.assertEqual(scanner.search(), None)
    672 
    673     def test_bug_817234(self):
    674         iter = re.finditer(r".*", "asdf")
    675         self.assertEqual(iter.next().span(), (0, 4))
    676         self.assertEqual(iter.next().span(), (4, 4))
    677         self.assertRaises(StopIteration, iter.next)
    678 
    679     def test_bug_6561(self):
    680         # '\d' should match characters in Unicode category 'Nd'

    681         # (Number, Decimal Digit), but not those in 'Nl' (Number,

    682         # Letter) or 'No' (Number, Other).

    683         decimal_digits = [
    684             u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'

    685             u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'

    686             u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'

    687             ]
    688         for x in decimal_digits:
    689             self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
    690 
    691         not_decimal_digits = [
    692             u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'

    693             u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'

    694             u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'

    695             u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'

    696             ]
    697         for x in not_decimal_digits:
    698             self.assertIsNone(re.match('^\d$', x, re.UNICODE))
    699 
    700     def test_empty_array(self):
    701         # SF buf 1647541

    702         import array
    703         for typecode in 'cbBuhHiIlLfd':
    704             a = array.array(typecode)
    705             self.assertEqual(re.compile("bla").match(a), None)
    706             self.assertEqual(re.compile("").match(a).groups(), ())
    707 
    708     def test_inline_flags(self):
    709         # Bug #1700

    710         upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow

    711         lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow

    712 
    713         p = re.compile(upper_char, re.I | re.U)
    714         q = p.match(lower_char)
    715         self.assertNotEqual(q, None)
    716 
    717         p = re.compile(lower_char, re.I | re.U)
    718         q = p.match(upper_char)
    719         self.assertNotEqual(q, None)
    720 
    721         p = re.compile('(?i)' + upper_char, re.U)
    722         q = p.match(lower_char)
    723         self.assertNotEqual(q, None)
    724 
    725         p = re.compile('(?i)' + lower_char, re.U)
    726         q = p.match(upper_char)
    727         self.assertNotEqual(q, None)
    728 
    729         p = re.compile('(?iu)' + upper_char)
    730         q = p.match(lower_char)
    731         self.assertNotEqual(q, None)
    732 
    733         p = re.compile('(?iu)' + lower_char)
    734         q = p.match(upper_char)
    735         self.assertNotEqual(q, None)
    736 
    737     def test_dollar_matches_twice(self):
    738         "$ matches the end of string, and just before the terminating \n"
    739         pattern = re.compile('$')
    740         self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
    741         self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
    742         self.assertEqual(pattern.sub('#', '\n'), '#\n#')
    743 
    744         pattern = re.compile('$', re.MULTILINE)
    745         self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
    746         self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
    747         self.assertEqual(pattern.sub('#', '\n'), '#\n#')
    748 
    749     def test_dealloc(self):
    750         # issue 3299: check for segfault in debug build

    751         import _sre
    752         # the overflow limit is different on wide and narrow builds and it

    753         # depends on the definition of SRE_CODE (see sre.h).

    754         # 2**128 should be big enough to overflow on both. For smaller values

    755         # a RuntimeError is raised instead of OverflowError.

    756         long_overflow = 2**128
    757         self.assertRaises(TypeError, re.finditer, "a", {})
    758         self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
    759 
    760 def run_re_tests():
    761     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    762     if verbose:
    763         print 'Running re_tests test suite'
    764     else:
    765         # To save time, only run the first and last 10 tests

    766         #tests = tests[:10] + tests[-10:]

    767         pass
    768 
    769     for t in tests:
    770         sys.stdout.flush()
    771         pattern = s = outcome = repl = expected = None
    772         if len(t) == 5:
    773             pattern, s, outcome, repl, expected = t
    774         elif len(t) == 3:
    775             pattern, s, outcome = t
    776         else:
    777             raise ValueError, ('Test tuples should have 3 or 5 fields', t)
    778 
    779         try:
    780             obj = re.compile(pattern)
    781         except re.error:
    782             if outcome == SYNTAX_ERROR: pass  # Expected a syntax error

    783             else:
    784                 print '=== Syntax error:', t
    785         except KeyboardInterrupt: raise KeyboardInterrupt
    786         except:
    787             print '*** Unexpected error ***', t
    788             if verbose:
    789                 traceback.print_exc(file=sys.stdout)
    790         else:
    791             try:
    792                 result = obj.search(s)
    793             except re.error, msg:
    794                 print '=== Unexpected exception', t, repr(msg)
    795             if outcome == SYNTAX_ERROR:
    796                 # This should have been a syntax error; forget it.

    797                 pass
    798             elif outcome == FAIL:
    799                 if result is None: pass   # No match, as expected

    800                 else: print '=== Succeeded incorrectly', t
    801             elif outcome == SUCCEED:
    802                 if result is not None:
    803                     # Matched, as expected, so now we compute the

    804                     # result string and compare it to our expected result.

    805                     start, end = result.span(0)
    806                     vardict={'found': result.group(0),
    807                              'groups': result.group(),
    808                              'flags': result.re.flags}
    809                     for i in range(1, 100):
    810                         try:
    811                             gi = result.group(i)
    812                             # Special hack because else the string concat fails:

    813                             if gi is None:
    814                                 gi = "None"
    815                         except IndexError:
    816                             gi = "Error"
    817                         vardict['g%d' % i] = gi
    818                     for i in result.re.groupindex.keys():
    819                         try:
    820                             gi = result.group(i)
    821                             if gi is None:
    822                                 gi = "None"
    823                         except IndexError:
    824                             gi = "Error"
    825                         vardict[i] = gi
    826                     repl = eval(repl, vardict)
    827                     if repl != expected:
    828                         print '=== grouping error', t,
    829                         print repr(repl) + ' should be ' + repr(expected)
    830                 else:
    831                     print '=== Failed incorrectly', t
    832 
    833                 # Try the match on a unicode string, and check that it

    834                 # still succeeds.

    835                 try:
    836                     result = obj.search(unicode(s, "latin-1"))
    837                     if result is None:
    838                         print '=== Fails on unicode match', t
    839                 except NameError:
    840                     continue # 1.5.2

    841                 except TypeError:
    842                     continue # unicode test case

    843 
    844                 # Try the match on a unicode pattern, and check that it

    845                 # still succeeds.

    846                 obj=re.compile(unicode(pattern, "latin-1"))
    847                 result = obj.search(s)
    848                 if result is None:
    849                     print '=== Fails on unicode pattern match', t
    850 
    851                 # Try the match with the search area limited to the extent

    852                 # of the match and see if it still succeeds.  \B will

    853                 # break (because it won't match at the end or start of a

    854                 # string), so we'll ignore patterns that feature it.

    855 
    856                 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
    857                                and result is not None:
    858                     obj = re.compile(pattern)
    859                     result = obj.search(s, result.start(0), result.end(0) + 1)
    860                     if result is None:
    861                         print '=== Failed on range-limited match', t
    862 
    863                 # Try the match with IGNORECASE enabled, and check that it

    864                 # still succeeds.

    865                 obj = re.compile(pattern, re.IGNORECASE)
    866                 result = obj.search(s)
    867                 if result is None:
    868                     print '=== Fails on case-insensitive match', t
    869 
    870                 # Try the match with LOCALE enabled, and check that it

    871                 # still succeeds.

    872                 obj = re.compile(pattern, re.LOCALE)
    873                 result = obj.search(s)
    874                 if result is None:
    875                     print '=== Fails on locale-sensitive match', t
    876 
    877                 # Try the match with UNICODE locale enabled, and check

    878                 # that it still succeeds.

    879                 obj = re.compile(pattern, re.UNICODE)
    880                 result = obj.search(s)
    881                 if result is None:
    882                     print '=== Fails on unicode-sensitive match', t
    883 
    884 def test_main():
    885     run_unittest(ReTests)
    886     run_re_tests()
    887 
    888 if __name__ == "__main__":
    889     test_main()
    890