      1 from test import support
      2 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
      3                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
      4                      open as tokenize_open, Untokenizer)
      5 from io import BytesIO
      6 from unittest import TestCase, mock
      7 from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
      8                                INVALID_UNDERSCORE_LITERALS)
      9 import os
     10 import token
     11 
     12 
     13 class TokenizeTest(TestCase):
     14     # Tests for the tokenize module.
     15 
     16     # The tests can be really simple. Given a small fragment of source
     17     # code, print out a table with tokens. The ENDMARKER is omitted for
     18     # brevity.
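            #
            # A rough sketch of the underlying call, for orientation: tokenize()
            # wants a bytes-producing readline, hence the BytesIO wrapping that
            # check_tokenize below uses.
            #
            #     >>> from io import BytesIO
            #     >>> from tokenize import tokenize
            #     >>> for tok in tokenize(BytesIO(b"1 + 1").readline):
            #     ...     print(tok)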
     19 
     20     def check_tokenize(self, s, expected):
     21         # Format the tokens in s as a table.
     22         # The ENDMARKER is omitted.
     23         result = []
     24         f = BytesIO(s.encode('utf-8'))
     25         for type, token, start, end, line in tokenize(f.readline):
     26             if type == ENDMARKER:
     27                 break
     28             type = tok_name[type]
     29             result.append(f"    {type:10} {token!r:13} {start} {end}")
     30         self.assertEqual(result,
     31                          ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
     32                          expected.rstrip().splitlines())
     33 
     34     def test_basic(self):
     35         self.check_tokenize("1 + 1", """\
     36     NUMBER     '1'           (1, 0) (1, 1)
     37     OP         '+'           (1, 2) (1, 3)
     38     NUMBER     '1'           (1, 4) (1, 5)
     39     """)
     40         self.check_tokenize("if False:\n"
     41                             "    # NL\n"
     42                             "    True = False # NEWLINE\n", """\
     43     NAME       'if'          (1, 0) (1, 2)
     44     NAME       'False'       (1, 3) (1, 8)
     45     OP         ':'           (1, 8) (1, 9)
     46     NEWLINE    '\\n'          (1, 9) (1, 10)
     47     COMMENT    '# NL'        (2, 4) (2, 8)
     48     NL         '\\n'          (2, 8) (2, 9)
     49     INDENT     '    '        (3, 0) (3, 4)
     50     NAME       'True'        (3, 4) (3, 8)
     51     OP         '='           (3, 9) (3, 10)
     52     NAME       'False'       (3, 11) (3, 16)
     53     COMMENT    '# NEWLINE'   (3, 17) (3, 26)
     54     NEWLINE    '\\n'          (3, 26) (3, 27)
     55     DEDENT     ''            (4, 0) (4, 0)
     56     """)
     57         indent_error_file = b"""\
     58 def k(x):
     59     x += 2
     60   x += 5
     61 """
     62         readline = BytesIO(indent_error_file).readline
     63         with self.assertRaisesRegex(IndentationError,
     64                                     "unindent does not match any "
     65                                     "outer indentation level"):
     66             for tok in tokenize(readline):
     67                 pass
     68 
     69     def test_int(self):
     70         # Ordinary integers and binary operators
     71         self.check_tokenize("0xff <= 255", """\
     72     NUMBER     '0xff'        (1, 0) (1, 4)
     73     OP         '<='          (1, 5) (1, 7)
     74     NUMBER     '255'         (1, 8) (1, 11)
     75     """)
     76         self.check_tokenize("0b10 <= 255", """\
     77     NUMBER     '0b10'        (1, 0) (1, 4)
     78     OP         '<='          (1, 5) (1, 7)
     79     NUMBER     '255'         (1, 8) (1, 11)
     80     """)
     81         self.check_tokenize("0o123 <= 0O123", """\
     82     NUMBER     '0o123'       (1, 0) (1, 5)
     83     OP         '<='          (1, 6) (1, 8)
     84     NUMBER     '0O123'       (1, 9) (1, 14)
     85     """)
     86         self.check_tokenize("1234567 > ~0x15", """\
     87     NUMBER     '1234567'     (1, 0) (1, 7)
     88     OP         '>'           (1, 8) (1, 9)
     89     OP         '~'           (1, 10) (1, 11)
     90     NUMBER     '0x15'        (1, 11) (1, 15)
     91     """)
     92         self.check_tokenize("2134568 != 1231515", """\
     93     NUMBER     '2134568'     (1, 0) (1, 7)
     94     OP         '!='          (1, 8) (1, 10)
     95     NUMBER     '1231515'     (1, 11) (1, 18)
     96     """)
     97         self.check_tokenize("(-124561-1) & 200000000", """\
     98     OP         '('           (1, 0) (1, 1)
     99     OP         '-'           (1, 1) (1, 2)
    100     NUMBER     '124561'      (1, 2) (1, 8)
    101     OP         '-'           (1, 8) (1, 9)
    102     NUMBER     '1'           (1, 9) (1, 10)
    103     OP         ')'           (1, 10) (1, 11)
    104     OP         '&'           (1, 12) (1, 13)
    105     NUMBER     '200000000'   (1, 14) (1, 23)
    106     """)
    107         self.check_tokenize("0xdeadbeef != -1", """\
    108     NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    109     OP         '!='          (1, 11) (1, 13)
    110     OP         '-'           (1, 14) (1, 15)
    111     NUMBER     '1'           (1, 15) (1, 16)
    112     """)
    113         self.check_tokenize("0xdeadc0de & 12345", """\
    114     NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    115     OP         '&'           (1, 11) (1, 12)
    116     NUMBER     '12345'       (1, 13) (1, 18)
    117     """)
    118         self.check_tokenize("0xFF & 0x15 | 1234", """\
    119     NUMBER     '0xFF'        (1, 0) (1, 4)
    120     OP         '&'           (1, 5) (1, 6)
    121     NUMBER     '0x15'        (1, 7) (1, 11)
    122     OP         '|'           (1, 12) (1, 13)
    123     NUMBER     '1234'        (1, 14) (1, 18)
    124     """)
    125 
    126     def test_long(self):
    127         # Long integers
    128         self.check_tokenize("x = 0", """\
    129     NAME       'x'           (1, 0) (1, 1)
    130     OP         '='           (1, 2) (1, 3)
    131     NUMBER     '0'           (1, 4) (1, 5)
    132     """)
    133         self.check_tokenize("x = 0xfffffffffff", """\
    134     NAME       'x'           (1, 0) (1, 1)
    135     OP         '='           (1, 2) (1, 3)
    136     NUMBER     '0xfffffffffff' (1, 4) (1, 17)
    137     """)
    138         self.check_tokenize("x = 123141242151251616110", """\
    139     NAME       'x'           (1, 0) (1, 1)
    140     OP         '='           (1, 2) (1, 3)
    141     NUMBER     '123141242151251616110' (1, 4) (1, 25)
    142     """)
    143         self.check_tokenize("x = -15921590215012591", """\
    144     NAME       'x'           (1, 0) (1, 1)
    145     OP         '='           (1, 2) (1, 3)
    146     OP         '-'           (1, 4) (1, 5)
    147     NUMBER     '15921590215012591' (1, 5) (1, 22)
    148     """)
    149 
    150     def test_float(self):
    151         # Floating point numbers
    152         self.check_tokenize("x = 3.14159", """\
    153     NAME       'x'           (1, 0) (1, 1)
    154     OP         '='           (1, 2) (1, 3)
    155     NUMBER     '3.14159'     (1, 4) (1, 11)
    156     """)
    157         self.check_tokenize("x = 314159.", """\
    158     NAME       'x'           (1, 0) (1, 1)
    159     OP         '='           (1, 2) (1, 3)
    160     NUMBER     '314159.'     (1, 4) (1, 11)
    161     """)
    162         self.check_tokenize("x = .314159", """\
    163     NAME       'x'           (1, 0) (1, 1)
    164     OP         '='           (1, 2) (1, 3)
    165     NUMBER     '.314159'     (1, 4) (1, 11)
    166     """)
    167         self.check_tokenize("x = 3e14159", """\
    168     NAME       'x'           (1, 0) (1, 1)
    169     OP         '='           (1, 2) (1, 3)
    170     NUMBER     '3e14159'     (1, 4) (1, 11)
    171     """)
    172         self.check_tokenize("x = 3E123", """\
    173     NAME       'x'           (1, 0) (1, 1)
    174     OP         '='           (1, 2) (1, 3)
    175     NUMBER     '3E123'       (1, 4) (1, 9)
    176     """)
    177         self.check_tokenize("x+y = 3e-1230", """\
    178     NAME       'x'           (1, 0) (1, 1)
    179     OP         '+'           (1, 1) (1, 2)
    180     NAME       'y'           (1, 2) (1, 3)
    181     OP         '='           (1, 4) (1, 5)
    182     NUMBER     '3e-1230'     (1, 6) (1, 13)
    183     """)
    184         self.check_tokenize("x = 3.14e159", """\
    185     NAME       'x'           (1, 0) (1, 1)
    186     OP         '='           (1, 2) (1, 3)
    187     NUMBER     '3.14e159'    (1, 4) (1, 12)
    188     """)
    189 
    190     def test_underscore_literals(self):
    191         def number_token(s):
    192             f = BytesIO(s.encode('utf-8'))
    193             for toktype, token, start, end, line in tokenize(f.readline):
    194                 if toktype == NUMBER:
    195                     return token
    196             return 'invalid token'
    197         for lit in VALID_UNDERSCORE_LITERALS:
    198             if '(' in lit:
    199                 # skip compound complex inputs; they span multiple tokens
    200                 continue
    201             self.assertEqual(number_token(lit), lit)
    202         for lit in INVALID_UNDERSCORE_LITERALS:
    203             self.assertNotEqual(number_token(lit), lit)
    204 
    205     def test_string(self):
    206         # String literals
    207         self.check_tokenize("x = ''; y = \"\"", """\
    208     NAME       'x'           (1, 0) (1, 1)
    209     OP         '='           (1, 2) (1, 3)
    210     STRING     "''"          (1, 4) (1, 6)
    211     OP         ';'           (1, 6) (1, 7)
    212     NAME       'y'           (1, 8) (1, 9)
    213     OP         '='           (1, 10) (1, 11)
    214     STRING     '""'          (1, 12) (1, 14)
    215     """)
    216         self.check_tokenize("x = '\"'; y = \"'\"", """\
    217     NAME       'x'           (1, 0) (1, 1)
    218     OP         '='           (1, 2) (1, 3)
    219     STRING     '\\'"\\''       (1, 4) (1, 7)
    220     OP         ';'           (1, 7) (1, 8)
    221     NAME       'y'           (1, 9) (1, 10)
    222     OP         '='           (1, 11) (1, 12)
    223     STRING     '"\\'"'        (1, 13) (1, 16)
    224     """)
    225         self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    226     NAME       'x'           (1, 0) (1, 1)
    227     OP         '='           (1, 2) (1, 3)
    228     STRING     '"doesn\\'t "' (1, 4) (1, 14)
    229     NAME       'shrink'      (1, 14) (1, 20)
    230     STRING     '", does it"' (1, 20) (1, 31)
    231     """)
    232         self.check_tokenize("x = 'abc' + 'ABC'", """\
    233     NAME       'x'           (1, 0) (1, 1)
    234     OP         '='           (1, 2) (1, 3)
    235     STRING     "'abc'"       (1, 4) (1, 9)
    236     OP         '+'           (1, 10) (1, 11)
    237     STRING     "'ABC'"       (1, 12) (1, 17)
    238     """)
    239         self.check_tokenize('y = "ABC" + "ABC"', """\
    240     NAME       'y'           (1, 0) (1, 1)
    241     OP         '='           (1, 2) (1, 3)
    242     STRING     '"ABC"'       (1, 4) (1, 9)
    243     OP         '+'           (1, 10) (1, 11)
    244     STRING     '"ABC"'       (1, 12) (1, 17)
    245     """)
    246         self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    247     NAME       'x'           (1, 0) (1, 1)
    248     OP         '='           (1, 2) (1, 3)
    249     STRING     "r'abc'"      (1, 4) (1, 10)
    250     OP         '+'           (1, 11) (1, 12)
    251     STRING     "r'ABC'"      (1, 13) (1, 19)
    252     OP         '+'           (1, 20) (1, 21)
    253     STRING     "R'ABC'"      (1, 22) (1, 28)
    254     OP         '+'           (1, 29) (1, 30)
    255     STRING     "R'ABC'"      (1, 31) (1, 37)
    256     """)
    257         self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    258     NAME       'y'           (1, 0) (1, 1)
    259     OP         '='           (1, 2) (1, 3)
    260     STRING     'r"abc"'      (1, 4) (1, 10)
    261     OP         '+'           (1, 11) (1, 12)
    262     STRING     'r"ABC"'      (1, 13) (1, 19)
    263     OP         '+'           (1, 20) (1, 21)
    264     STRING     'R"ABC"'      (1, 22) (1, 28)
    265     OP         '+'           (1, 29) (1, 30)
    266     STRING     'R"ABC"'      (1, 31) (1, 37)
    267     """)
    268 
    269         self.check_tokenize("u'abc' + U'abc'", """\
    270     STRING     "u'abc'"      (1, 0) (1, 6)
    271     OP         '+'           (1, 7) (1, 8)
    272     STRING     "U'abc'"      (1, 9) (1, 15)
    273     """)
    274         self.check_tokenize('u"abc" + U"abc"', """\
    275     STRING     'u"abc"'      (1, 0) (1, 6)
    276     OP         '+'           (1, 7) (1, 8)
    277     STRING     'U"abc"'      (1, 9) (1, 15)
    278     """)
    279 
    280         self.check_tokenize("b'abc' + B'abc'", """\
    281     STRING     "b'abc'"      (1, 0) (1, 6)
    282     OP         '+'           (1, 7) (1, 8)
    283     STRING     "B'abc'"      (1, 9) (1, 15)
    284     """)
    285         self.check_tokenize('b"abc" + B"abc"', """\
    286     STRING     'b"abc"'      (1, 0) (1, 6)
    287     OP         '+'           (1, 7) (1, 8)
    288     STRING     'B"abc"'      (1, 9) (1, 15)
    289     """)
    290         self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    291     STRING     "br'abc'"     (1, 0) (1, 7)
    292     OP         '+'           (1, 8) (1, 9)
    293     STRING     "bR'abc'"     (1, 10) (1, 17)
    294     OP         '+'           (1, 18) (1, 19)
    295     STRING     "Br'abc'"     (1, 20) (1, 27)
    296     OP         '+'           (1, 28) (1, 29)
    297     STRING     "BR'abc'"     (1, 30) (1, 37)
    298     """)
    299         self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    300     STRING     'br"abc"'     (1, 0) (1, 7)
    301     OP         '+'           (1, 8) (1, 9)
    302     STRING     'bR"abc"'     (1, 10) (1, 17)
    303     OP         '+'           (1, 18) (1, 19)
    304     STRING     'Br"abc"'     (1, 20) (1, 27)
    305     OP         '+'           (1, 28) (1, 29)
    306     STRING     'BR"abc"'     (1, 30) (1, 37)
    307     """)
    308         self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    309     STRING     "rb'abc'"     (1, 0) (1, 7)
    310     OP         '+'           (1, 8) (1, 9)
    311     STRING     "rB'abc'"     (1, 10) (1, 17)
    312     OP         '+'           (1, 18) (1, 19)
    313     STRING     "Rb'abc'"     (1, 20) (1, 27)
    314     OP         '+'           (1, 28) (1, 29)
    315     STRING     "RB'abc'"     (1, 30) (1, 37)
    316     """)
    317         self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    318     STRING     'rb"abc"'     (1, 0) (1, 7)
    319     OP         '+'           (1, 8) (1, 9)
    320     STRING     'rB"abc"'     (1, 10) (1, 17)
    321     OP         '+'           (1, 18) (1, 19)
    322     STRING     'Rb"abc"'     (1, 20) (1, 27)
    323     OP         '+'           (1, 28) (1, 29)
    324     STRING     'RB"abc"'     (1, 30) (1, 37)
    325     """)
    326         # Check 0, 1, and 2 character string prefixes.
    327         self.check_tokenize(r'"a\
    328 de\
    329 fg"', """\
    330     STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    331     """)
    332         self.check_tokenize(r'u"a\
    333 de"', """\
    334     STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    335     """)
    336         self.check_tokenize(r'rb"a\
    337 d"', """\
    338     STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    339     """)
    340         self.check_tokenize(r'"""a\
    341 b"""', """\
    342     STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    343     """)
    344         self.check_tokenize(r'u"""a\
    345 b"""', """\
    346     STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    347     """)
    348         self.check_tokenize(r'rb"""a\
    349 b\
    350 c"""', """\
    351     STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    352     """)
    353         self.check_tokenize('f"abc"', """\
    354     STRING     'f"abc"'      (1, 0) (1, 6)
    355     """)
    356         self.check_tokenize('fR"a{b}c"', """\
    357     STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    358     """)
    359         self.check_tokenize('f"""abc"""', """\
    360     STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    361     """)
    362         self.check_tokenize(r'f"abc\
    363 def"', """\
    364     STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    365     """)
    366         self.check_tokenize(r'Rf"abc\
    367 def"', """\
    368     STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    369     """)
    370 
    371     def test_function(self):
    372         self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    373     NAME       'def'         (1, 0) (1, 3)
    374     NAME       'd22'         (1, 4) (1, 7)
    375     OP         '('           (1, 7) (1, 8)
    376     NAME       'a'           (1, 8) (1, 9)
    377     OP         ','           (1, 9) (1, 10)
    378     NAME       'b'           (1, 11) (1, 12)
    379     OP         ','           (1, 12) (1, 13)
    380     NAME       'c'           (1, 14) (1, 15)
    381     OP         '='           (1, 15) (1, 16)
    382     NUMBER     '2'           (1, 16) (1, 17)
    383     OP         ','           (1, 17) (1, 18)
    384     NAME       'd'           (1, 19) (1, 20)
    385     OP         '='           (1, 20) (1, 21)
    386     NUMBER     '2'           (1, 21) (1, 22)
    387     OP         ','           (1, 22) (1, 23)
    388     OP         '*'           (1, 24) (1, 25)
    389     NAME       'k'           (1, 25) (1, 26)
    390     OP         ')'           (1, 26) (1, 27)
    391     OP         ':'           (1, 27) (1, 28)
    392     NAME       'pass'        (1, 29) (1, 33)
    393     """)
    394         self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    395     NAME       'def'         (1, 0) (1, 3)
    396     NAME       'd01v_'       (1, 4) (1, 9)
    397     OP         '('           (1, 9) (1, 10)
    398     NAME       'a'           (1, 10) (1, 11)
    399     OP         '='           (1, 11) (1, 12)
    400     NUMBER     '1'           (1, 12) (1, 13)
    401     OP         ','           (1, 13) (1, 14)
    402     OP         '*'           (1, 15) (1, 16)
    403     NAME       'k'           (1, 16) (1, 17)
    404     OP         ','           (1, 17) (1, 18)
    405     OP         '**'          (1, 19) (1, 21)
    406     NAME       'w'           (1, 21) (1, 22)
    407     OP         ')'           (1, 22) (1, 23)
    408     OP         ':'           (1, 23) (1, 24)
    409     NAME       'pass'        (1, 25) (1, 29)
    410     """)
    411 
    412     def test_comparison(self):
    413         # Comparison
    414         self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
    415                             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    416     NAME       'if'          (1, 0) (1, 2)
    417     NUMBER     '1'           (1, 3) (1, 4)
    418     OP         '<'           (1, 5) (1, 6)
    419     NUMBER     '1'           (1, 7) (1, 8)
    420     OP         '>'           (1, 9) (1, 10)
    421     NUMBER     '1'           (1, 11) (1, 12)
    422     OP         '=='          (1, 13) (1, 15)
    423     NUMBER     '1'           (1, 16) (1, 17)
    424     OP         '>='          (1, 18) (1, 20)
    425     NUMBER     '5'           (1, 21) (1, 22)
    426     OP         '<='          (1, 23) (1, 25)
    427     NUMBER     '0x15'        (1, 26) (1, 30)
    428     OP         '<='          (1, 31) (1, 33)
    429     NUMBER     '0x12'        (1, 34) (1, 38)
    430     OP         '!='          (1, 39) (1, 41)
    431     NUMBER     '1'           (1, 42) (1, 43)
    432     NAME       'and'         (1, 44) (1, 47)
    433     NUMBER     '5'           (1, 48) (1, 49)
    434     NAME       'in'          (1, 50) (1, 52)
    435     NUMBER     '1'           (1, 53) (1, 54)
    436     NAME       'not'         (1, 55) (1, 58)
    437     NAME       'in'          (1, 59) (1, 61)
    438     NUMBER     '1'           (1, 62) (1, 63)
    439     NAME       'is'          (1, 64) (1, 66)
    440     NUMBER     '1'           (1, 67) (1, 68)
    441     NAME       'or'          (1, 69) (1, 71)
    442     NUMBER     '5'           (1, 72) (1, 73)
    443     NAME       'is'          (1, 74) (1, 76)
    444     NAME       'not'         (1, 77) (1, 80)
    445     NUMBER     '1'           (1, 81) (1, 82)
    446     OP         ':'           (1, 82) (1, 83)
    447     NAME       'pass'        (1, 84) (1, 88)
    448     """)
    449 
    450     def test_shift(self):
    451         # Shift
    452         self.check_tokenize("x = 1 << 1 >> 5", """\
    453     NAME       'x'           (1, 0) (1, 1)
    454     OP         '='           (1, 2) (1, 3)
    455     NUMBER     '1'           (1, 4) (1, 5)
    456     OP         '<<'          (1, 6) (1, 8)
    457     NUMBER     '1'           (1, 9) (1, 10)
    458     OP         '>>'          (1, 11) (1, 13)
    459     NUMBER     '5'           (1, 14) (1, 15)
    460     """)
    461 
    462     def test_additive(self):
    463         # Additive
    464         self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
    465     NAME       'x'           (1, 0) (1, 1)
    466     OP         '='           (1, 2) (1, 3)
    467     NUMBER     '1'           (1, 4) (1, 5)
    468     OP         '-'           (1, 6) (1, 7)
    469     NAME       'y'           (1, 8) (1, 9)
    470     OP         '+'           (1, 10) (1, 11)
    471     NUMBER     '15'          (1, 12) (1, 14)
    472     OP         '-'           (1, 15) (1, 16)
    473     NUMBER     '1'           (1, 17) (1, 18)
    474     OP         '+'           (1, 19) (1, 20)
    475     NUMBER     '0x124'       (1, 21) (1, 26)
    476     OP         '+'           (1, 27) (1, 28)
    477     NAME       'z'           (1, 29) (1, 30)
    478     OP         '+'           (1, 31) (1, 32)
    479     NAME       'a'           (1, 33) (1, 34)
    480     OP         '['           (1, 34) (1, 35)
    481     NUMBER     '5'           (1, 35) (1, 36)
    482     OP         ']'           (1, 36) (1, 37)
    483     """)
    484 
    485     def test_multiplicative(self):
    486         # Multiplicative
    487         self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
    488     NAME       'x'           (1, 0) (1, 1)
    489     OP         '='           (1, 2) (1, 3)
    490     NUMBER     '1'           (1, 4) (1, 5)
    491     OP         '//'          (1, 5) (1, 7)
    492     NUMBER     '1'           (1, 7) (1, 8)
    493     OP         '*'           (1, 8) (1, 9)
    494     NUMBER     '1'           (1, 9) (1, 10)
    495     OP         '/'           (1, 10) (1, 11)
    496     NUMBER     '5'           (1, 11) (1, 12)
    497     OP         '*'           (1, 12) (1, 13)
    498     NUMBER     '12'          (1, 13) (1, 15)
    499     OP         '%'           (1, 15) (1, 16)
    500     NUMBER     '0x12'        (1, 16) (1, 20)
    501     OP         '@'           (1, 20) (1, 21)
    502     NUMBER     '42'          (1, 21) (1, 23)
    503     """)
    504 
    505     def test_unary(self):
    506         # Unary
    507         self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    508     OP         '~'           (1, 0) (1, 1)
    509     NUMBER     '1'           (1, 1) (1, 2)
    510     OP         '^'           (1, 3) (1, 4)
    511     NUMBER     '1'           (1, 5) (1, 6)
    512     OP         '&'           (1, 7) (1, 8)
    513     NUMBER     '1'           (1, 9) (1, 10)
    514     OP         '|'           (1, 11) (1, 12)
    515     NUMBER     '1'           (1, 12) (1, 13)
    516     OP         '^'           (1, 14) (1, 15)
    517     OP         '-'           (1, 16) (1, 17)
    518     NUMBER     '1'           (1, 17) (1, 18)
    519     """)
    520         self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    521     OP         '-'           (1, 0) (1, 1)
    522     NUMBER     '1'           (1, 1) (1, 2)
    523     OP         '*'           (1, 2) (1, 3)
    524     NUMBER     '1'           (1, 3) (1, 4)
    525     OP         '/'           (1, 4) (1, 5)
    526     NUMBER     '1'           (1, 5) (1, 6)
    527     OP         '+'           (1, 6) (1, 7)
    528     NUMBER     '1'           (1, 7) (1, 8)
    529     OP         '*'           (1, 8) (1, 9)
    530     NUMBER     '1'           (1, 9) (1, 10)
    531     OP         '//'          (1, 10) (1, 12)
    532     NUMBER     '1'           (1, 12) (1, 13)
    533     OP         '-'           (1, 14) (1, 15)
    534     OP         '-'           (1, 16) (1, 17)
    535     OP         '-'           (1, 17) (1, 18)
    536     OP         '-'           (1, 18) (1, 19)
    537     NUMBER     '1'           (1, 19) (1, 20)
    538     OP         '**'          (1, 20) (1, 22)
    539     NUMBER     '1'           (1, 22) (1, 23)
    540     """)
    541 
    542     def test_selector(self):
    543         # Selector
    544         self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    545     NAME       'import'      (1, 0) (1, 6)
    546     NAME       'sys'         (1, 7) (1, 10)
    547     OP         ','           (1, 10) (1, 11)
    548     NAME       'time'        (1, 12) (1, 16)
    549     NEWLINE    '\\n'          (1, 16) (1, 17)
    550     NAME       'x'           (2, 0) (2, 1)
    551     OP         '='           (2, 2) (2, 3)
    552     NAME       'sys'         (2, 4) (2, 7)
    553     OP         '.'           (2, 7) (2, 8)
    554     NAME       'modules'     (2, 8) (2, 15)
    555     OP         '['           (2, 15) (2, 16)
    556     STRING     "'time'"      (2, 16) (2, 22)
    557     OP         ']'           (2, 22) (2, 23)
    558     OP         '.'           (2, 23) (2, 24)
    559     NAME       'time'        (2, 24) (2, 28)
    560     OP         '('           (2, 28) (2, 29)
    561     OP         ')'           (2, 29) (2, 30)
    562     """)
    563 
    564     def test_method(self):
    565         # Methods
    566         self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
    567     OP         '@'           (1, 0) (1, 1)
    568     NAME       'staticmethod' (1, 1) (1, 13)
    569     NEWLINE    '\\n'          (1, 13) (1, 14)
    570     NAME       'def'         (2, 0) (2, 3)
    571     NAME       'foo'         (2, 4) (2, 7)
    572     OP         '('           (2, 7) (2, 8)
    573     NAME       'x'           (2, 8) (2, 9)
    574     OP         ','           (2, 9) (2, 10)
    575     NAME       'y'           (2, 10) (2, 11)
    576     OP         ')'           (2, 11) (2, 12)
    577     OP         ':'           (2, 12) (2, 13)
    578     NAME       'pass'        (2, 14) (2, 18)
    579     """)
    580 
    581     def test_tabs(self):
    582         # Evil tabs
    583         self.check_tokenize("def f():\n"
    584                             "\tif x\n"
    585                             "        \tpass", """\
    586     NAME       'def'         (1, 0) (1, 3)
    587     NAME       'f'           (1, 4) (1, 5)
    588     OP         '('           (1, 5) (1, 6)
    589     OP         ')'           (1, 6) (1, 7)
    590     OP         ':'           (1, 7) (1, 8)
    591     NEWLINE    '\\n'          (1, 8) (1, 9)
    592     INDENT     '\\t'          (2, 0) (2, 1)
    593     NAME       'if'          (2, 1) (2, 3)
    594     NAME       'x'           (2, 4) (2, 5)
    595     NEWLINE    '\\n'          (2, 5) (2, 6)
    596     INDENT     '        \\t'  (3, 0) (3, 9)
    597     NAME       'pass'        (3, 9) (3, 13)
    598     DEDENT     ''            (4, 0) (4, 0)
    599     DEDENT     ''            (4, 0) (4, 0)
    600     """)
    601 
    602     def test_non_ascii_identifiers(self):
    603         # Non-ascii identifiers
    604         self.check_tokenize("örter = 'places'\ngrün = 'green'", """\
    605     NAME       'örter'      (1, 0) (1, 5)
    606     OP         '='           (1, 6) (1, 7)
    607     STRING     "'places'"    (1, 8) (1, 16)
    608     NEWLINE    '\\n'          (1, 16) (1, 17)
    609     NAME       'grün'       (2, 0) (2, 4)
    610     OP         '='           (2, 5) (2, 6)
    611     STRING     "'green'"     (2, 7) (2, 14)
    612     """)
    613 
    614     def test_unicode(self):
    615         # Legacy unicode literals:
    616         self.check_tokenize("örter = u'places'\ngrün = U'green'", """\
    617     NAME       'örter'      (1, 0) (1, 5)
    618     OP         '='           (1, 6) (1, 7)
    619     STRING     "u'places'"   (1, 8) (1, 17)
    620     NEWLINE    '\\n'          (1, 17) (1, 18)
    621     NAME       'grün'       (2, 0) (2, 4)
    622     OP         '='           (2, 5) (2, 6)
    623     STRING     "U'green'"    (2, 7) (2, 15)
    624     """)
    625 
    626     def test_async(self):
    627         # Async/await extension:
    628         self.check_tokenize("async = 1", """\
    629     NAME       'async'       (1, 0) (1, 5)
    630     OP         '='           (1, 6) (1, 7)
    631     NUMBER     '1'           (1, 8) (1, 9)
    632     """)
    633 
    634         self.check_tokenize("a = (async = 1)", """\
    635     NAME       'a'           (1, 0) (1, 1)
    636     OP         '='           (1, 2) (1, 3)
    637     OP         '('           (1, 4) (1, 5)
    638     NAME       'async'       (1, 5) (1, 10)
    639     OP         '='           (1, 11) (1, 12)
    640     NUMBER     '1'           (1, 13) (1, 14)
    641     OP         ')'           (1, 14) (1, 15)
    642     """)
    643 
    644         self.check_tokenize("async()", """\
    645     NAME       'async'       (1, 0) (1, 5)
    646     OP         '('           (1, 5) (1, 6)
    647     OP         ')'           (1, 6) (1, 7)
    648     """)
    649 
    650         self.check_tokenize("class async(Bar):pass", """\
    651     NAME       'class'       (1, 0) (1, 5)
    652     NAME       'async'       (1, 6) (1, 11)
    653     OP         '('           (1, 11) (1, 12)
    654     NAME       'Bar'         (1, 12) (1, 15)
    655     OP         ')'           (1, 15) (1, 16)
    656     OP         ':'           (1, 16) (1, 17)
    657     NAME       'pass'        (1, 17) (1, 21)
    658     """)
    659 
    660         self.check_tokenize("class async:pass", """\
    661     NAME       'class'       (1, 0) (1, 5)
    662     NAME       'async'       (1, 6) (1, 11)
    663     OP         ':'           (1, 11) (1, 12)
    664     NAME       'pass'        (1, 12) (1, 16)
    665     """)
    666 
    667         self.check_tokenize("await = 1", """\
    668     NAME       'await'       (1, 0) (1, 5)
    669     OP         '='           (1, 6) (1, 7)
    670     NUMBER     '1'           (1, 8) (1, 9)
    671     """)
    672 
    673         self.check_tokenize("foo.async", """\
    674     NAME       'foo'         (1, 0) (1, 3)
    675     OP         '.'           (1, 3) (1, 4)
    676     NAME       'async'       (1, 4) (1, 9)
    677     """)
    678 
    679         self.check_tokenize("async for a in b: pass", """\
    680     NAME       'async'       (1, 0) (1, 5)
    681     NAME       'for'         (1, 6) (1, 9)
    682     NAME       'a'           (1, 10) (1, 11)
    683     NAME       'in'          (1, 12) (1, 14)
    684     NAME       'b'           (1, 15) (1, 16)
    685     OP         ':'           (1, 16) (1, 17)
    686     NAME       'pass'        (1, 18) (1, 22)
    687     """)
    688 
    689         self.check_tokenize("async with a as b: pass", """\
    690     NAME       'async'       (1, 0) (1, 5)
    691     NAME       'with'        (1, 6) (1, 10)
    692     NAME       'a'           (1, 11) (1, 12)
    693     NAME       'as'          (1, 13) (1, 15)
    694     NAME       'b'           (1, 16) (1, 17)
    695     OP         ':'           (1, 17) (1, 18)
    696     NAME       'pass'        (1, 19) (1, 23)
    697     """)
    698 
    699         self.check_tokenize("async.foo", """\
    700     NAME       'async'       (1, 0) (1, 5)
    701     OP         '.'           (1, 5) (1, 6)
    702     NAME       'foo'         (1, 6) (1, 9)
    703     """)
    704 
    705         self.check_tokenize("async", """\
    706     NAME       'async'       (1, 0) (1, 5)
    707     """)
    708 
    709         self.check_tokenize("async\n#comment\nawait", """\
    710     NAME       'async'       (1, 0) (1, 5)
    711     NEWLINE    '\\n'          (1, 5) (1, 6)
    712     COMMENT    '#comment'    (2, 0) (2, 8)
    713     NL         '\\n'          (2, 8) (2, 9)
    714     NAME       'await'       (3, 0) (3, 5)
    715     """)
    716 
    717         self.check_tokenize("async\n...\nawait", """\
    718     NAME       'async'       (1, 0) (1, 5)
    719     NEWLINE    '\\n'          (1, 5) (1, 6)
    720     OP         '...'         (2, 0) (2, 3)
    721     NEWLINE    '\\n'          (2, 3) (2, 4)
    722     NAME       'await'       (3, 0) (3, 5)
    723     """)
    724 
    725         self.check_tokenize("async\nawait", """\
    726     NAME       'async'       (1, 0) (1, 5)
    727     NEWLINE    '\\n'          (1, 5) (1, 6)
    728     NAME       'await'       (2, 0) (2, 5)
    729     """)
    730 
    731         self.check_tokenize("foo.async + 1", """\
    732     NAME       'foo'         (1, 0) (1, 3)
    733     OP         '.'           (1, 3) (1, 4)
    734     NAME       'async'       (1, 4) (1, 9)
    735     OP         '+'           (1, 10) (1, 11)
    736     NUMBER     '1'           (1, 12) (1, 13)
    737     """)
    738 
    739         self.check_tokenize("async def foo(): pass", """\
    740     ASYNC      'async'       (1, 0) (1, 5)
    741     NAME       'def'         (1, 6) (1, 9)
    742     NAME       'foo'         (1, 10) (1, 13)
    743     OP         '('           (1, 13) (1, 14)
    744     OP         ')'           (1, 14) (1, 15)
    745     OP         ':'           (1, 15) (1, 16)
    746     NAME       'pass'        (1, 17) (1, 21)
    747     """)
    748 
    749         self.check_tokenize('''\
    750 async def foo():
    751   def foo(await):
    752     await = 1
    753   if 1:
    754     await
    755 async += 1
    756 ''', """\
    757     ASYNC      'async'       (1, 0) (1, 5)
    758     NAME       'def'         (1, 6) (1, 9)
    759     NAME       'foo'         (1, 10) (1, 13)
    760     OP         '('           (1, 13) (1, 14)
    761     OP         ')'           (1, 14) (1, 15)
    762     OP         ':'           (1, 15) (1, 16)
    763     NEWLINE    '\\n'          (1, 16) (1, 17)
    764     INDENT     '  '          (2, 0) (2, 2)
    765     NAME       'def'         (2, 2) (2, 5)
    766     NAME       'foo'         (2, 6) (2, 9)
    767     OP         '('           (2, 9) (2, 10)
    768     AWAIT      'await'       (2, 10) (2, 15)
    769     OP         ')'           (2, 15) (2, 16)
    770     OP         ':'           (2, 16) (2, 17)
    771     NEWLINE    '\\n'          (2, 17) (2, 18)
    772     INDENT     '    '        (3, 0) (3, 4)
    773     AWAIT      'await'       (3, 4) (3, 9)
    774     OP         '='           (3, 10) (3, 11)
    775     NUMBER     '1'           (3, 12) (3, 13)
    776     NEWLINE    '\\n'          (3, 13) (3, 14)
    777     DEDENT     ''            (4, 2) (4, 2)
    778     NAME       'if'          (4, 2) (4, 4)
    779     NUMBER     '1'           (4, 5) (4, 6)
    780     OP         ':'           (4, 6) (4, 7)
    781     NEWLINE    '\\n'          (4, 7) (4, 8)
    782     INDENT     '    '        (5, 0) (5, 4)
    783     AWAIT      'await'       (5, 4) (5, 9)
    784     NEWLINE    '\\n'          (5, 9) (5, 10)
    785     DEDENT     ''            (6, 0) (6, 0)
    786     DEDENT     ''            (6, 0) (6, 0)
    787     NAME       'async'       (6, 0) (6, 5)
    788     OP         '+='          (6, 6) (6, 8)
    789     NUMBER     '1'           (6, 9) (6, 10)
    790     NEWLINE    '\\n'          (6, 10) (6, 11)
    791     """)
    792 
    793         self.check_tokenize('''\
    794 async def foo():
    795   async for i in 1: pass''', """\
    796     ASYNC      'async'       (1, 0) (1, 5)
    797     NAME       'def'         (1, 6) (1, 9)
    798     NAME       'foo'         (1, 10) (1, 13)
    799     OP         '('           (1, 13) (1, 14)
    800     OP         ')'           (1, 14) (1, 15)
    801     OP         ':'           (1, 15) (1, 16)
    802     NEWLINE    '\\n'          (1, 16) (1, 17)
    803     INDENT     '  '          (2, 0) (2, 2)
    804     ASYNC      'async'       (2, 2) (2, 7)
    805     NAME       'for'         (2, 8) (2, 11)
    806     NAME       'i'           (2, 12) (2, 13)
    807     NAME       'in'          (2, 14) (2, 16)
    808     NUMBER     '1'           (2, 17) (2, 18)
    809     OP         ':'           (2, 18) (2, 19)
    810     NAME       'pass'        (2, 20) (2, 24)
    811     DEDENT     ''            (3, 0) (3, 0)
    812     """)
    813 
    814         self.check_tokenize('''async def foo(async): await''', """\
    815     ASYNC      'async'       (1, 0) (1, 5)
    816     NAME       'def'         (1, 6) (1, 9)
    817     NAME       'foo'         (1, 10) (1, 13)
    818     OP         '('           (1, 13) (1, 14)
    819     ASYNC      'async'       (1, 14) (1, 19)
    820     OP         ')'           (1, 19) (1, 20)
    821     OP         ':'           (1, 20) (1, 21)
    822     AWAIT      'await'       (1, 22) (1, 27)
    823     """)
    824 
    825         self.check_tokenize('''\
    826 def f():
    827 
    828   def baz(): pass
    829   async def bar(): pass
    830 
    831   await = 2''', """\
    832     NAME       'def'         (1, 0) (1, 3)
    833     NAME       'f'           (1, 4) (1, 5)
    834     OP         '('           (1, 5) (1, 6)
    835     OP         ')'           (1, 6) (1, 7)
    836     OP         ':'           (1, 7) (1, 8)
    837     NEWLINE    '\\n'          (1, 8) (1, 9)
    838     NL         '\\n'          (2, 0) (2, 1)
    839     INDENT     '  '          (3, 0) (3, 2)
    840     NAME       'def'         (3, 2) (3, 5)
    841     NAME       'baz'         (3, 6) (3, 9)
    842     OP         '('           (3, 9) (3, 10)
    843     OP         ')'           (3, 10) (3, 11)
    844     OP         ':'           (3, 11) (3, 12)
    845     NAME       'pass'        (3, 13) (3, 17)
    846     NEWLINE    '\\n'          (3, 17) (3, 18)
    847     ASYNC      'async'       (4, 2) (4, 7)
    848     NAME       'def'         (4, 8) (4, 11)
    849     NAME       'bar'         (4, 12) (4, 15)
    850     OP         '('           (4, 15) (4, 16)
    851     OP         ')'           (4, 16) (4, 17)
    852     OP         ':'           (4, 17) (4, 18)
    853     NAME       'pass'        (4, 19) (4, 23)
    854     NEWLINE    '\\n'          (4, 23) (4, 24)
    855     NL         '\\n'          (5, 0) (5, 1)
    856     NAME       'await'       (6, 2) (6, 7)
    857     OP         '='           (6, 8) (6, 9)
    858     NUMBER     '2'           (6, 10) (6, 11)
    859     DEDENT     ''            (7, 0) (7, 0)
    860     """)
    861 
    862         self.check_tokenize('''\
    863 async def f():
    864 
    865   def baz(): pass
    866   async def bar(): pass
    867 
    868   await = 2''', """\
    869     ASYNC      'async'       (1, 0) (1, 5)
    870     NAME       'def'         (1, 6) (1, 9)
    871     NAME       'f'           (1, 10) (1, 11)
    872     OP         '('           (1, 11) (1, 12)
    873     OP         ')'           (1, 12) (1, 13)
    874     OP         ':'           (1, 13) (1, 14)
    875     NEWLINE    '\\n'          (1, 14) (1, 15)
    876     NL         '\\n'          (2, 0) (2, 1)
    877     INDENT     '  '          (3, 0) (3, 2)
    878     NAME       'def'         (3, 2) (3, 5)
    879     NAME       'baz'         (3, 6) (3, 9)
    880     OP         '('           (3, 9) (3, 10)
    881     OP         ')'           (3, 10) (3, 11)
    882     OP         ':'           (3, 11) (3, 12)
    883     NAME       'pass'        (3, 13) (3, 17)
    884     NEWLINE    '\\n'          (3, 17) (3, 18)
    885     ASYNC      'async'       (4, 2) (4, 7)
    886     NAME       'def'         (4, 8) (4, 11)
    887     NAME       'bar'         (4, 12) (4, 15)
    888     OP         '('           (4, 15) (4, 16)
    889     OP         ')'           (4, 16) (4, 17)
    890     OP         ':'           (4, 17) (4, 18)
    891     NAME       'pass'        (4, 19) (4, 23)
    892     NEWLINE    '\\n'          (4, 23) (4, 24)
    893     NL         '\\n'          (5, 0) (5, 1)
    894     AWAIT      'await'       (6, 2) (6, 7)
    895     OP         '='           (6, 8) (6, 9)
    896     NUMBER     '2'           (6, 10) (6, 11)
    897     DEDENT     ''            (7, 0) (7, 0)
    898     """)
    899 
    900 
    901 def decistmt(s):
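            """Substitute Decimal constructor calls for float-looking NUMBER
            tokens (those containing a '.') in the statement string s.

            A sketch of the expected output, mirroring test_decistmt below
            (the space after 'Decimal' is an artifact of untokenize()
            rebuilding source from 2-tuples):

            >>> decistmt('+21.3e-5*-.1234/81.7')
            "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"
            """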
    902     result = []
    903     g = tokenize(BytesIO(s.encode('utf-8')).readline)   # tokenize the string
    904     for toknum, tokval, _, _, _ in g:
    905         if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
    906             result.extend([
    907                 (NAME, 'Decimal'),
    908                 (OP, '('),
    909                 (STRING, repr(tokval)),
    910                 (OP, ')')
    911             ])
    912         else:
    913             result.append((toknum, tokval))
    914     return untokenize(result).decode('utf-8')
    915

    916 class TestMisc(TestCase):
    917 
    918     def test_decistmt(self):
    919         # Substitute Decimals for floats in a string of statements.
    920         # This is an example from the docs.
    921 
    922         from decimal import Decimal
    923         s = '+21.3e-5*-.1234/81.7'
    924         self.assertEqual(decistmt(s),
    925                          "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")
    926 
    927         # The format of the exponent is inherited from the platform C library.
    928         # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    929         # we're only showing 11 digits, and the 12th isn't close to 5, the
    930         # rest of the output should be platform-independent.
    931         self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')
    932 
    933         # Output from calculations with Decimal should be identical across all
    934         # platforms.
    935         self.assertEqual(eval(decistmt(s)),
    936                          Decimal('-3.217160342717258261933904529E-7'))
    937 
    938 
    939 class TestTokenizerAdheresToPep0263(TestCase):
    940     """
    941     Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    942     """
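            # For reference, a coding cookie is a comment on line 1 or 2 that
            # matches tokenize's coding pattern; both of these styles are
            # exercised by the detect_encoding tests further down:
            #
            #     # -*- coding: latin-1 -*-
            #     # vim: set fileencoding=ascii :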
    943 
    944     def _testFile(self, filename):
    945         path = os.path.join(os.path.dirname(__file__), filename)
    946         TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
    947 
    948     def test_utf8_coding_cookie_and_no_utf8_bom(self):
    949         f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
    950         self._testFile(f)
    951 
    952     def test_latin1_coding_cookie_and_utf8_bom(self):
    953         """
    954         As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
    955         allowed encoding for the comment is 'utf-8'.  The text file used in
    956         this test starts with a BOM signature, but specifies latin1 as the
    957         coding, so verify that a SyntaxError is raised, which matches the
    958         behaviour of the interpreter when it encounters a similar condition.
    959         """
    960         f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
    961         self.assertRaises(SyntaxError, self._testFile, f)
    962 
    963     def test_no_coding_cookie_and_utf8_bom(self):
    964         f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
    965         self._testFile(f)
    966 
    967     def test_utf8_coding_cookie_and_utf8_bom(self):
    968         f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
    969         self._testFile(f)
    970 
    971     def test_bad_coding_cookie(self):
    972         self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
    973         self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')
    974 
    975 
    976 class Test_Tokenize(TestCase):
    977 
    978     def test__tokenize_decodes_with_specified_encoding(self):
    979         literal = '"ЉЊЈЁЂ"'
    980         line = literal.encode('utf-8')
    981         first = False
    982         def readline():
    983             nonlocal first
    984             if not first:
    985                 first = True
    986                 return line
    987             else:
    988                 return b''
    989 
    990         # skip the initial encoding token and the end token
    991         tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
    992         expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
    993         self.assertEqual(tokens, expected_tokens,
    994                          "bytes not decoded with encoding")
    995 
    996     def test__tokenize_does_not_decode_with_encoding_none(self):
    997         literal = '"ЉЊЈЁЂ"'
    998         first = False
    999         def readline():
   1000             nonlocal first
   1001             if not first:
   1002                 first = True
   1003                 return literal
   1004             else:
   1005                 return b''
   1006 
   1007         # skip the end token
   1008         tokens = list(_tokenize(readline, encoding=None))[:-1]
   1009         expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
   1010         self.assertEqual(tokens, expected_tokens,
   1011                          "string not tokenized when encoding is None")
   1012 
   1013 
   1014 class TestDetectEncoding(TestCase):
   1015 
   1016     def get_readline(self, lines):
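                # Build a readline() that serves each line once and then
                # raises StopIteration; detect_encoding treats that as end
                # of input.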
   1017         index = 0
   1018         def readline():
   1019             nonlocal index
   1020             if index == len(lines):
   1021                 raise StopIteration
   1022             line = lines[index]
   1023             index += 1
   1024             return line
   1025         return readline
   1026 
   1027     def test_no_bom_no_encoding_cookie(self):
   1028         lines = (
   1029             b'# something\n',
   1030             b'print(something)\n',
   1031             b'do_something(else)\n'
   1032         )
   1033         encoding, consumed_lines = detect_encoding(self.get_readline(lines))
   1034         self.assertEqual(encoding, 'utf-8')
   1035         self.assertEqual(consumed_lines, list(lines[:2]))
   1036 
   1037     def test_bom_no_cookie(self):
   1038         lines = (
   1039             b'\xef\xbb\xbf# something\n',
   1040             b'print(something)\n',
   1041             b'do_something(else)\n'
   1042         )
   1043         encoding, consumed_lines = detect_encoding(self.get_readline(lines))
   1044         self.assertEqual(encoding, 'utf-8-sig')
   1045         self.assertEqual(consumed_lines,
   1046                          [b'# something\n', b'print(something)\n'])
   1047 
   1048     def test_cookie_first_line_no_bom(self):
   1049         lines = (
   1050             b'# -*- coding: latin-1 -*-\n',
   1051             b'print(something)\n',
   1052             b'do_something(else)\n'
   1053         )
   1054         encoding, consumed_lines = detect_encoding(self.get_readline(lines))
   1055         self.assertEqual(encoding, 'iso-8859-1')
   1056         self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
   1057 
   1058     def test_matched_bom_and_cookie_first_line(self):
   1059         lines = (
   1060             b'\xef\xbb\xbf# coding=utf-8\n',
   1061             b'print(something)\n',
   1062             b'do_something(else)\n'
   1063         )
   1064         encoding, consumed_lines = detect_encoding(self.get_readline(lines))
   1065         self.assertEqual(encoding, 'utf-8-sig')
   1066         self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
   1067 
   1068     def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
   1069         lines = (
   1070             b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
   1071             b'print(something)\n',
   1072             b'do_something(else)\n'
   1073         )
   1074         readline = self.get_readline(lines)
   1075         self.assertRaises(SyntaxError, detect_encoding, readline)
   1076 
   1077     def test_cookie_second_line_no_bom(self):
   1078         lines = (
   1079             b'#! something\n',
   1080             b'# vim: set fileencoding=ascii :\n',
   1081             b'print(something)\n',
   1082             b'do_something(else)\n'
   1083         )
   1084         encoding, consumed_lines = detect_encoding(self.get_readline(lines))
   1085         self.assertEqual(encoding, 'ascii')
   1086         expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
   1087         self.assertEqual(consumed_lines, expected)
   1088 
   1089     def test_matched_bom_and_cookie_second_line(self):
   1090         lines = (
   1091             b'\xef\xbb\xbf#! something\n',
   1092             b'# coding=utf-8\n',
   1093             b'print(something)\n',
   1094             b'do_something(else)\n'
   1095         )
   1096         encoding, consumed_lines = detect_encoding(self.get_readline(lines))
   1097         self.assertEqual(encoding, 'utf-8-sig')
   1098         self.assertEqual(consumed_lines,
   1099                          [b'#! something\n', b'# coding=utf-8\n'])
   1100 
   1101     def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
   1102         lines = (
   1103             b'\xef\xbb\xbf#! something\n',
   1104             b'# vim: set fileencoding=ascii :\n',
   1105             b'print(something)\n',
   1106             b'do_something(else)\n'
   1107         )
   1108         readline = self.get_readline(lines)
   1109         self.assertRaises(SyntaxError, detect_encoding, readline)
   1110 
   1111     def test_cookie_second_line_noncommented_first_line(self):
   1112         lines = (
   1113             b"print('\xc2\xa3')\n",
   1114             b'# vim: set fileencoding=iso8859-15 :\n',
   1115             b"print('\xe2\x82\xac')\n"
   1116         )
   1117         encoding, consumed_lines = detect_encoding(self.get_readline(lines))
   1118         self.assertEqual(encoding, 'utf-8')
   1119         expected = [b"print('\xc2\xa3')\n"]
   1120         self.assertEqual(consumed_lines, expected)
   1121 
   1122     def test_cookie_second_line_commented_first_line(self):
   1123         lines = (
   1124             b"#print('\xc2\xa3')\n",
   1125             b'# vim: set fileencoding=iso8859-15 :\n',
   1126             b"print('\xe2\x82\xac')\n"
   1127         )
   1128         encoding, consumed_lines = detect_encoding(self.get_readline(lines))
   1129         self.assertEqual(encoding, 'iso8859-15')
   1130         expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
   1131         self.assertEqual(consumed_lines, expected)
   1132 
   1133     def test_cookie_second_line_empty_first_line(self):
   1134         lines = (
   1135             b'\n',
   1136             b'# vim: set fileencoding=iso8859-15 :\n',
   1137             b"print('\xe2\x82\xac')\n"
   1138         )
   1139         encoding, consumed_lines = detect_encoding(self.get_readline(lines))
   1140         self.assertEqual(encoding, 'iso8859-15')
   1141         expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
   1142         self.assertEqual(consumed_lines, expected)
   1143 
   1144     def test_latin1_normalization(self):
   1145         # See get_normal_name() in tokenizer.c.
   1146         encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
   1147                      "iso-8859-1-unix", "iso-latin-1-mac")
   1148         for encoding in encodings:
   1149             for rep in ("-", "_"):
   1150                 enc = encoding.replace("-", rep)
   1151                 lines = (b"#!/usr/bin/python\n",
   1152                          b"# coding: " + enc.encode("ascii") + b"\n",
   1153                          b"print(things)\n",
   1154                          b"do_something += 4\n")
   1155                 rl = self.get_readline(lines)
   1156                 found, consumed_lines = detect_encoding(rl)
   1157                 self.assertEqual(found, "iso-8859-1")
   1158 
   1159     def test_syntaxerror_latin1(self):
   1160         # Issue 14629: need to raise SyntaxError if the first
   1161         # line(s) have non-UTF-8 characters
   1162         lines = (
   1163             b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
   1164             )
   1165         readline = self.get_readline(lines)
   1166         self.assertRaises(SyntaxError, detect_encoding, readline)
   1167
   1169     def test_utf8_normalization(self):
   1170         # See get_normal_name() in tokenizer.c.
   1171         encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
   1172         for encoding in encodings:
   1173             for rep in ("-", "_"):
   1174                 enc = encoding.replace("-", rep)
   1175                 lines = (b"#!/usr/bin/python\n",
   1176                          b"# coding: " + enc.encode("ascii") + b"\n",
   1177                          b"1 + 3\n")
   1178                 rl = self.get_readline(lines)
   1179                 found, consumed_lines = detect_encoding(rl)
   1180                 self.assertEqual(found, "utf-8")
   1181 
   1182     def test_short_files(self):
   1183         readline = self.get_readline((b'print(something)\n',))
   1184         encoding, consumed_lines = detect_encoding(readline)
   1185         self.assertEqual(encoding, 'utf-8')
   1186         self.assertEqual(consumed_lines, [b'print(something)\n'])
   1187 
   1188         encoding, consumed_lines = detect_encoding(self.get_readline(()))
   1189         self.assertEqual(encoding, 'utf-8')
   1190         self.assertEqual(consumed_lines, [])
   1191 
   1192         readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
   1193         encoding, consumed_lines = detect_encoding(readline)
   1194         self.assertEqual(encoding, 'utf-8-sig')
   1195         self.assertEqual(consumed_lines, [b'print(something)\n'])
   1196 
   1197         readline = self.get_readline((b'\xef\xbb\xbf',))
   1198         encoding, consumed_lines = detect_encoding(readline)
   1199         self.assertEqual(encoding, 'utf-8-sig')
   1200         self.assertEqual(consumed_lines, [])
   1201 
   1202         readline = self.get_readline((b'# coding: bad\n',))
   1203         self.assertRaises(SyntaxError, detect_encoding, readline)
   1204 
   1205     def test_false_encoding(self):
   1206         # Issue 18873: "Encoding" detected in non-comment lines
   1207         readline = self.get_readline((b'print("#coding=fake")',))
   1208         encoding, consumed_lines = detect_encoding(readline)
   1209         self.assertEqual(encoding, 'utf-8')
   1210         self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
   1211 
   1212     def test_open(self):
   1213         filename = support.TESTFN + '.py'
   1214         self.addCleanup(support.unlink, filename)
   1215 
   1216         # test coding cookie
   1217         for encoding in ('iso-8859-15', 'utf-8'):
   1218             with open(filename, 'w', encoding=encoding) as fp:
   1219                 print("# coding: %s" % encoding, file=fp)
   1220                 print("print('euro:\u20ac')", file=fp)
   1221             with tokenize_open(filename) as fp:
   1222                 self.assertEqual(fp.encoding, encoding)
   1223                 self.assertEqual(fp.mode, 'r')
   1224 
   1225         # test BOM (no coding cookie)
   1226         with open(filename, 'w', encoding='utf-8-sig') as fp:
   1227             print("print('euro:\u20ac')", file=fp)
   1228         with tokenize_open(filename) as fp:
   1229             self.assertEqual(fp.encoding, 'utf-8-sig')
   1230             self.assertEqual(fp.mode, 'r')
   1231 
   1232     def test_filename_in_exception(self):
   1233         # When possible, include the file name in the exception.
   1234         path = 'some_file_path'
   1235         lines = (
   1236             b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
   1237             )
   1238         class Bunk:
   1239             def __init__(self, lines, path):
   1240                 self.name = path
   1241                 self._lines = lines
   1242                 self._index = 0
   1243 
    1244             def readline(self):
    1245                 if self._index == len(self._lines):
    1246                     raise StopIteration
    1247                 line = self._lines[self._index]
    1248                 self._index += 1
    1249                 return line
   1250 
   1251         with self.assertRaises(SyntaxError):
   1252             ins = Bunk(lines, path)
   1253             # Make sure lacking a name isn't an issue.
   1254             del ins.name
   1255             detect_encoding(ins.readline)
   1256         with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
   1257             ins = Bunk(lines, path)
   1258             detect_encoding(ins.readline)
   1259 
   1260     def test_open_error(self):
   1261         # Issue #23840: open() must close the binary file on error
   1262         m = BytesIO(b'#coding:xxx')
   1263         with mock.patch('tokenize._builtin_open', return_value=m):
   1264             self.assertRaises(SyntaxError, tokenize_open, 'foobar')
   1265         self.assertTrue(m.closed)
   1266 
   1267 
   1268 class TestTokenize(TestCase):
   1269 
   1270     def test_tokenize(self):
   1271         import tokenize as tokenize_module
   1272         encoding = object()
   1273         encoding_used = None
   1274         def mock_detect_encoding(readline):
   1275             return encoding, [b'first', b'second']
   1276 
   1277         def mock__tokenize(readline, encoding):
   1278             nonlocal encoding_used
   1279             encoding_used = encoding
   1280             out = []
   1281             while True:
   1282                 next_line = readline()
   1283                 if next_line:
   1284                     out.append(next_line)
   1285                     continue
   1286                 return out
   1287 
   1288         counter = 0
   1289         def mock_readline():
   1290             nonlocal counter
   1291             counter += 1
   1292             if counter == 5:
   1293                 return b''
   1294             return str(counter).encode()
   1295 
   1296         orig_detect_encoding = tokenize_module.detect_encoding
   1297         orig__tokenize = tokenize_module._tokenize
   1298         tokenize_module.detect_encoding = mock_detect_encoding
   1299         tokenize_module._tokenize = mock__tokenize
   1300         try:
   1301             results = tokenize(mock_readline)
   1302             self.assertEqual(list(results),
   1303                              [b'first', b'second', b'1', b'2', b'3', b'4'])
   1304         finally:
   1305             tokenize_module.detect_encoding = orig_detect_encoding
   1306             tokenize_module._tokenize = orig__tokenize
   1307 
    1308         self.assertEqual(encoding_used, encoding)
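                 # Editor's illustrative sketch of the behaviour the mocks
                 # above pin down: the real tokenize() replays the line(s)
                 # consumed by detect_encoding(), so the coding-cookie comment
                 # still shows up as a token.
                 toks = list(tokenize(BytesIO(b'# coding: utf-8\nx = 1\n').readline))
                 self.assertEqual(toks[0].type, ENCODING)
                 self.assertIn('# coding: utf-8', [t.string for t in toks])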
   1309 
   1310     def test_oneline_defs(self):
   1311         buf = []
   1312         for i in range(500):
   1313             buf.append('def i{i}(): return {i}'.format(i=i))
   1314         buf.append('OK')
   1315         buf = '\n'.join(buf)
   1316 
    1317         # Test that 500 consecutive one-line defs are OK
   1318         toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
   1319         self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER
   1320 
   1321     def assertExactTypeEqual(self, opstr, *optypes):
   1322         tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
   1323         num_optypes = len(optypes)
   1324         self.assertEqual(len(tokens), 2 + num_optypes)
   1325         self.assertEqual(token.tok_name[tokens[0].exact_type],
   1326                          token.tok_name[ENCODING])
   1327         for i in range(num_optypes):
   1328             self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
   1329                              token.tok_name[optypes[i]])
   1330         self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
   1331                          token.tok_name[token.ENDMARKER])
   1332 
   1333     def test_exact_type(self):
   1334         self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
   1335         self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
   1336         self.assertExactTypeEqual(':', token.COLON)
   1337         self.assertExactTypeEqual(',', token.COMMA)
   1338         self.assertExactTypeEqual(';', token.SEMI)
   1339         self.assertExactTypeEqual('+', token.PLUS)
   1340         self.assertExactTypeEqual('-', token.MINUS)
   1341         self.assertExactTypeEqual('*', token.STAR)
   1342         self.assertExactTypeEqual('/', token.SLASH)
   1343         self.assertExactTypeEqual('|', token.VBAR)
   1344         self.assertExactTypeEqual('&', token.AMPER)
   1345         self.assertExactTypeEqual('<', token.LESS)
   1346         self.assertExactTypeEqual('>', token.GREATER)
   1347         self.assertExactTypeEqual('=', token.EQUAL)
   1348         self.assertExactTypeEqual('.', token.DOT)
   1349         self.assertExactTypeEqual('%', token.PERCENT)
   1350         self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
   1351         self.assertExactTypeEqual('==', token.EQEQUAL)
   1352         self.assertExactTypeEqual('!=', token.NOTEQUAL)
   1353         self.assertExactTypeEqual('<=', token.LESSEQUAL)
   1354         self.assertExactTypeEqual('>=', token.GREATEREQUAL)
   1355         self.assertExactTypeEqual('~', token.TILDE)
   1356         self.assertExactTypeEqual('^', token.CIRCUMFLEX)
   1357         self.assertExactTypeEqual('<<', token.LEFTSHIFT)
   1358         self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
   1359         self.assertExactTypeEqual('**', token.DOUBLESTAR)
   1360         self.assertExactTypeEqual('+=', token.PLUSEQUAL)
   1361         self.assertExactTypeEqual('-=', token.MINEQUAL)
   1362         self.assertExactTypeEqual('*=', token.STAREQUAL)
   1363         self.assertExactTypeEqual('/=', token.SLASHEQUAL)
   1364         self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
   1365         self.assertExactTypeEqual('&=', token.AMPEREQUAL)
   1366         self.assertExactTypeEqual('|=', token.VBAREQUAL)
   1367         self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
   1369         self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
   1370         self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
   1371         self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
   1372         self.assertExactTypeEqual('//', token.DOUBLESLASH)
   1373         self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
   1374         self.assertExactTypeEqual('@', token.AT)
   1375         self.assertExactTypeEqual('@=', token.ATEQUAL)
   1376 
   1377         self.assertExactTypeEqual('a**2+b**2==c**2',
   1378                                   NAME, token.DOUBLESTAR, NUMBER,
   1379                                   token.PLUS,
   1380                                   NAME, token.DOUBLESTAR, NUMBER,
   1381                                   token.EQEQUAL,
   1382                                   NAME, token.DOUBLESTAR, NUMBER)
   1383         self.assertExactTypeEqual('{1, 2, 3}',
   1384                                   token.LBRACE,
   1385                                   token.NUMBER, token.COMMA,
   1386                                   token.NUMBER, token.COMMA,
   1387                                   token.NUMBER,
   1388                                   token.RBRACE)
   1389         self.assertExactTypeEqual('^(x & 0x1)',
   1390                                   token.CIRCUMFLEX,
   1391                                   token.LPAR,
   1392                                   token.NAME, token.AMPER, token.NUMBER,
   1393                                   token.RPAR)
   1394 
   1395     def test_pathological_trailing_whitespace(self):
   1396         # See http://bugs.python.org/issue16152
   1397         self.assertExactTypeEqual('@          ', token.AT)
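
             def test_exact_type_vs_type(self):
                 # Editor's illustrative sketch (not part of the original
                 # suite): operators come back with the generic OP type, while
                 # the exact_type attribute names the specific operator.
                 toks = list(tokenize(BytesIO(b'1 + 2').readline))
                 plus = toks[2]
                 self.assertEqual(plus.type, OP)
                 self.assertEqual(plus.exact_type, token.PLUS)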
   1398 
   1399 
   1400 class UntokenizeTest(TestCase):
   1401 
   1402     def test_bad_input_order(self):
    1403         # raise if the start row precedes the previous token's end row
   1404         u = Untokenizer()
   1405         u.prev_row = 2
   1406         u.prev_col = 2
   1407         with self.assertRaises(ValueError) as cm:
   1408             u.add_whitespace((1,3))
   1409         self.assertEqual(cm.exception.args[0],
   1410                 'start (1,3) precedes previous end (2,2)')
    1411         # raise if the start column precedes the previous end column in the same row
   1412         self.assertRaises(ValueError, u.add_whitespace, (2,1))
   1413 
   1414     def test_backslash_continuation(self):
   1415         # The problem is that <whitespace>\<newline> leaves no token
   1416         u = Untokenizer()
   1417         u.prev_row = 1
    1418         u.prev_col = 1
   1419         u.tokens = []
   1420         u.add_whitespace((2, 0))
   1421         self.assertEqual(u.tokens, ['\\\n'])
   1422         u.prev_row = 2
   1423         u.add_whitespace((4, 4))
   1424         self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
   1425         TestRoundtrip.check_roundtrip(self, 'a\n  b\n    c\n  \\\n  c\n')
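                 # Editor's illustrative sketch: the '<whitespace>\<newline>'
                 # mentioned above really does vanish from the token stream,
                 # which is why add_whitespace() re-creates it from the row gap.
                 toks = list(tokenize(BytesIO(b'x = 1 + \\\n2\n').readline))
                 self.assertNotIn('\\', [t.string for t in toks])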
   1426 
   1427     def test_iter_compat(self):
   1428         u = Untokenizer()
   1429         token = (NAME, 'Hello')
   1430         tokens = [(ENCODING, 'utf-8'), token]
   1431         u.compat(token, iter([]))
   1432         self.assertEqual(u.tokens, ["Hello "])
   1433         u = Untokenizer()
   1434         self.assertEqual(u.untokenize(iter([token])), 'Hello ')
   1435         u = Untokenizer()
   1436         self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
   1437         self.assertEqual(u.encoding, 'utf-8')
   1438         self.assertEqual(untokenize(iter(tokens)), b'Hello ')
   1439 
   1440 
   1441 class TestRoundtrip(TestCase):
   1442 
   1443     def check_roundtrip(self, f):
   1444         """
   1445         Test roundtrip for `untokenize`. `f` is an open file or a string.
   1446         The source code in f is tokenized to both 5- and 2-tuples.
    1447         Both sequences are converted back to source code via
    1448         tokenize.untokenize(), and each result is tokenized again to 2-tuples.
    1449         The test fails if the three 2-tuple sequences do not all match.
   1450 
   1451         When untokenize bugs are fixed, untokenize with 5-tuples should
   1452         reproduce code that does not contain a backslash continuation
   1453         following spaces.  A proper test should test this.
   1454         """
   1455         # Get source code and original tokenizations
   1456         if isinstance(f, str):
   1457             code = f.encode('utf-8')
   1458         else:
   1459             code = f.read()
   1460             f.close()
   1461         readline = iter(code.splitlines(keepends=True)).__next__
   1462         tokens5 = list(tokenize(readline))
   1463         tokens2 = [tok[:2] for tok in tokens5]
   1464         # Reproduce tokens2 from pairs
   1465         bytes_from2 = untokenize(tokens2)
   1466         readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
   1467         tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
   1468         self.assertEqual(tokens2_from2, tokens2)
   1469         # Reproduce tokens2 from 5-tuples
   1470         bytes_from5 = untokenize(tokens5)
   1471         readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
   1472         tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
   1473         self.assertEqual(tokens2_from5, tokens2)
   1474 
   1475     def test_roundtrip(self):
   1476         # There are some standard formatting practices that are easy to get right.
   1477 
   1478         self.check_roundtrip("if x == 1:\n"
   1479                              "    print(x)\n")
   1480         self.check_roundtrip("# This is a comment\n"
   1481                              "# This also")
   1482 
    1483         # Some people use different formatting conventions, which makes
    1484         # untokenize a little trickier.  Note that this test case has
    1485         # trailing whitespace after the colon.
   1487 
   1488         self.check_roundtrip("if x == 1 : \n"
   1489                              "  print(x)\n")
   1490         fn = support.findfile("tokenize_tests.txt")
   1491         with open(fn, 'rb') as f:
   1492             self.check_roundtrip(f)
   1493         self.check_roundtrip("if x == 1:\n"
   1494                              "    # A comment by itself.\n"
   1495                              "    print(x) # Comment here, too.\n"
   1496                              "    # Another comment.\n"
   1497                              "after_if = True\n")
   1498         self.check_roundtrip("if (x # The comments need to go in the right place\n"
   1499                              "    == 1):\n"
   1500                              "    print('x==1')\n")
   1501         self.check_roundtrip("class Test: # A comment here\n"
   1502                              "  # A comment with weird indent\n"
   1503                              "  after_com = 5\n"
   1504                              "  def x(m): return m*5 # a one liner\n"
   1505                              "  def y(m): # A whitespace after the colon\n"
   1506                              "     return y*4 # 3-space indent\n")
   1507 
   1508         # Some error-handling code
   1509         self.check_roundtrip("try: import somemodule\n"
   1510                              "except ImportError: # comment\n"
   1511                              "    print('Can not import' # comment2\n)"
   1512                              "else:   print('Loaded')\n")
   1513 
   1514     def test_continuation(self):
    1515         # Implicit line continuation inside balanced brackets
   1516         self.check_roundtrip("a = (3,4, \n"
   1517                              "5,6)\n"
   1518                              "y = [3, 4,\n"
   1519                              "5]\n"
   1520                              "z = {'a': 5,\n"
   1521                              "'b':15, 'c':True}\n"
   1522                              "x = len(y) + 5 - a[\n"
   1523                              "3] - a[2]\n"
   1524                              "+ len(z) - z[\n"
   1525                              "'b']\n")
   1526 
   1527     def test_backslash_continuation(self):
   1528         # Backslash means line continuation, except for comments
   1529         self.check_roundtrip("x=1+\\\n"
   1530                              "1\n"
   1531                              "# This is a comment\\\n"
   1532                              "# This also\n")
   1533         self.check_roundtrip("# Comment \\\n"
   1534                              "x = 0")
   1535 
   1536     def test_string_concatenation(self):
   1537         # Two string literals on the same line
   1538         self.check_roundtrip("'' ''")
   1539 
   1540     def test_random_files(self):
    1541         # Test roundtrip on random Python modules.
    1542         # Pass the '-ucpu' option to process the full directory.
   1543 
   1544         import glob, random
   1545         fn = support.findfile("tokenize_tests.txt")
   1546         tempdir = os.path.dirname(fn) or os.curdir
   1547         testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
   1548 
    1549         # Tokenize is broken on test_unicode_identifiers.py because regular
    1550         # expressions are broken on the obscure unicode identifiers in it. *sigh*
   1551         # With roundtrip extended to test the 5-tuple mode of untokenize,
   1552         # 7 more testfiles fail.  Remove them also until the failure is diagnosed.
   1553 
   1554         testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
   1555         for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
    1556             testfiles.remove(os.path.join(tempdir, "test_%s.py" % f))
   1557 
   1558         if not support.is_resource_enabled("cpu"):
   1559             testfiles = random.sample(testfiles, 10)
   1560 
   1561         for testfile in testfiles:
   1562             with open(testfile, 'rb') as f:
   1563                 with self.subTest(file=testfile):
   1564                     self.check_roundtrip(f)
   1565 
   1566 
   1567     def roundtrip(self, code):
   1568         if isinstance(code, str):
   1569             code = code.encode('utf-8')
   1570         return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
   1571 
   1572     def test_indentation_semantics_retained(self):
   1573         """
   1574         Ensure that although whitespace might be mutated in a roundtrip,
   1575         the semantic meaning of the indentation remains consistent.
   1576         """
   1577         code = "if False:\n\tx=3\n\tx=3\n"
   1578         codelines = self.roundtrip(code).split('\n')
   1579         self.assertEqual(codelines[1], codelines[2])
   1580         self.check_roundtrip(code)
   1581 
   1582 
   1583 if __name__ == "__main__":
    1584     import unittest
    1585     unittest.main()
    1586