from test import test_support
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
                      STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER is omitted for
    # brevity.
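    #
    # For reference, each item yielded by generate_tokens() is a 5-tuple:
    # (type, string, (start_row, start_col), (end_row, end_col), line).
    # A minimal illustrative sketch of the raw stream (not executed as
    # part of this test suite):
    #
    #     >>> from tokenize import generate_tokens, tok_name
    #     >>> from StringIO import StringIO
    #     >>> toks = list(generate_tokens(StringIO("1 + 1\n").readline))
    #     >>> [(tok_name[t], s) for t, s, _, _, _ in toks]
    #     [('NUMBER', '1'), ('OP', '+'), ('NUMBER', '1'), ('NEWLINE', '\n'), ('ENDMARKER', '')]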

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
        result = []
        f = StringIO(s)
        for type, token, start, end, line in generate_tokens(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
            result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
                          locals())
        self.assertEqual(result,
                         expected.rstrip().splitlines())

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)

        indent_error_file = """\
def k(x):
    x += 2
  x += 5
"""
        with self.assertRaisesRegexp(IndentationError,
                                     "unindent does not match any "
                                     "outer indentation level"):
            for tok in generate_tokens(StringIO(indent_error_file).readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0123'        (1, 9) (1, 13)
    """)
        self.check_tokenize("01234567 > ~0x15", """\
    NUMBER     '01234567'    (1, 0) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    OP         '~'           (1, 11) (1, 12)
    NUMBER     '0x15'        (1, 12) (1, 16)
    """)
        self.check_tokenize("2134568 != 01231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '01231515'    (1, 11) (1, 19)
    """)
        self.check_tokenize("(-124561-1) & 0200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '0200000000'  (1, 14) (1, 24)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 012345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '012345'      (1, 13) (1, 19)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0L'          (1, 4) (1, 6)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110l", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 26)
    """)
        self.check_tokenize("x = -15921590215012591L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 23)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = u'abc' + U'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "u'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "U'ABC'"      (1, 13) (1, 19)
    """)
        self.check_tokenize('y = u"ABC" + U"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'u"ABC"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'U"ABC"'      (1, 13) (1, 19)
    """)
        self.check_tokenize("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "ur'abc'"     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     "Ur'ABC'"     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     "uR'ABC'"     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     "UR'ABC'"     (1, 34) (1, 41)
    """)
        self.check_tokenize('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'ur"abc"'     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     'Ur"ABC"'     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     'uR"ABC"'     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     'UR"ABC"'     (1, 34) (1, 41)
    """)
        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 01 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '01'          (1, 17) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    NUMBER     '0x124'       (1, 22) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    NAME       'z'           (1, 30) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    NAME       'a'           (1, 34) (1, 35)
    OP         '['           (1, 35) (1, 36)
    NUMBER     '5'           (1, 36) (1, 37)
    OP         ']'           (1, 37) (1, 38)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\n"
                            "x = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\n"
                            "def foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_pathological_trailing_whitespace(self):
        # Pathological whitespace (http://bugs.python.org/issue16152)
        self.check_tokenize("@          ", """\
    OP         '@'           (1, 0) (1, 1)
    """)


def decistmt(s):
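    """Substitute a Decimal constructor call for each float literal in s.

    This mirrors the example from the tokenize docs; test_decistmt below
    asserts the same behavior. Because untokenize() receives 2-tuples, it
    runs in compatibility mode and appends a space after name and number
    tokens, hence the "Decimal (" spelling in the result:

    >>> decistmt('+21.3e-5*-.1234/81.7')
    "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"
    """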
    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)


class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 12 digits, and the 13th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegexpMatches(str(eval(s)), '-3.21716034272e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)), Decimal('-3.217160342717258261933904529E-7'))


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if the start precedes the previous end row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                'start (1,3) precedes previous end (2,2)')
        # raise if the start precedes the previous end column in the same row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # A <whitespace>\<newline> continuation leaves no token behind, so
        # add_whitespace() must recreate it from the row offset.
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized, converted back to source code
        via tokenize.untokenize(), and tokenized again from the latter.
        The test fails if the second tokenization doesn't match the first.
        """
        if isinstance(f, str): f = StringIO(f)
        token_list = list(generate_tokens(f.readline))
        f.close()
        tokens1 = [tok[:2] for tok in token_list]
        new_text = untokenize(tokens1)
        readline = iter(new_text.splitlines(1)).next
        tokens2 = [tok[:2] for tok in generate_tokens(readline)]
        self.assertEqual(tokens2, tokens1)
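
    # A minimal sketch of the lossless direction, for contrast: fed full
    # 5-tuples (rather than the 2-tuples used above), untokenize() is
    # documented to reproduce well-formed, newline-terminated source exactly.
    #
    #     >>> src = "1 + 2\n"
    #     >>> untokenize(generate_tokens(StringIO(src).readline)) == src
    #     True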

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("if x == 1:\n"
                             "    print x\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that the test below involves
        # trailing whitespace after the colon.

        self.check_roundtrip("if x == 1 : \n"
                             "  print x\n")
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        with open(fn) as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print x # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print 'x==1'\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code

        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print 'Can not import' # comment2\n"
                             "else:   print 'Loaded'\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random Python modules.
        # Pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        if not test_support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            try:
                with open(testfile, 'rb') as f:
                    self.check_roundtrip(f)
            except:
                print "Roundtrip failed for file %s" % testfile
                raise

    def roundtrip(self, code):
        if isinstance(code, str):
            code = code.encode('utf-8')
        tokens = generate_tokens(StringIO(code).readline)
        return untokenize(tokens).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])


def test_main():
    test_support.run_unittest(TokenizeTest)
    test_support.run_unittest(UntokenizeTest)
    test_support.run_unittest(TestRoundtrip)
    test_support.run_unittest(TestMisc)

if __name__ == "__main__":
    test_main()