Home | History | Annotate | Download | only in test
      1 doctests = """
      2 Tests for the tokenize module.
      3 
      4     >>> import glob, random, sys
      5 
      6 The tests can be really simple. Given a small fragment of source
      7 code, print out a table with tokens. The ENDMARKER is omitted for
      8 brevity.
      9 
     10     >>> dump_tokens("1 + 1")
     11     NUMBER     '1'           (1, 0) (1, 1)
     12     OP         '+'           (1, 2) (1, 3)
     13     NUMBER     '1'           (1, 4) (1, 5)
     14 
     15     >>> dump_tokens("if False:\\n"
     16     ...             "    # NL\\n"
     17     ...             "    True = False # NEWLINE\\n")
     18     NAME       'if'          (1, 0) (1, 2)
     19     NAME       'False'       (1, 3) (1, 8)
     20     OP         ':'           (1, 8) (1, 9)
     21     NEWLINE    '\\n'          (1, 9) (1, 10)
     22     COMMENT    '# NL'        (2, 4) (2, 8)
     23     NL         '\\n'          (2, 8) (2, 9)
     24     INDENT     '    '        (3, 0) (3, 4)
     25     NAME       'True'        (3, 4) (3, 8)
     26     OP         '='           (3, 9) (3, 10)
     27     NAME       'False'       (3, 11) (3, 16)
     28     COMMENT    '# NEWLINE'   (3, 17) (3, 26)
     29     NEWLINE    '\\n'          (3, 26) (3, 27)
     30     DEDENT     ''            (4, 0) (4, 0)
     31 
     32     >>> indent_error_file = \"""
     33     ... def k(x):
     34     ...     x += 2
     35     ...   x += 5
     36     ... \"""
     37 
     38     >>> for tok in generate_tokens(StringIO(indent_error_file).readline): pass
     39     Traceback (most recent call last):
     40         ...
     41     IndentationError: unindent does not match any outer indentation level
     42 
     43 Test roundtrip for `untokenize`. `f` is an open file or a string. The source
     44 code in f is tokenized, converted back to source code via tokenize.untokenize(),
     45 and tokenized again from the latter. The test fails if the second tokenization
     46 doesn't match the first.
     47 
     48     >>> def roundtrip(f):
     49     ...     if isinstance(f, str): f = StringIO(f)
     50     ...     token_list = list(generate_tokens(f.readline))
     51     ...     f.close()
     52     ...     tokens1 = [tok[:2] for tok in token_list]
     53     ...     new_text = untokenize(tokens1)
     54     ...     readline = iter(new_text.splitlines(1)).next
     55     ...     tokens2 = [tok[:2] for tok in generate_tokens(readline)]
     56     ...     return tokens1 == tokens2
     57     ...
     58 
     59 There are some standard formatting practices that are easy to get right.
     60 
     61     >>> roundtrip("if x == 1:\\n"
     62     ...           "    print x\\n")
     63     True
     64 
     65     >>> roundtrip("# This is a comment\\n# This also")
     66     True
     67 
     68 Some people use different formatting conventions, which makes
     69 untokenize a little trickier. Note that this test involves trailing
     70 whitespace after the colon. Note that we use hex escapes to make the
     71 two trailing blanks apparent in the expected output.
     72 
     73     >>> roundtrip("if x == 1 : \\n"
     74     ...           "  print x\\n")
     75     True
     76 
     77     >>> f = test_support.findfile("tokenize_tests" + os.extsep + "txt")
     78     >>> roundtrip(open(f))
     79     True
     80 
     81     >>> roundtrip("if x == 1:\\n"
     82     ...           "    # A comment by itself.\\n"
     83     ...           "    print x # Comment here, too.\\n"
     84     ...           "    # Another comment.\\n"
     85     ...           "after_if = True\\n")
     86     True
     87 
     88     >>> roundtrip("if (x # The comments need to go in the right place\\n"
     89     ...           "    == 1):\\n"
     90     ...           "    print 'x==1'\\n")
     91     True
     92 
     93     >>> roundtrip("class Test: # A comment here\\n"
     94     ...           "  # A comment with weird indent\\n"
     95     ...           "  after_com = 5\\n"
     96     ...           "  def x(m): return m*5 # a one liner\\n"
     97     ...           "  def y(m): # A whitespace after the colon\\n"
     98     ...           "     return y*4 # 3-space indent\\n")
     99     True
    100 
    101 Some error-handling code
    102 
    103     >>> roundtrip("try: import somemodule\\n"
    104     ...           "except ImportError: # comment\\n"
    105     ...           "    print 'Can not import' # comment2\\n"
    106     ...           "else:   print 'Loaded'\\n")
    107     True
    108 
    109 Balancing continuation
    110 
    111     >>> roundtrip("a = (3,4, \\n"
    112     ...           "5,6)\\n"
    113     ...           "y = [3, 4,\\n"
    114     ...           "5]\\n"
    115     ...           "z = {'a': 5,\\n"
    116     ...           "'b':15, 'c':True}\\n"
    117     ...           "x = len(y) + 5 - a[\\n"
    118     ...           "3] - a[2]\\n"
    119     ...           "+ len(z) - z[\\n"
    120     ...           "'b']\\n")
    121     True
    122 
    123 Ordinary integers and binary operators
    124 
    125     >>> dump_tokens("0xff <= 255")
    126     NUMBER     '0xff'        (1, 0) (1, 4)
    127     OP         '<='          (1, 5) (1, 7)
    128     NUMBER     '255'         (1, 8) (1, 11)
    129     >>> dump_tokens("0b10 <= 255")
    130     NUMBER     '0b10'        (1, 0) (1, 4)
    131     OP         '<='          (1, 5) (1, 7)
    132     NUMBER     '255'         (1, 8) (1, 11)
    133     >>> dump_tokens("0o123 <= 0123")
    134     NUMBER     '0o123'       (1, 0) (1, 5)
    135     OP         '<='          (1, 6) (1, 8)
    136     NUMBER     '0123'        (1, 9) (1, 13)
    137     >>> dump_tokens("01234567 > ~0x15")
    138     NUMBER     '01234567'    (1, 0) (1, 8)
    139     OP         '>'           (1, 9) (1, 10)
    140     OP         '~'           (1, 11) (1, 12)
    141     NUMBER     '0x15'        (1, 12) (1, 16)
    142     >>> dump_tokens("2134568 != 01231515")
    143     NUMBER     '2134568'     (1, 0) (1, 7)
    144     OP         '!='          (1, 8) (1, 10)
    145     NUMBER     '01231515'    (1, 11) (1, 19)
    146     >>> dump_tokens("(-124561-1) & 0200000000")
    147     OP         '('           (1, 0) (1, 1)
    148     OP         '-'           (1, 1) (1, 2)
    149     NUMBER     '124561'      (1, 2) (1, 8)
    150     OP         '-'           (1, 8) (1, 9)
    151     NUMBER     '1'           (1, 9) (1, 10)
    152     OP         ')'           (1, 10) (1, 11)
    153     OP         '&'           (1, 12) (1, 13)
    154     NUMBER     '0200000000'  (1, 14) (1, 24)
    155     >>> dump_tokens("0xdeadbeef != -1")
    156     NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    157     OP         '!='          (1, 11) (1, 13)
    158     OP         '-'           (1, 14) (1, 15)
    159     NUMBER     '1'           (1, 15) (1, 16)
    160     >>> dump_tokens("0xdeadc0de & 012345")
    161     NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    162     OP         '&'           (1, 11) (1, 12)
    163     NUMBER     '012345'      (1, 13) (1, 19)
    164     >>> dump_tokens("0xFF & 0x15 | 1234")
    165     NUMBER     '0xFF'        (1, 0) (1, 4)
    166     OP         '&'           (1, 5) (1, 6)
    167     NUMBER     '0x15'        (1, 7) (1, 11)
    168     OP         '|'           (1, 12) (1, 13)
    169     NUMBER     '1234'        (1, 14) (1, 18)
    170 
    171 Long integers
    172 
    173     >>> dump_tokens("x = 0L")
    174     NAME       'x'           (1, 0) (1, 1)
    175     OP         '='           (1, 2) (1, 3)
    176     NUMBER     '0L'          (1, 4) (1, 6)
    177     >>> dump_tokens("x = 0xfffffffffff")
    178     NAME       'x'           (1, 0) (1, 1)
    179     OP         '='           (1, 2) (1, 3)
    180     NUMBER     '0xffffffffff (1, 4) (1, 17)
    181     >>> dump_tokens("x = 123141242151251616110l")
    182     NAME       'x'           (1, 0) (1, 1)
    183     OP         '='           (1, 2) (1, 3)
    184     NUMBER     '123141242151 (1, 4) (1, 26)
    185     >>> dump_tokens("x = -15921590215012591L")
    186     NAME       'x'           (1, 0) (1, 1)
    187     OP         '='           (1, 2) (1, 3)
    188     OP         '-'           (1, 4) (1, 5)
    189     NUMBER     '159215902150 (1, 5) (1, 23)
    190 
    191 Floating point numbers
    192 
    193     >>> dump_tokens("x = 3.14159")
    194     NAME       'x'           (1, 0) (1, 1)
    195     OP         '='           (1, 2) (1, 3)
    196     NUMBER     '3.14159'     (1, 4) (1, 11)
    197     >>> dump_tokens("x = 314159.")
    198     NAME       'x'           (1, 0) (1, 1)
    199     OP         '='           (1, 2) (1, 3)
    200     NUMBER     '314159.'     (1, 4) (1, 11)
    201     >>> dump_tokens("x = .314159")
    202     NAME       'x'           (1, 0) (1, 1)
    203     OP         '='           (1, 2) (1, 3)
    204     NUMBER     '.314159'     (1, 4) (1, 11)
    205     >>> dump_tokens("x = 3e14159")
    206     NAME       'x'           (1, 0) (1, 1)
    207     OP         '='           (1, 2) (1, 3)
    208     NUMBER     '3e14159'     (1, 4) (1, 11)
    209     >>> dump_tokens("x = 3E123")
    210     NAME       'x'           (1, 0) (1, 1)
    211     OP         '='           (1, 2) (1, 3)
    212     NUMBER     '3E123'       (1, 4) (1, 9)
    213     >>> dump_tokens("x+y = 3e-1230")
    214     NAME       'x'           (1, 0) (1, 1)
    215     OP         '+'           (1, 1) (1, 2)
    216     NAME       'y'           (1, 2) (1, 3)
    217     OP         '='           (1, 4) (1, 5)
    218     NUMBER     '3e-1230'     (1, 6) (1, 13)
    219     >>> dump_tokens("x = 3.14e159")
    220     NAME       'x'           (1, 0) (1, 1)
    221     OP         '='           (1, 2) (1, 3)
    222     NUMBER     '3.14e159'    (1, 4) (1, 12)
    223 
    224 String literals
    225 
    226     >>> dump_tokens("x = ''; y = \\\"\\\"")
    227     NAME       'x'           (1, 0) (1, 1)
    228     OP         '='           (1, 2) (1, 3)
    229     STRING     "''"          (1, 4) (1, 6)
    230     OP         ';'           (1, 6) (1, 7)
    231     NAME       'y'           (1, 8) (1, 9)
    232     OP         '='           (1, 10) (1, 11)
    233     STRING     '""'          (1, 12) (1, 14)
    234     >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
    235     NAME       'x'           (1, 0) (1, 1)
    236     OP         '='           (1, 2) (1, 3)
    237     STRING     '\\'"\\''       (1, 4) (1, 7)
    238     OP         ';'           (1, 7) (1, 8)
    239     NAME       'y'           (1, 9) (1, 10)
    240     OP         '='           (1, 11) (1, 12)
    241     STRING     '"\\'"'        (1, 13) (1, 16)
    242     >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
    243     NAME       'x'           (1, 0) (1, 1)
    244     OP         '='           (1, 2) (1, 3)
    245     STRING     '"doesn\\'t "' (1, 4) (1, 14)
    246     NAME       'shrink'      (1, 14) (1, 20)
    247     STRING     '", does it"' (1, 20) (1, 31)
    248     >>> dump_tokens("x = u'abc' + U'ABC'")
    249     NAME       'x'           (1, 0) (1, 1)
    250     OP         '='           (1, 2) (1, 3)
    251     STRING     "u'abc'"      (1, 4) (1, 10)
    252     OP         '+'           (1, 11) (1, 12)
    253     STRING     "U'ABC'"      (1, 13) (1, 19)
    254     >>> dump_tokens('y = u"ABC" + U"ABC"')
    255     NAME       'y'           (1, 0) (1, 1)
    256     OP         '='           (1, 2) (1, 3)
    257     STRING     'u"ABC"'      (1, 4) (1, 10)
    258     OP         '+'           (1, 11) (1, 12)
    259     STRING     'U"ABC"'      (1, 13) (1, 19)
    260     >>> dump_tokens("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'")
    261     NAME       'x'           (1, 0) (1, 1)
    262     OP         '='           (1, 2) (1, 3)
    263     STRING     "ur'abc'"     (1, 4) (1, 11)
    264     OP         '+'           (1, 12) (1, 13)
    265     STRING     "Ur'ABC'"     (1, 14) (1, 21)
    266     OP         '+'           (1, 22) (1, 23)
    267     STRING     "uR'ABC'"     (1, 24) (1, 31)
    268     OP         '+'           (1, 32) (1, 33)
    269     STRING     "UR'ABC'"     (1, 34) (1, 41)
    270     >>> dump_tokens('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"')
    271     NAME       'y'           (1, 0) (1, 1)
    272     OP         '='           (1, 2) (1, 3)
    273     STRING     'ur"abc"'     (1, 4) (1, 11)
    274     OP         '+'           (1, 12) (1, 13)
    275     STRING     'Ur"ABC"'     (1, 14) (1, 21)
    276     OP         '+'           (1, 22) (1, 23)
    277     STRING     'uR"ABC"'     (1, 24) (1, 31)
    278     OP         '+'           (1, 32) (1, 33)
    279     STRING     'UR"ABC"'     (1, 34) (1, 41)
    280 
    281     >>> dump_tokens("b'abc' + B'abc'")
    282     STRING     "b'abc'"      (1, 0) (1, 6)
    283     OP         '+'           (1, 7) (1, 8)
    284     STRING     "B'abc'"      (1, 9) (1, 15)
    285     >>> dump_tokens('b"abc" + B"abc"')
    286     STRING     'b"abc"'      (1, 0) (1, 6)
    287     OP         '+'           (1, 7) (1, 8)
    288     STRING     'B"abc"'      (1, 9) (1, 15)
    289     >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
    290     STRING     "br'abc'"     (1, 0) (1, 7)
    291     OP         '+'           (1, 8) (1, 9)
    292     STRING     "bR'abc'"     (1, 10) (1, 17)
    293     OP         '+'           (1, 18) (1, 19)
    294     STRING     "Br'abc'"     (1, 20) (1, 27)
    295     OP         '+'           (1, 28) (1, 29)
    296     STRING     "BR'abc'"     (1, 30) (1, 37)
    297     >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
    298     STRING     'br"abc"'     (1, 0) (1, 7)
    299     OP         '+'           (1, 8) (1, 9)
    300     STRING     'bR"abc"'     (1, 10) (1, 17)
    301     OP         '+'           (1, 18) (1, 19)
    302     STRING     'Br"abc"'     (1, 20) (1, 27)
    303     OP         '+'           (1, 28) (1, 29)
    304     STRING     'BR"abc"'     (1, 30) (1, 37)
    305 
    306 Operators
    307 
    308     >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
    309     NAME       'def'         (1, 0) (1, 3)
    310     NAME       'd22'         (1, 4) (1, 7)
    311     OP         '('           (1, 7) (1, 8)
    312     NAME       'a'           (1, 8) (1, 9)
    313     OP         ','           (1, 9) (1, 10)
    314     NAME       'b'           (1, 11) (1, 12)
    315     OP         ','           (1, 12) (1, 13)
    316     NAME       'c'           (1, 14) (1, 15)
    317     OP         '='           (1, 15) (1, 16)
    318     NUMBER     '2'           (1, 16) (1, 17)
    319     OP         ','           (1, 17) (1, 18)
    320     NAME       'd'           (1, 19) (1, 20)
    321     OP         '='           (1, 20) (1, 21)
    322     NUMBER     '2'           (1, 21) (1, 22)
    323     OP         ','           (1, 22) (1, 23)
    324     OP         '*'           (1, 24) (1, 25)
    325     NAME       'k'           (1, 25) (1, 26)
    326     OP         ')'           (1, 26) (1, 27)
    327     OP         ':'           (1, 27) (1, 28)
    328     NAME       'pass'        (1, 29) (1, 33)
    329     >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
    330     NAME       'def'         (1, 0) (1, 3)
    331     NAME       'd01v_'       (1, 4) (1, 9)
    332     OP         '('           (1, 9) (1, 10)
    333     NAME       'a'           (1, 10) (1, 11)
    334     OP         '='           (1, 11) (1, 12)
    335     NUMBER     '1'           (1, 12) (1, 13)
    336     OP         ','           (1, 13) (1, 14)
    337     OP         '*'           (1, 15) (1, 16)
    338     NAME       'k'           (1, 16) (1, 17)
    339     OP         ','           (1, 17) (1, 18)
    340     OP         '**'          (1, 19) (1, 21)
    341     NAME       'w'           (1, 21) (1, 22)
    342     OP         ')'           (1, 22) (1, 23)
    343     OP         ':'           (1, 23) (1, 24)
    344     NAME       'pass'        (1, 25) (1, 29)
    345 
    346 Comparison
    347 
    348     >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
    349     ...             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
    350     NAME       'if'          (1, 0) (1, 2)
    351     NUMBER     '1'           (1, 3) (1, 4)
    352     OP         '<'           (1, 5) (1, 6)
    353     NUMBER     '1'           (1, 7) (1, 8)
    354     OP         '>'           (1, 9) (1, 10)
    355     NUMBER     '1'           (1, 11) (1, 12)
    356     OP         '=='          (1, 13) (1, 15)
    357     NUMBER     '1'           (1, 16) (1, 17)
    358     OP         '>='          (1, 18) (1, 20)
    359     NUMBER     '5'           (1, 21) (1, 22)
    360     OP         '<='          (1, 23) (1, 25)
    361     NUMBER     '0x15'        (1, 26) (1, 30)
    362     OP         '<='          (1, 31) (1, 33)
    363     NUMBER     '0x12'        (1, 34) (1, 38)
    364     OP         '!='          (1, 39) (1, 41)
    365     NUMBER     '1'           (1, 42) (1, 43)
    366     NAME       'and'         (1, 44) (1, 47)
    367     NUMBER     '5'           (1, 48) (1, 49)
    368     NAME       'in'          (1, 50) (1, 52)
    369     NUMBER     '1'           (1, 53) (1, 54)
    370     NAME       'not'         (1, 55) (1, 58)
    371     NAME       'in'          (1, 59) (1, 61)
    372     NUMBER     '1'           (1, 62) (1, 63)
    373     NAME       'is'          (1, 64) (1, 66)
    374     NUMBER     '1'           (1, 67) (1, 68)
    375     NAME       'or'          (1, 69) (1, 71)
    376     NUMBER     '5'           (1, 72) (1, 73)
    377     NAME       'is'          (1, 74) (1, 76)
    378     NAME       'not'         (1, 77) (1, 80)
    379     NUMBER     '1'           (1, 81) (1, 82)
    380     OP         ':'           (1, 82) (1, 83)
    381     NAME       'pass'        (1, 84) (1, 88)
    382 
    383 Shift
    384 
    385     >>> dump_tokens("x = 1 << 1 >> 5")
    386     NAME       'x'           (1, 0) (1, 1)
    387     OP         '='           (1, 2) (1, 3)
    388     NUMBER     '1'           (1, 4) (1, 5)
    389     OP         '<<'          (1, 6) (1, 8)
    390     NUMBER     '1'           (1, 9) (1, 10)
    391     OP         '>>'          (1, 11) (1, 13)
    392     NUMBER     '5'           (1, 14) (1, 15)
    393 
    394 Additive
    395 
    396     >>> dump_tokens("x = 1 - y + 15 - 01 + 0x124 + z + a[5]")
    397     NAME       'x'           (1, 0) (1, 1)
    398     OP         '='           (1, 2) (1, 3)
    399     NUMBER     '1'           (1, 4) (1, 5)
    400     OP         '-'           (1, 6) (1, 7)
    401     NAME       'y'           (1, 8) (1, 9)
    402     OP         '+'           (1, 10) (1, 11)
    403     NUMBER     '15'          (1, 12) (1, 14)
    404     OP         '-'           (1, 15) (1, 16)
    405     NUMBER     '01'          (1, 17) (1, 19)
    406     OP         '+'           (1, 20) (1, 21)
    407     NUMBER     '0x124'       (1, 22) (1, 27)
    408     OP         '+'           (1, 28) (1, 29)
    409     NAME       'z'           (1, 30) (1, 31)
    410     OP         '+'           (1, 32) (1, 33)
    411     NAME       'a'           (1, 34) (1, 35)
    412     OP         '['           (1, 35) (1, 36)
    413     NUMBER     '5'           (1, 36) (1, 37)
    414     OP         ']'           (1, 37) (1, 38)
    415 
    416 Multiplicative
    417 
    418     >>> dump_tokens("x = 1//1*1/5*12%0x12")
    419     NAME       'x'           (1, 0) (1, 1)
    420     OP         '='           (1, 2) (1, 3)
    421     NUMBER     '1'           (1, 4) (1, 5)
    422     OP         '//'          (1, 5) (1, 7)
    423     NUMBER     '1'           (1, 7) (1, 8)
    424     OP         '*'           (1, 8) (1, 9)
    425     NUMBER     '1'           (1, 9) (1, 10)
    426     OP         '/'           (1, 10) (1, 11)
    427     NUMBER     '5'           (1, 11) (1, 12)
    428     OP         '*'           (1, 12) (1, 13)
    429     NUMBER     '12'          (1, 13) (1, 15)
    430     OP         '%'           (1, 15) (1, 16)
    431     NUMBER     '0x12'        (1, 16) (1, 20)
    432 
    433 Unary
    434 
    435     >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
    436     OP         '~'           (1, 0) (1, 1)
    437     NUMBER     '1'           (1, 1) (1, 2)
    438     OP         '^'           (1, 3) (1, 4)
    439     NUMBER     '1'           (1, 5) (1, 6)
    440     OP         '&'           (1, 7) (1, 8)
    441     NUMBER     '1'           (1, 9) (1, 10)
    442     OP         '|'           (1, 11) (1, 12)
    443     NUMBER     '1'           (1, 12) (1, 13)
    444     OP         '^'           (1, 14) (1, 15)
    445     OP         '-'           (1, 16) (1, 17)
    446     NUMBER     '1'           (1, 17) (1, 18)
    447     >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
    448     OP         '-'           (1, 0) (1, 1)
    449     NUMBER     '1'           (1, 1) (1, 2)
    450     OP         '*'           (1, 2) (1, 3)
    451     NUMBER     '1'           (1, 3) (1, 4)
    452     OP         '/'           (1, 4) (1, 5)
    453     NUMBER     '1'           (1, 5) (1, 6)
    454     OP         '+'           (1, 6) (1, 7)
    455     NUMBER     '1'           (1, 7) (1, 8)
    456     OP         '*'           (1, 8) (1, 9)
    457     NUMBER     '1'           (1, 9) (1, 10)
    458     OP         '//'          (1, 10) (1, 12)
    459     NUMBER     '1'           (1, 12) (1, 13)
    460     OP         '-'           (1, 14) (1, 15)
    461     OP         '-'           (1, 16) (1, 17)
    462     OP         '-'           (1, 17) (1, 18)
    463     OP         '-'           (1, 18) (1, 19)
    464     NUMBER     '1'           (1, 19) (1, 20)
    465     OP         '**'          (1, 20) (1, 22)
    466     NUMBER     '1'           (1, 22) (1, 23)
    467 
    468 Selector
    469 
    470     >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
    471     NAME       'import'      (1, 0) (1, 6)
    472     NAME       'sys'         (1, 7) (1, 10)
    473     OP         ','           (1, 10) (1, 11)
    474     NAME       'time'        (1, 12) (1, 16)
    475     NEWLINE    '\\n'          (1, 16) (1, 17)
    476     NAME       'x'           (2, 0) (2, 1)
    477     OP         '='           (2, 2) (2, 3)
    478     NAME       'sys'         (2, 4) (2, 7)
    479     OP         '.'           (2, 7) (2, 8)
    480     NAME       'modules'     (2, 8) (2, 15)
    481     OP         '['           (2, 15) (2, 16)
    482     STRING     "'time'"      (2, 16) (2, 22)
    483     OP         ']'           (2, 22) (2, 23)
    484     OP         '.'           (2, 23) (2, 24)
    485     NAME       'time'        (2, 24) (2, 28)
    486     OP         '('           (2, 28) (2, 29)
    487     OP         ')'           (2, 29) (2, 30)
    488 
    489 Methods
    490 
    491     >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
    492     OP         '@'           (1, 0) (1, 1)
    493     NAME       'staticmethod (1, 1) (1, 13)
    494     NEWLINE    '\\n'          (1, 13) (1, 14)
    495     NAME       'def'         (2, 0) (2, 3)
    496     NAME       'foo'         (2, 4) (2, 7)
    497     OP         '('           (2, 7) (2, 8)
    498     NAME       'x'           (2, 8) (2, 9)
    499     OP         ','           (2, 9) (2, 10)
    500     NAME       'y'           (2, 10) (2, 11)
    501     OP         ')'           (2, 11) (2, 12)
    502     OP         ':'           (2, 12) (2, 13)
    503     NAME       'pass'        (2, 14) (2, 18)
    504 
    505 Backslash means line continuation, except for comments
    506 
    507     >>> roundtrip("x=1+\\\\n"
    508     ...           "1\\n"
    509     ...           "# This is a comment\\\\n"
    510     ...           "# This also\\n")
    511     True
    512     >>> roundtrip("# Comment \\\\nx = 0")
    513     True
    514 
    515 Two string literals on the same line
    516 
    517     >>> roundtrip("'' ''")
    518     True
    519 
    520 Test roundtrip on random python modules.
    521 pass the '-ucpu' option to process the full directory.
    522 
    523     >>>
    524     >>> tempdir = os.path.dirname(f) or os.curdir
    525     >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
    526 
    527     >>> if not test_support.is_resource_enabled("cpu"):
    528     ...     testfiles = random.sample(testfiles, 10)
    529     ...
    530     >>> for testfile in testfiles:
    531     ...     if not roundtrip(open(testfile)):
    532     ...         print "Roundtrip failed for file %s" % testfile
    533     ...         break
    534     ... else: True
    535     True
    536 
    537 Evil tabs
    538     >>> dump_tokens("def f():\\n\\tif x\\n        \\tpass")
    539     NAME       'def'         (1, 0) (1, 3)
    540     NAME       'f'           (1, 4) (1, 5)
    541     OP         '('           (1, 5) (1, 6)
    542     OP         ')'           (1, 6) (1, 7)
    543     OP         ':'           (1, 7) (1, 8)
    544     NEWLINE    '\\n'          (1, 8) (1, 9)
    545     INDENT     '\\t'          (2, 0) (2, 1)
    546     NAME       'if'          (2, 1) (2, 3)
    547     NAME       'x'           (2, 4) (2, 5)
    548     NEWLINE    '\\n'          (2, 5) (2, 6)
    549     INDENT     '        \\t'  (3, 0) (3, 9)
    550     NAME       'pass'        (3, 9) (3, 13)
    551     DEDENT     ''            (4, 0) (4, 0)
    552     DEDENT     ''            (4, 0) (4, 0)
    553 
    554 Pathological whitespace (http://bugs.python.org/issue16152)
    555     >>> dump_tokens("@          ")
    556     OP         '@'           (1, 0) (1, 1)
    557 """
    558 
    559 
    560 from test import test_support
    561 from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
    562                      STRING, ENDMARKER, tok_name)
    563 from StringIO import StringIO
    564 import os
    565 
    566 def dump_tokens(s):
    567     """Print out the tokens in s in a table format.
    568 
    569     The ENDMARKER is omitted.
    570     """
    571     f = StringIO(s)
    572     for type, token, start, end, line in generate_tokens(f.readline):
    573         if type == ENDMARKER:
    574             break
    575         type = tok_name[type]
    576         print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
    577 
    578 # This is an example from the docs, set up as a doctest.
    579 def decistmt(s):
    580     """Substitute Decimals for floats in a string of statements.
    581 
    582     >>> from decimal import Decimal
    583     >>> s = 'print +21.3e-5*-.1234/81.7'
    584     >>> decistmt(s)
    585     "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"
    586 
    587     The format of the exponent is inherited from the platform C library.
    588     Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    589     we're only showing 12 digits, and the 13th isn't close to 5, the
    590     rest of the output should be platform-independent.
    591 
    592     >>> exec(s) #doctest: +ELLIPSIS
    593     -3.21716034272e-0...7
    594 
    595     Output from calculations with Decimal should be identical across all
    596     platforms.
    597 
    598     >>> exec(decistmt(s))
    599     -3.217160342717258261933904529E-7
    600     """
    601 
    602     result = []
    603     g = generate_tokens(StringIO(s).readline)   # tokenize the string
    604     for toknum, tokval, _, _, _  in g:
    605         if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
    606             result.extend([
    607                 (NAME, 'Decimal'),
    608                 (OP, '('),
    609                 (STRING, repr(tokval)),
    610                 (OP, ')')
    611             ])
    612         else:
    613             result.append((toknum, tokval))
    614     return untokenize(result)
    615 
    616 
    617 __test__ = {"doctests" : doctests, 'decistmt': decistmt}
    618 
    619 
    620 def test_main():
    621     from test import test_tokenize
    622     test_support.run_doctest(test_tokenize, True)
    623 
    624 if __name__ == "__main__":
    625     test_main()
    626