# Tests for the tokenize module (doctest-driven).
#
# The module-level `doctests` string below is a large doctest suite; it is
# registered in __test__ and executed by test_support.run_doctest() from
# test_main().  NOTE(review): this is Python 2 code (StringIO module,
# `print x` statements inside tokenized samples, u''/ur'' literals).
doctests = """
Tests for the tokenize module.

    >>> import glob, random, sys

The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARK is omitted for
brevity.

    >>> dump_tokens("1 + 1")
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)

    >>> dump_tokens("if False:\\n"
    ...             "    # NL\\n"
    ...             "    True = False # NEWLINE\\n")
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)

    >>> indent_error_file = \"""
    ... def k(x):
    ...     x += 2
    ...   x += 5
    ... \"""

    >>> for tok in generate_tokens(StringIO(indent_error_file).readline): pass
    Traceback (most recent call last):
        ...
    IndentationError: unindent does not match any outer indentation level

Test roundtrip for `untokenize`. `f` is an open file or a string. The source
code in f is tokenized, converted back to source code via tokenize.untokenize(),
and tokenized again from the latter. The test fails if the second tokenization
doesn't match the first.

    >>> def roundtrip(f):
    ...     if isinstance(f, str): f = StringIO(f)
    ...     token_list = list(generate_tokens(f.readline))
    ...     f.close()
    ...     tokens1 = [tok[:2] for tok in token_list]
    ...     new_text = untokenize(tokens1)
    ...     readline = iter(new_text.splitlines(1)).next
    ...     tokens2 = [tok[:2] for tok in generate_tokens(readline)]
    ...     return tokens1 == tokens2
    ...

There are some standard formatting practices that are easy to get right.

    >>> roundtrip("if x == 1:\\n"
    ...           "    print x\\n")
    True

    >>> roundtrip("# This is a comment\\n# This also")
    True

Some people use different formatting conventions, which makes
untokenize a little trickier. Note that this test involves trailing
whitespace after the colon. Note that we use hex escapes to make the
two trailing blanks apperant in the expected output.

    >>> roundtrip("if x == 1 :  \\n"
    ...           "  print x\\n")
    True

    >>> f = test_support.findfile("tokenize_tests" + os.extsep + "txt")
    >>> roundtrip(open(f))
    True

    >>> roundtrip("if x == 1:\\n"
    ...           "    # A comment by itself.\\n"
    ...           "    print x # Comment here, too.\\n"
    ...           "    # Another comment.\\n"
    ...           "after_if = True\\n")
    True

    >>> roundtrip("if (x # The comments need to go in the right place\\n"
    ...           "    == 1):\\n"
    ...           "    print 'x==1'\\n")
    True

    >>> roundtrip("class Test: # A comment here\\n"
    ...           "  # A comment with weird indent\\n"
    ...           "  after_com = 5\\n"
    ...           "  def x(m): return m*5 # a one liner\\n"
    ...           "  def y(m): # A whitespace after the colon\\n"
    ...           "     return y*4 # 3-space indent\\n")
    True

Some error-handling code

    >>> roundtrip("try: import somemodule\\n"
    ...           "except ImportError: # comment\\n"
    ...           "    print 'Can not import' # comment2\\n"
    ...           "else:   print 'Loaded'\\n")
    True

Balancing continuation

    >>> roundtrip("a = (3,4, \\n"
    ...           "5,6)\\n"
    ...           "y = [3, 4,\\n"
    ...           "5]\\n"
    ...           "z = {'a': 5,\\n"
    ...           "'b':15, 'c':True}\\n"
    ...           "x = len(y) + 5 - a[\\n"
    ...           "3] - a[2]\\n"
    ...           "+ len(z) - z[\\n"
    ...           "'b']\\n")
    True

Ordinary integers and binary operators

    >>> dump_tokens("0xff <= 255")
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    >>> dump_tokens("0b10 <= 255")
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    >>> dump_tokens("0o123 <= 0123")
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0123'        (1, 9) (1, 13)
    >>> dump_tokens("01234567 > ~0x15")
    NUMBER     '01234567'    (1, 0) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    OP         '~'           (1, 11) (1, 12)
    NUMBER     '0x15'        (1, 12) (1, 16)
    >>> dump_tokens("2134568 != 01231515")
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '01231515'    (1, 11) (1, 19)
    >>> dump_tokens("(-124561-1) & 0200000000")
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '0200000000'  (1, 14) (1, 24)
    >>> dump_tokens("0xdeadbeef != -1")
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    >>> dump_tokens("0xdeadc0de & 012345")
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '012345'      (1, 13) (1, 19)
    >>> dump_tokens("0xFF & 0x15 | 1234")
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)

Long integers

    >>> dump_tokens("x = 0L")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0L'          (1, 4) (1, 6)
    >>> dump_tokens("x = 0xfffffffffff")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    >>> dump_tokens("x = 123141242151251616110l")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 26)
    >>> dump_tokens("x = -15921590215012591L")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 23)

Floating point numbers

    >>> dump_tokens("x = 3.14159")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 314159.")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    >>> dump_tokens("x = .314159")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3e14159")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3E123")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    >>> dump_tokens("x+y = 3e-1230")
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    >>> dump_tokens("x = 3.14e159")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)

String literals

    >>> dump_tokens("x = ''; y = \\\"\\\"")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    >>> dump_tokens("x = u'abc' + U'ABC'")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "u'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "U'ABC'"      (1, 13) (1, 19)
    >>> dump_tokens('y = u"ABC" + U"ABC"')
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'u"ABC"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'U"ABC"'      (1, 13) (1, 19)
    >>> dump_tokens("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "ur'abc'"     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     "Ur'ABC'"     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     "uR'ABC'"     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     "UR'ABC'"     (1, 34) (1, 41)
    >>> dump_tokens('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"')
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'ur"abc"'     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     'Ur"ABC"'     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     'uR"ABC"'     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     'UR"ABC"'     (1, 34) (1, 41)

    >>> dump_tokens("b'abc' + B'abc'")
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    >>> dump_tokens('b"abc" + B"abc"')
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)

Operators

    >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)

Comparison

    >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
    ...             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)

Shift

    >>> dump_tokens("x = 1 << 1 >> 5")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)

Additive

    >>> dump_tokens("x = 1 - y + 15 - 01 + 0x124 + z + a[5]")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '01'          (1, 17) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    NUMBER     '0x124'       (1, 22) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    NAME       'z'           (1, 30) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    NAME       'a'           (1, 34) (1, 35)
    OP         '['           (1, 35) (1, 36)
    NUMBER     '5'           (1, 36) (1, 37)
    OP         ']'           (1, 37) (1, 38)

Multiplicative

    >>> dump_tokens("x = 1//1*1/5*12%0x12")
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)

Unary

    >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)

Selector

    >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)

Methods

    >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)

Backslash means line continuation, except for comments

    >>> roundtrip("x=1+\\\\n"
    ...           "1\\n"
    ...           "# This is a comment\\\\n"
    ...           "# This also\\n")
    True
    >>> roundtrip("# Comment \\\\nx = 0")
    True

Two string literals on the same line

    >>> roundtrip("'' ''")
    True

Test roundtrip on random python modules.
pass the '-ucpu' option to process the full directory.

    >>>
    >>> tempdir = os.path.dirname(f) or os.curdir
    >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

    >>> if not test_support.is_resource_enabled("cpu"):
    ...     testfiles = random.sample(testfiles, 10)
    ...

    >>> for testfile in testfiles:
    ...     if not roundtrip(open(testfile)):
    ...         print "Roundtrip failed for file %s" % testfile
    ...         break
    ... else: True
    True

Evil tabs
    >>> dump_tokens("def f():\\n\\tif x\\n        \\tpass")
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)

Pathological whitespace (http://bugs.python.org/issue16152)
    >>> dump_tokens("@          ")
    OP         '@'           (1, 0) (1, 1)
"""


from test import test_support
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
                      STRING, ENDMARKER, tok_name)
from StringIO import StringIO
import os


def dump_tokens(s):
    """Print out the tokens in s in a table format.

    The ENDMARKER is omitted.
    """
    f = StringIO(s)
    for type, token, start, end, line in generate_tokens(f.readline):
        if type == ENDMARKER:
            break
        type = tok_name[type]
        # Columns: token type (10 wide), repr of the token text truncated to
        # 13 characters, then start and end (row, col) positions.
        print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())


# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """

    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)


# Register the module-level doctest string (and decistmt's docstring) so
# run_doctest() picks them up.
__test__ = {"doctests": doctests, 'decistmt': decistmt}


def test_main():
    from test import test_tokenize
    test_support.run_doctest(test_tokenize, True)


if __name__ == "__main__":
    test_main()