from test import test_support
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
                      STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER is omitted for
    # brevity.

    def check_tokenize(self, s, expected):
        # Tokenize the source fragment `s` and render one table row per
        # token: type name (10 columns), repr of the token text (truncated
        # and padded to 13 columns), start position and end position.
        # The rows are compared with the lines of `expected`, whose trailing
        # whitespace is stripped first.  The ENDMARKER is omitted.
        result = []
        f = StringIO(s)
        for toktype, token, start, end, line in generate_tokens(f.readline):
            if toktype == ENDMARKER:
                break
            # Explicit arguments instead of locals(), so that the builtin
            # `type` is not shadowed; the rendered row is unchanged.
            result.append("    %-10.10s %-13.13r %s %s" %
                          (tok_name[toktype], token, start, end))
        self.assertEqual(result,
                         expected.rstrip().splitlines())


    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)

        # The last line dedents to a column that matches no enclosing
        # indentation level, which the tokenizer must reject.
        indent_error_file = """\
def k(x):
    x += 2
  x += 5
"""
        with self.assertRaisesRegexp(IndentationError,
                                     "unindent does not match any "
                                     "outer indentation level"):
            for tok in generate_tokens(StringIO(indent_error_file).readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0123'        (1, 9) (1, 13)
    """)
        self.check_tokenize("01234567 > ~0x15", """\
    NUMBER     '01234567'    (1, 0) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    OP         '~'           (1, 11) (1, 12)
    NUMBER     '0x15'        (1, 12) (1, 16)
    """)
        self.check_tokenize("2134568 != 01231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '01231515'    (1, 11) (1, 19)
    """)
        self.check_tokenize("(-124561-1) & 0200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '0200000000'  (1, 14) (1, 24)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 012345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '012345'      (1, 13) (1, 19)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers.  Token reprs longer than 13 characters are
        # truncated by the table format, hence the unterminated quotes.
        self.check_tokenize("x = 0L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0L'          (1, 4) (1, 6)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110l", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 26)
    """)
        self.check_tokenize("x = -15921590215012591L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 23)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = u'abc' + U'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "u'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "U'ABC'"      (1, 13) (1, 19)
    """)
        self.check_tokenize('y = u"ABC" + U"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'u"ABC"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'U"ABC"'      (1, 13) (1, 19)
    """)
        self.check_tokenize("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "ur'abc'"     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     "Ur'ABC'"     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     "uR'ABC'"     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     "UR'ABC'"     (1, 34) (1, 41)
    """)
        self.check_tokenize('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'ur"abc"'     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     'Ur"ABC"'     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     'uR"ABC"'     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     'UR"ABC"'     (1, 34) (1, 41)

    """)
        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)

    def test_function(self):
        # Function definitions with default, * and ** parameters
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 01 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '01'          (1, 17) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    NUMBER     '0x124'       (1, 22) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    NAME       'z'           (1, 30) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    NAME       'a'           (1, 34) (1, 35)
    OP         '['           (1, 35) (1, 36)
    NUMBER     '5'           (1, 36) (1, 37)
    OP         ']'           (1, 37) (1, 38)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\n"
                            "x = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\n"
                            "def foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_pathological_trailing_whitespace(self):
        # Pathological whitespace (http://bugs.python.org/issue16152)
        self.check_tokenize("@ ", """\
    OP         '@'           (1, 0) (1, 1)
    """)


def decistmt(s):
    # Rewrite the statement string `s` so that every NUMBER token containing
    # a '.' is replaced by the token sequence Decimal('<literal>').  All other
    # tokens pass through unchanged.  Only (type, string) pairs are handed to
    # untokenize(), so the spacing of the returned source is whatever
    # untokenize() produces for two-element tokens.
    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)


class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 12 digits, and the 13th isn't close to 5, the
        # rest of the output should be platform-independent.
        # The decimal point is escaped so it cannot match an arbitrary
        # character.
        self.assertRegexpMatches(str(eval(s)), r'-3\.21716034272e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)), Decimal('-3.217160342717258261933904529E-7'))


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                'start (1,3) precedes previous end (2,2)')
        # raise if previous column in row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        # Two rows further down at column 4: two continuation lines are
        # emitted, followed by four spaces of padding.
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])

    def test_iter_compat(self):
        # compat() and untokenize() must accept an iterator of
        # two-element tokens.
        u = Untokenizer()
        token = (NAME, 'Hello')
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized, converted back to source code
        via tokenize.untokenize(), and tokenized again from the latter.
        The test fails if the second tokenization doesn't match the first.

        `f` is always closed before returning, even when tokenizing raises.
        """
        if isinstance(f, str):
            f = StringIO(f)
        try:
            token_list = list(generate_tokens(f.readline))
        finally:
            f.close()
        # Only (type, string) pairs are compared; positions are allowed to
        # change across the roundtrip.
        tokens1 = [tok[:2] for tok in token_list]
        new_text = untokenize(tokens1)
        readline = iter(new_text.splitlines(True)).next
        tokens2 = [tok[:2] for tok in generate_tokens(readline)]
        self.assertEqual(tokens2, tokens1)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")

        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print x\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon. Note that we use hex escapes to make the
        # two trailing blanks apparent in the expected output.

        self.check_roundtrip("if x == 1 : \n"
                             "  print x\n")
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        with open(fn) as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print x # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print 'x==1'\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code

        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print 'Can not import' # comment2\n"
                             "else: print 'Loaded'\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.

        import glob
        import random
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        if not test_support.is_resource_enabled("cpu"):
            # Clamp the sample size: random.sample() raises ValueError when
            # asked for more items than the population holds.
            testfiles = random.sample(testfiles, min(10, len(testfiles)))

        for testfile in testfiles:
            try:
                with open(testfile, 'rb') as f:
                    self.check_roundtrip(f)
            except Exception:
                # Name the offending file, then let the failure propagate.
                print("Roundtrip failed for file %s" % testfile)
                raise


    def roundtrip(self, code):
        # Tokenize `code` with full position information and return the
        # source text rebuilt by untokenize(), decoded from UTF-8.
        if isinstance(code, str):
            code = code.encode('utf-8')
        tokens = generate_tokens(StringIO(code).readline)
        return untokenize(tokens).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])


def test_main():
    # Run every test class in a single call so that a failure in one class
    # does not prevent the remaining classes from running and reporting.
    test_support.run_unittest(TokenizeTest,
                              UntokenizeTest,
                              TestRoundtrip,
                              TestMisc)

if __name__ == "__main__":
    test_main()