# testlex.py
#
# Tests for ply.lex: errors and warnings reported while building lexers,
# table-generation/optimization options, and run-time lexer behavior.
#
# The lex_*.py modules are imported by bare name and the lex_*.py scripts are
# launched by relative path, so both are resolved relative to the current
# working directory / sys.path at the time the tests run.

import unittest
try:
    import StringIO
except ImportError:
    import io as StringIO

import importlib
import os
import platform
import shutil
import subprocess
import sys
import warnings

sys.path.insert(0, "..")
sys.tracebacklimit = 0

import ply.lex

try:
    from importlib.util import cache_from_source
except ImportError:
    # Python 2.7, but we don't care.
    cache_from_source = None


# Output printed by the small calculator lexers used in the build-option tests.
CALC_OUTPUT = ("(NUMBER,3,1,0)\n"
               "(PLUS,'+',1,1)\n"
               "(NUMBER,4,1,2)\n")

# Same as CALC_OUTPUT, but for the lexer whose '+' token type is the literal '+'.
ALIAS_OUTPUT = ("(NUMBER,3,1,0)\n"
                "(+,'+',1,1)\n"
                "(NUMBER,4,1,2)\n")

# Output expected from lex_many_tokens.
MANY_TOKENS_OUTPUT = ("(TOK34,'TOK34:',1,0)\n"
                      "(TOK143,'TOK143:',1,7)\n"
                      "(TOK269,'TOK269:',1,15)\n"
                      "(TOK372,'TOK372:',1,23)\n"
                      "(TOK452,'TOK452:',1,31)\n"
                      "(TOK561,'TOK561:',1,39)\n"
                      "(TOK999,'TOK999:',1,47)\n")


def make_pymodule_path(filename, optimization=None):
    """Return the path where the compiled form of *filename* is expected.

    The location depends on the running interpreter version:

    * 3.5 and later: ``cache_from_source(filename, optimization=...)``.
    * 3.4: ``cache_from_source(filename, debug_override)``, where the second
      argument is true exactly when *filename* ends in ``.pyc``.
    * 3.2 / 3.3: ``__pycache__/<mod>.<tag><ext>`` next to *filename*.
    * anything older: *filename* itself.

    *optimization* is only consulted on 3.5 and later.
    """
    directory = os.path.dirname(filename)
    basename = os.path.basename(filename)
    mod, ext = os.path.splitext(basename)

    if sys.hexversion >= 0x3050000:
        return cache_from_source(filename, optimization=optimization)
    if sys.hexversion >= 0x3040000:
        return cache_from_source(filename, ext == '.pyc')
    if sys.hexversion >= 0x3020000:
        # Imported lazily: only these interpreter versions need ``imp``.
        import imp
        modname = mod + "." + imp.get_tag() + ext
        return os.path.join(directory, '__pycache__', modname)
    return filename


def pymodule_out_exists(filename, optimization=None):
    """Return True if the compiled file for *filename* exists on disk."""
    return os.path.exists(make_pymodule_path(filename,
                                             optimization=optimization))


def pymodule_out_remove(filename, optimization=None):
    """Delete the compiled file for *filename*.

    Raises OSError (from ``os.remove``) if the file cannot be removed.
    """
    os.remove(make_pymodule_path(filename, optimization=optimization))


def pymodule_out_remove_quietly(filename, optimization=None):
    """Like pymodule_out_remove(), but ignore OSError (best-effort cleanup)."""
    try:
        pymodule_out_remove(filename, optimization=optimization)
    except OSError:
        pass


def remove_quietly(*paths):
    """Delete each of *paths*, ignoring OSError (e.g. the file is absent)."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass


def rmtree_quietly(path):
    """Recursively delete *path*, ignoring OSError."""
    try:
        shutil.rmtree(path)
    except OSError:
        pass


def implementation():
    """Return "Jython", "PyPy" or "CPython" for the running interpreter."""
    if platform.system().startswith("Java"):
        return "Jython"
    elif hasattr(sys, "pypy_version_info"):
        return "PyPy"
    else:
        return "CPython"


# Checks on optimized bytecode files are only performed on CPython.
test_pyo = (implementation() == 'CPython')


def check_expected(result, expected, contains=False):
    """Compare *result* with *expected* line by line.

    On Python 3, ``str`` arguments are first encoded to ASCII bytes so that
    text and bytes (e.g. subprocess output) can be compared uniformly.

    Both values must have the same number of lines.  Each result line must
    end with the corresponding expected line, or, when *contains* is true,
    merely contain it.  Returns True on a full match, False otherwise.
    """
    if sys.version_info[0] >= 3:
        if isinstance(result, str):
            result = result.encode('ascii')
        if isinstance(expected, str):
            expected = expected.encode('ascii')
    resultlines = result.splitlines()
    expectedlines = expected.splitlines()

    if len(resultlines) != len(expectedlines):
        return False

    for rline, eline in zip(resultlines, expectedlines):
        if contains:
            if eline not in rline:
                return False
        elif not rline.endswith(eline):
            # Suffix match: anything preceding the expected text on the
            # line is tolerated.
            return False
    return True


def run_import(module):
    """Import *module* by name, then drop it from ``sys.modules``.

    Removing the entry means a later call imports (and therefore executes)
    the module again.  Any exception raised by the import propagates.
    """
    try:
        importlib.import_module(module)
    finally:
        # A failed import leaves no entry behind, hence the default.
        sys.modules.pop(module, None)


def run_script(script, flag):
    """Run *script* in a child interpreter started with *flag*.

    Returns the child's standard output as read from the pipe.  The child is
    waited for and its pipe closed before returning.
    """
    proc = subprocess.Popen([sys.executable, flag, script],
                            stdout=subprocess.PIPE)
    out, _ = proc.communicate()
    return out


class CapturedOutputTestCase(unittest.TestCase):
    """Base class that swaps sys.stdout/sys.stderr for in-memory buffers."""

    def setUp(self):
        # Remember whatever streams are active so tearDown() restores those
        # rather than unconditionally resetting to sys.__stdout__/__stderr__.
        self._saved_streams = (sys.stdout, sys.stderr)
        sys.stderr = StringIO.StringIO()
        sys.stdout = StringIO.StringIO()

    def tearDown(self):
        sys.stdout, sys.stderr = self._saved_streams

    def assertOutput(self, result, expected, contains=False):
        """Fail unless check_expected(result, expected, contains) is true."""
        self.assertTrue(check_expected(result, expected, contains=contains),
                        "unexpected output: %r" % (result,))


# Tests related to errors and warnings when building lexers
class LexErrorWarningTests(CapturedOutputTestCase):
    def setUp(self):
        CapturedOutputTestCase.setUp(self)
        if sys.hexversion >= 0x3020000:
            warnings.filterwarnings('ignore', category=ResourceWarning)

    def _assert_import_fails(self, module, expected, contains=False):
        """Importing *module* must raise SyntaxError and log *expected*."""
        self.assertRaises(SyntaxError, run_import, module)
        self.assertOutput(sys.stderr.getvalue(), expected, contains=contains)

    def _assert_import_warns(self, module, expected):
        """Importing *module* must succeed and log *expected* on stderr."""
        run_import(module)
        self.assertOutput(sys.stderr.getvalue(), expected)

    def test_lex_doc1(self):
        self._assert_import_fails(
            "lex_doc1",
            "lex_doc1.py:18: No regular expression defined for rule 't_NUMBER'\n")

    def test_lex_dup1(self):
        self._assert_import_fails(
            "lex_dup1",
            "lex_dup1.py:20: Rule t_NUMBER redefined. Previously defined on line 18\n")

    def test_lex_dup2(self):
        self._assert_import_fails(
            "lex_dup2",
            "lex_dup2.py:22: Rule t_NUMBER redefined. Previously defined on line 18\n")

    def test_lex_dup3(self):
        self._assert_import_fails(
            "lex_dup3",
            "lex_dup3.py:20: Rule t_NUMBER redefined. Previously defined on line 18\n")

    def test_lex_empty(self):
        self._assert_import_fails(
            "lex_empty",
            "No rules of the form t_rulename are defined\n"
            "No rules defined for state 'INITIAL'\n")

    def test_lex_error1(self):
        self._assert_import_warns(
            "lex_error1",
            "No t_error rule is defined\n")

    def test_lex_error2(self):
        self._assert_import_fails(
            "lex_error2",
            "Rule 't_error' must be defined as a function\n")

    def test_lex_error3(self):
        self._assert_import_fails(
            "lex_error3",
            "lex_error3.py:20: Rule 't_error' requires an argument\n")

    def test_lex_error4(self):
        self._assert_import_fails(
            "lex_error4",
            "lex_error4.py:20: Rule 't_error' has too many arguments\n")

    def test_lex_ignore(self):
        self._assert_import_fails(
            "lex_ignore",
            "lex_ignore.py:20: Rule 't_ignore' must be defined as a string\n")

    def test_lex_ignore2(self):
        self._assert_import_warns(
            "lex_ignore2",
            "t_ignore contains a literal backslash '\\'\n")

    def test_lex_re1(self):
        # The regex error text differs between interpreter versions.
        if sys.hexversion < 0x3050000:
            msg = "Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis\n"
        else:
            msg = "Invalid regular expression for rule 't_NUMBER'. missing ), unterminated subpattern at position 0"
        self._assert_import_fails("lex_re1", msg, contains=True)

    def test_lex_re2(self):
        self._assert_import_fails(
            "lex_re2",
            "Regular expression for rule 't_PLUS' matches empty string\n")

    def test_lex_re3(self):
        # The regex error text differs between interpreter versions.
        if sys.hexversion < 0x3050000:
            msg = ("Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n"
                   "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n")
        else:
            msg = ("Invalid regular expression for rule 't_POUND'. missing ), unterminated subpattern at position 0\n"
                   "ERROR: Make sure '#' in rule 't_POUND' is escaped with '\\#'")
        self._assert_import_fails("lex_re3", msg, contains=True)

    def test_lex_rule1(self):
        self._assert_import_fails(
            "lex_rule1",
            "t_NUMBER not defined as a function or string\n")

    def test_lex_rule2(self):
        self._assert_import_fails(
            "lex_rule2",
            "lex_rule2.py:18: Rule 't_NUMBER' requires an argument\n")

    def test_lex_rule3(self):
        self._assert_import_fails(
            "lex_rule3",
            "lex_rule3.py:18: Rule 't_NUMBER' has too many arguments\n")

    def test_lex_state1(self):
        self._assert_import_fails(
            "lex_state1",
            "states must be defined as a tuple or list\n")

    def test_lex_state2(self):
        self._assert_import_fails(
            "lex_state2",
            "Invalid state specifier 'comment'. Must be a tuple (statename,'exclusive|inclusive')\n"
            "Invalid state specifier 'example'. Must be a tuple (statename,'exclusive|inclusive')\n")

    def test_lex_state3(self):
        self._assert_import_fails(
            "lex_state3",
            "State name 1 must be a string\n"
            "No rules defined for state 'example'\n")

    def test_lex_state4(self):
        self._assert_import_fails(
            "lex_state4",
            "State type for state comment must be 'inclusive' or 'exclusive'\n")

    def test_lex_state5(self):
        self._assert_import_fails(
            "lex_state5",
            "State 'comment' already defined\n")

    def test_lex_state_noerror(self):
        self._assert_import_warns(
            "lex_state_noerror",
            "No error rule is defined for exclusive state 'comment'\n")

    def test_lex_state_norule(self):
        self._assert_import_fails(
            "lex_state_norule",
            "No rules defined for state 'example'\n")

    def test_lex_token1(self):
        self._assert_import_fails(
            "lex_token1",
            "No token list is defined\n"
            "Rule 't_NUMBER' defined for an unspecified token NUMBER\n"
            "Rule 't_PLUS' defined for an unspecified token PLUS\n"
            "Rule 't_MINUS' defined for an unspecified token MINUS\n")

    def test_lex_token2(self):
        self._assert_import_fails(
            "lex_token2",
            "tokens must be a list or tuple\n"
            "Rule 't_NUMBER' defined for an unspecified token NUMBER\n"
            "Rule 't_PLUS' defined for an unspecified token PLUS\n"
            "Rule 't_MINUS' defined for an unspecified token MINUS\n")

    def test_lex_token3(self):
        self._assert_import_fails(
            "lex_token3",
            "Rule 't_MINUS' defined for an unspecified token MINUS\n")

    def test_lex_token4(self):
        self._assert_import_fails(
            "lex_token4",
            "Bad token name '-'\n")

    def test_lex_token5(self):
        # NOTE(review): as written, this test also passes when no LexError
        # is raised at all -- confirm whether that leniency is intended.
        try:
            run_import("lex_token5")
        except ply.lex.LexError as e:
            self.assertOutput(
                str(e),
                "lex_token5.py:19: Rule 't_NUMBER' returned an unknown token type 'NUM'")

    def test_lex_token_dup(self):
        self._assert_import_warns(
            "lex_token_dup",
            "Token 'MINUS' multiply defined\n")

    def test_lex_literal1(self):
        self._assert_import_fails(
            "lex_literal1",
            "Invalid literal '**'. Must be a single character\n")

    def test_lex_literal2(self):
        self._assert_import_fails(
            "lex_literal2",
            "Invalid literals specification. literals must be a sequence of characters\n")


# Tests related to various build options associated with lexers
class LexBuildOptionTests(CapturedOutputTestCase):
    def tearDown(self):
        CapturedOutputTestCase.tearDown(self)
        rmtree_quietly("lexdir")

    def _assert_import_prints(self, module, expected):
        """Importing *module* must print *expected* on stdout."""
        run_import(module)
        self.assertOutput(sys.stdout.getvalue(), expected)

    def test_lex_module(self):
        self._assert_import_prints("lex_module", CALC_OUTPUT)

    def test_lex_object(self):
        self._assert_import_prints("lex_object", CALC_OUTPUT)

    def test_lex_closure(self):
        self._assert_import_prints("lex_closure", CALC_OUTPUT)

    def test_lex_optimize(self):
        remove_quietly("lextab.py", "lextab.pyc", "lextab.pyo")
        self._assert_import_prints("lex_optimize", CALC_OUTPUT)
        self.assertTrue(os.path.exists("lextab.py"))

        self.assertOutput(run_script('lex_optimize.py', '-O'), CALC_OUTPUT)
        if test_pyo:
            self.assertTrue(pymodule_out_exists("lextab.pyo", 1))
            pymodule_out_remove("lextab.pyo", 1)

        self.assertOutput(run_script('lex_optimize.py', '-OO'), CALC_OUTPUT)
        if test_pyo:
            self.assertTrue(pymodule_out_exists("lextab.pyo", 2))

        remove_quietly("lextab.py")
        pymodule_out_remove_quietly("lextab.pyc")
        pymodule_out_remove_quietly("lextab.pyo", 2)

    def test_lex_optimize2(self):
        remove_quietly("opt2tab.py", "opt2tab.pyc", "opt2tab.pyo")
        self._assert_import_prints("lex_optimize2", CALC_OUTPUT)
        self.assertTrue(os.path.exists("opt2tab.py"))

        self.assertOutput(run_script('lex_optimize2.py', '-O'), CALC_OUTPUT)
        if test_pyo:
            self.assertTrue(pymodule_out_exists("opt2tab.pyo", 1))
            pymodule_out_remove("opt2tab.pyo", 1)

        self.assertOutput(run_script('lex_optimize2.py', '-OO'), CALC_OUTPUT)
        if test_pyo:
            self.assertTrue(pymodule_out_exists("opt2tab.pyo", 2))

        remove_quietly("opt2tab.py")
        pymodule_out_remove_quietly("opt2tab.pyc")
        pymodule_out_remove_quietly("opt2tab.pyo", 2)

    def test_lex_optimize3(self):
        rmtree_quietly("lexdir")

        # Build an empty package tree lexdir/sub for the generated table.
        os.mkdir("lexdir")
        os.mkdir("lexdir/sub")
        for init_file in ("lexdir/__init__.py", "lexdir/sub/__init__.py"):
            with open(init_file, "w") as f:
                f.write("")

        self._assert_import_prints("lex_optimize3", CALC_OUTPUT)
        self.assertTrue(os.path.exists("lexdir/sub/calctab.py"))

        self.assertOutput(run_script('lex_optimize3.py', '-O'), CALC_OUTPUT)
        if test_pyo:
            self.assertTrue(pymodule_out_exists("lexdir/sub/calctab.pyo", 1))
            pymodule_out_remove("lexdir/sub/calctab.pyo", 1)

        self.assertOutput(run_script('lex_optimize3.py', '-OO'), CALC_OUTPUT)
        if test_pyo:
            self.assertTrue(pymodule_out_exists("lexdir/sub/calctab.pyo", 2))

        rmtree_quietly("lexdir")

    def test_lex_opt_alias(self):
        remove_quietly("aliastab.py", "aliastab.pyc", "aliastab.pyo")
        self._assert_import_prints("lex_opt_alias", ALIAS_OUTPUT)
        self.assertTrue(os.path.exists("aliastab.py"))

        self.assertOutput(run_script('lex_opt_alias.py', '-O'), ALIAS_OUTPUT)
        if test_pyo:
            self.assertTrue(pymodule_out_exists("aliastab.pyo", 1))
            pymodule_out_remove("aliastab.pyo", 1)

        self.assertOutput(run_script('lex_opt_alias.py', '-OO'), ALIAS_OUTPUT)
        if test_pyo:
            self.assertTrue(pymodule_out_exists("aliastab.pyo", 2))

        remove_quietly("aliastab.py")
        pymodule_out_remove_quietly("aliastab.pyc")
        pymodule_out_remove_quietly("aliastab.pyo", 2)

    def test_lex_many_tokens(self):
        remove_quietly("manytab.py", "manytab.pyc", "manytab.pyo")
        self._assert_import_prints("lex_many_tokens", MANY_TOKENS_OUTPUT)
        self.assertTrue(os.path.exists("manytab.py"))

        if implementation() == 'CPython':
            self.assertOutput(run_script('lex_many_tokens.py', '-O'),
                              MANY_TOKENS_OUTPUT)
            self.assertTrue(pymodule_out_exists("manytab.pyo", 1))
            pymodule_out_remove("manytab.pyo", 1)

        remove_quietly("manytab.py", "manytab.pyc", "manytab.pyo")


# Tests related to run-time behavior of lexers
class LexRunTests(CapturedOutputTestCase):
    def test_lex_hedit(self):
        run_import("lex_hedit")
        self.assertOutput(
            sys.stdout.getvalue(),
            "(H_EDIT_DESCRIPTOR,'abc',1,0)\n"
            "(H_EDIT_DESCRIPTOR,'abcdefghij',1,6)\n"
            "(H_EDIT_DESCRIPTOR,'xy',1,20)\n")

    def test_lex_state_try(self):
        run_import("lex_state_try")
        self.assertOutput(
            sys.stdout.getvalue(),
            "(NUMBER,'3',1,0)\n"
            "(PLUS,'+',1,2)\n"
            "(NUMBER,'4',1,4)\n"
            "Entering comment state\n"
            "comment body LexToken(body_part,'This is a comment */',1,9)\n"
            "(PLUS,'+',1,30)\n"
            "(NUMBER,'10',1,32)\n")


if __name__ == '__main__':
    unittest.main()