#!/usr/bin/env python

"""
Node classes for a C declaration parser.

(c) 2002, 2003, 2004, 2005 Simon Burton <simon (at] arrowtheory.com>
Released under GNU LGPL license.
"""

import sys

from lexer import Lexer
from parse_core import Symbols, Parser
import node as node_module


class Node(node_module.Node):
    """Base parse-tree node; children are sub-nodes or plain tokens."""

    def is_typedef(self):
        # A node is a typedef if any (possibly nested) child node says so.
        for x in self:
            if isinstance(x, Node):
                if x.is_typedef():
                    return 1
        return 0

    def psource(self):
        # Echo the recorded source lines (if the lexer attached any)
        # as a '#' comment block.
        # BUG FIX: the original called string.join() without importing the
        # long-deprecated 'string' module, raising NameError at runtime.
        if hasattr(self, 'lines'):
            print("# " + "\n# ".join(self.lines) + "\n")


###################################################################
#
###################################################################
#


class BasicType(Node):
    " int char short etc. "
    def __init__(self, name):
        Node.__init__(self, name)


class Qualifier(Node):
    " const, volatile, etc. "
    def __init__(self, name):
        Node.__init__(self, name)
        self.name = name


class StorageClass(Node):
    " static, extern, register, etc. "
    def __init__(self, name):
        Node.__init__(self, name)
        self.name = name


class Typedef(StorageClass):
    " the 'typedef' storage class "
    def __init__(self, s='typedef'):
        Node.__init__(self, s)


class Ellipses(Node):
    " the '...' varargs marker in a parameter list "
    def __init__(self, s='...'):
        Node.__init__(self, s)


class GCCBuiltin(BasicType):
    " a gcc builtin type, treated like a BasicType "
    pass


class Identifier(Node):
    " a (possibly empty) identifier; extra items become children "
    def __init__(self, name="", *items):
        # NOTE: the original guarded this with a dead "if name or 1:"
        # branch; the first form was always taken.
        Node.__init__(self, name, *items)
        self.name = name


class Function(Node, Parser):
    " a function signature: children are the ParameterDeclarations "
    def __init__(self, *items):
        Node.__init__(self, *items)

    def parse(self, lexer, symbols):
        """Parse the parameter list; lexer.tok is just past the '('."""
        symbols = Symbols(symbols)  # parameters live in a nested scope
        if lexer.tok != ')':
            if not lexer.tok:
                self.parse_error(lexer)
            while lexer.tok != ')':
                node = ParameterDeclaration()
                node.parse(lexer, symbols)
                self.append(node)
                if lexer.tok != ')' and lexer.tok != ',':
                    self.parse_error(lexer)
                if lexer.tok == ',':
                    lexer.get_token()
        lexer.get_token()  # read past the ')'


class Pointer(Node):
    " one level of pointer indirection in a declarator "
    def __init__(self, *items):
        Node.__init__(self, *items)


class Array(Node, Parser):
    " an array suffix; child is the raw size expression text, or None "
    def __init__(self, *items):
        Node.__init__(self, *items)

    def parse(self, lexer, symbols):
        lexer.get_token()  # a number or ']'
        # HACK: constant c expressions can appear in here, eg.
        #   [ 15 * sizeof (int) - 2 * sizeof (void *) ]
        # so we just collect the raw tokens up to the ']'.
        toks = []
        while lexer.tok != ']':
            toks.append(lexer.tok)
            lexer.get_token()
        child = " ".join(toks)
        if child == "":
            child = None
        self.append(child)
        lexer.get_token()  # read past the ']'


class Tag(Node):
    " the (possibly empty) tag of a struct/union/enum "
    pass


class Compound(Node, Parser):
    "Struct or Union"

    def __init__(self, *items, **kw):
        Node.__init__(self, *items, **kw)

    def parse(self, lexer, symbols):
        """Parse '[tag] [{ fields }]'; sets self.tag, appends Tag + fields."""
        symbols = Symbols(symbols)  # members live in a nested scope
        tag = ""  # anonymous
        if lexer.tok != '{':
            tag = lexer.tok
            if not (tag[0] == '_' or tag[0].isalpha()):
                self.parse_error(lexer, "expected tag, got '%s'" % tag)
            lexer.get_token()
        if tag:
            self.append(Tag(tag))
        else:
            self.append(Tag())
        self.tag = tag
        if lexer.tok == '{':
            fieldlist = []
            lexer.get_token()
            if lexer.tok != '}':
                if not lexer.tok:
                    self.parse_error(lexer)
                while lexer.tok != '}':
                    node = StructDeclaration()
                    node.parse(lexer, symbols)
                    fieldlist.append(node)
            self += fieldlist
            lexer.get_token()  # read past the '}'
        if self.verbose:
            print("%s.__init__() #<--" % (self,))


class Struct(Compound):
    " a C struct "
    pass
class Union(Compound):
    " a C union "
    pass


class Enum(Node, Parser):
    " a C enum: children are the Tag and the enumerator Identifiers "
    def __init__(self, *items, **kw):
        Node.__init__(self, *items, **kw)

    def parse(self, lexer, symbols):
        """Parse '[tag] [{ enumerators }]'; installs enumerators in symbols."""
        tag = ""  # anonymous
        if lexer.tok != '{':
            tag = lexer.tok
            if not (tag[0] == '_' or tag[0].isalpha()):
                self.parse_error(lexer, "expected tag, got '%s'" % tag)
            lexer.get_token()
        if tag:
            self.append(Tag(tag))
        else:
            self.append(Tag())
        self.tag = tag
        if lexer.tok == '{':
            lexer.get_token()
            if lexer.tok != '}':
                if not lexer.tok:
                    self.parse_error(lexer)
                while lexer.tok != '}':
                    if lexer.kind is not None:
                        self.expected_error(lexer, "identifier")
                    ident = Identifier(lexer.tok)
                    if symbols[ident[0]] is not None:
                        self.parse_error(lexer, "%s already defined." % ident[0])
                    symbols[ident[0]] = ident
                    self.append(ident)
                    lexer.get_token()
                    if lexer.tok == '=':
                        lexer.get_token()
                        # ConstantExpr
                        # XX hack: skip the initializer tokens up to ',' or '}'
                        while lexer.tok != ',' and lexer.tok != '}':
                            lexer.get_token()
                    if lexer.tok != '}':
                        if lexer.tok != ',':
                            self.expected_error(lexer, "}", ",")
                        lexer.get_token()  # ','
            lexer.get_token()  # read past the '}'
        if self.verbose:
            print("%s.__init__() #<--" % (self,))


class Declarator(Node, Parser):
    """
    A declarator: the identifier together with its pointer / array /
    function decorations, read inside-out per C declarator syntax.
    """
    def __init__(self, *items):
        Node.__init__(self, *items)
        self.ident = None  # Identifier node, or None for abstract declarators

    def parse(self, lexer, symbols):
        stack = []
        # read up to identifier, pushing tokens onto stack
        self.ident = self.parse_identifier(lexer, symbols, stack)
        self.name = ''
        if self.ident is not None:
            self.append(self.ident)
            self.name = self.ident.name
        # now read outwards from identifier
        self.parse_declarator(lexer, symbols, stack)

    def parse_identifier(self, lexer, symbols, stack):
        """Scan towards the identifier, stacking '(' and '*' tokens."""
        if self.verbose:
            print("%s.parse_identifier()" % self)
        ident = None
        if lexer.tok != ';':
            while lexer.tok and lexer.kind is not None:
                stack.append((lexer.tok, lexer.kind))
                lexer.get_token()
            if lexer.tok:
                ident = Identifier(lexer.tok)
                lexer.get_token()
        if self.verbose:
            print("%s.parse_identifier()=%s" % (self, repr(ident)))
        return ident

    def parse_declarator(self, lexer, symbols, stack, level=0):
        """Unwind outward from the identifier: arrays/functions to the
        right, then pointers and parenthesised groups from the stack."""
        if self.verbose:
            print(" " * level + "%s.parse_declarator(%s) # --->" %
                  (self, stack))
        if lexer.tok == '[':
            while lexer.tok == '[':
                node = Array()
                node.parse(lexer, symbols)
                self.append(node)
            if lexer.tok == '(':
                self.parse_error(lexer, "array of functions")
        elif lexer.tok == '(':
            lexer.get_token()
            node = Function()
            node.parse(lexer, symbols)
            self.append(node)
            if lexer.tok == '(':
                self.parse_error(lexer, "function returns a function")
            if lexer.tok == '[':
                self.parse_error(lexer, "function returns an array")
        while stack:
            tok, kind = stack[-1]  # peek
            if tok == '(':
                stack.pop()
                self.consume(lexer, ')')
                self.parse_declarator(lexer, symbols, stack, level + 1)
            elif tok == '*':
                stack.pop()
                self.append(Pointer())
            else:
                tok, kind = stack.pop()
                self.append(kind)
        if self.verbose:
            print(" " * level + "%s.parse_declarator(%s) # <---" %
                  (self, stack))


class AbstractDeclarator(Declarator):
    """ used in ParameterDeclaration; may lack an identifier """

    def parse_identifier(self, lexer, symbols, stack):
        if self.verbose:
            print("%s.parse_identifier()" % self)
        # default to an anonymous Identifier; replaced if a name shows up
        # (the original also had a dead "ident = None" here)
        ident = Identifier()
        while 1:
            if lexer.tok == ';':
                self.parse_error(lexer)
            if lexer.tok == ')':
                break
            if lexer.tok == ',':
                break
            if lexer.tok == '[':
                break
            if lexer.kind is None:
                ident = Identifier(lexer.tok)
                lexer.get_token()
                break
            stack.append((lexer.tok, lexer.kind))
            lexer.get_token()
        if self.verbose:
            print("%s.parse_identifier()=%s" % (self, repr(ident)))
        return ident


class FieldLength(Node):
    " bit-field width in a struct declarator "
    pass


class StructDeclarator(Declarator):
    " declarator inside a struct/union; handles ': width' bit-fields "
    def parse(self, lexer, symbols):
        if lexer.tok != ':':
            Declarator.parse(self, lexer, symbols)
        if lexer.tok == ':':
            lexer.get_token()
            # ConstantExpr -- only a plain integer literal is supported
            length = int(lexer.tok)
            self.append(FieldLength(length))
            lexer.get_token()


class DeclarationSpecifiers(Node, Parser):
    " the specifier part of a declaration: type, qualifiers, storage class "
    def __init__(self, *items):
        Node.__init__(self, *items)

    def __eq__(self, other):
        " unordered (set/bag) equality "
        if not isinstance(other, Node):
            return 0
        for i in range(len(self)):
            if not self[i] in other:
                return 0
        for i in range(len(other)):
            if not other[i] in self:
                return 0
        return 1

    def parse(self, lexer, symbols):
        self.parse_spec(lexer, symbols)
        self.reverse()

    def parse_spec(self, lexer, symbols):
        """Consume specifier tokens until an identifier or unknown token."""
        typespec = None
        while lexer.tok:
            if isinstance(lexer.kind, TypeAlias) or \
               isinstance(lexer.kind, BasicType):
                if typespec is not None:
                    self.parse_error(lexer, "type already specified as %s"
                                     % typespec)
                typespec = lexer.kind
                self.append(lexer.kind)
                lexer.get_token()
            elif isinstance(lexer.kind, Qualifier):
                self.append(lexer.kind)
                lexer.get_token()
            elif isinstance(lexer.kind, StorageClass):
                self.append(lexer.kind)
                lexer.get_token()
            elif lexer.tok == 'struct':
                lexer.get_token()
                self.parse_struct(lexer, symbols)
                break  # ?
            elif lexer.tok == 'union':
                lexer.get_token()
                self.parse_union(lexer, symbols)
                break  # ?
            elif lexer.tok == 'enum':
                lexer.get_token()
                self.parse_enum(lexer, symbols)
                break  # ?
            elif lexer.kind is None:
                # identifier
                break
            else:
                break

    def parse_struct(self, lexer, symbols):
        """Parse a struct specifier; reconcile its tag with prior uses."""
        if self.verbose:
            print("%s.parse_struct()" % (self,))
        node = Struct()
        node.parse(lexer, symbols)
        _node = None
        if node.tag:
            _node = symbols.get_tag(node.tag)
        if _node is not None:
            if not isinstance(_node, Struct):
                self.parse_error(lexer, "tag defined as wrong kind")
            if len(node) > 1:
                if len(_node) > 1:
                    self.parse_error(lexer, "tag already defined as %s" % _node)
        # (a disabled experiment here once re-used the previously defined
        #  struct body for forward references)
        if node.tag:
            symbols.set_tag(node.tag, node)
        self.append(node)

    def parse_union(self, lexer, symbols):
        """Parse a union specifier; reconcile its tag with prior uses."""
        if self.verbose:
            # BUG FIX: the original printed 'node' here before it was
            # assigned, raising NameError whenever verbose was enabled.
            print("%s.parse_union()" % (self,))
        node = Union()
        node.parse(lexer, symbols)
        _node = None
        if node.tag:
            _node = symbols.get_tag(node.tag)
        if _node is not None:
            if not isinstance(_node, Union):
                self.parse_error(lexer, "tag %s defined as wrong kind"
                                 % repr(node.tag))
            if len(node) > 1:
                if len(_node) > 1:
                    self.parse_error(lexer, "tag already defined as %s" % _node)
        if node.tag:
            symbols.set_tag(node.tag, node)
        self.append(node)

    def parse_enum(self, lexer, symbols):
        """Parse an enum specifier; reconcile its tag with prior uses."""
        if self.verbose:
            # BUG FIX: same use-before-assignment of 'node' as parse_union.
            print("%s.parse_enum()" % (self,))
        node = Enum()
        node.parse(lexer, symbols)
        _node = None
        if node.tag:
            _node = symbols.get_tag(node.tag)
        if _node is not None:
            if not isinstance(_node, Enum):
                self.parse_error(lexer, "tag defined as wrong kind")
            if len(node) > 1:
                if len(_node) > 1:
                    self.parse_error(lexer, "tag already defined as %s" % _node)
        if node.tag:
            symbols.set_tag(node.tag, node)
        self.append(node)

    def is_typedef(self):
        return self.find(Typedef) is not None

    def needs_declarator(self):
        # a bare struct/union/enum declaration needs no declarator
        for node in self:
            if isinstance(node, (Struct, Enum, Union)):
                return False
        return True


class TypeSpecifiers(DeclarationSpecifiers):
    " used in ParameterDeclaration "

    def parse_spec(self, lexer, symbols):
        # Same as DeclarationSpecifiers.parse_spec except that a storage
        # class is illegal in a parameter declaration.
        typespec = None
        while lexer.tok:
            if isinstance(lexer.kind, TypeAlias) or \
               isinstance(lexer.kind, BasicType):
                if typespec is not None:
                    self.parse_error(lexer, "type already specified as %s"
                                     % typespec)
                typespec = lexer.kind
                self.append(lexer.kind)
                lexer.get_token()
            elif isinstance(lexer.kind, Qualifier):
                self.append(lexer.kind)
                lexer.get_token()
            elif isinstance(lexer.kind, StorageClass):
                self.parse_error(lexer, "'%s' cannot appear here" % lexer.tok)
            elif lexer.tok == 'struct':
                lexer.get_token()
                self.parse_struct(lexer, symbols)
                break  # ?
            elif lexer.tok == 'union':
                lexer.get_token()
                self.parse_union(lexer, symbols)
                break  # ?
            elif lexer.tok == 'enum':
                lexer.get_token()
                self.parse_enum(lexer, symbols)
                break  # ?
            elif lexer.kind is None:
                # identifier
                break
            else:
                break


class Initializer(Node, Parser):
    " an '=' initializer (parsing not implemented) "
    def __init__(self, *items):
        Node.__init__(self, *items)

    def parse(self, lexer, symbols):
        self.parse_error(lexer, "not implemented")


class TypeAlias(Node):
    " typedefed things "

    def __init__(self, name, decl=None):
        Node.__init__(self, name)
        self.name = name
        self.decl = decl  # the Declarator that introduced the alias


class Declaration(Node, Parser):
    " a full declaration: DeclarationSpecifiers plus Declarators "
    def __init__(self, *items):
        Node.__init__(self, *items)

    def parse(self, lexer, symbols):
        """Parse one declaration; install names (or typedefs) in symbols.

        A function body, when present, is skipped over brace-by-brace.
        """
        if not lexer.tok:
            return
        Parser.parse_enter(self, lexer)
        declspec = DeclarationSpecifiers()
        declspec.parse(lexer, symbols)
        if len(declspec) == 0:
            if lexer.tok == ';':
                lexer.get_token()
                # empty declaration...
                return
            self.parse_error(lexer,
                             "expected specifiers, got '%s'" % lexer.tok)
        self.append(declspec)
        while 1:
            decl = Declarator()
            decl.parse(lexer, symbols)
            if len(decl) == 0:
                if declspec.needs_declarator():
                    self.parse_error(lexer,
                                     "expected declarator, got '%s'" % lexer.tok)
            self.append(decl)
            ident = decl.ident
            if ident is not None:
                # install symbol
                node = symbols[ident[0]]
                if node is not None:
                    # we allow functions to be defined (as same) again:
                    # compare the two declarations with identifiers removed
                    _node = node.clone()
                    _node.delete(Identifier)
                    _self = self.clone()
                    _self.delete(Identifier)
                    if _node != _self:
                        self.parse_error(lexer,
                                         "\n%s\n already defined as \n%s\n" %
                                         (self.deepstr(), node.deepstr()))
                else:
                    if self.is_typedef():
                        tp = TypeAlias(ident[0], decl)
                        lexer.mktypedef(ident[0], tp)
                    else:
                        symbols[ident[0]] = self
                if lexer.tok == '=':
                    # parse initializer
                    lexer.get_token()
                    init = Initializer()
                    init.parse(lexer, symbols)
                    ident.append(init)  # as in Enum
            # else: struct, union or enum
            if lexer.tok == ';':
                # no more declarators
                break
            if lexer.tok == '{':
                # ! ahhh, function body !!!  skip matched braces
                bcount = 1
                while bcount:
                    lexer.get_brace_token()
                    if lexer.tok == '}':
                        bcount -= 1
                    if lexer.tok == '{':
                        bcount += 1
                lexer.get_token()
                Parser.parse_leave(self, lexer)
                return
            self.consume(lexer, ',')
        self.consume(lexer, ';')
        Parser.parse_leave(self, lexer)

    def is_typedef(self):
        spec = self[0]
        assert isinstance(spec, DeclarationSpecifiers), self.deepstr()
        return spec.is_typedef()


class ParameterDeclaration(Declaration):
    " one parameter in a function signature "
    def parse(self, lexer, symbols):
        typespec = TypeSpecifiers()
        typespec.parse(lexer, symbols)
        self.append(typespec)
        decl = AbstractDeclarator()
        decl.parse(lexer, symbols)
        self.append(decl)
        ident = decl.ident
        if ident is not None and ident[0]:
            node = symbols[ident[0]]
            if node is not None:
                self.parse_error(lexer,
                                 "%s already defined as %s" % (ident, node))
            else:
                symbols[ident[0]] = self


class StructDeclaration(Declaration):
    " one field declaration inside a struct/union body "
    def parse(self, lexer, symbols):
        if not lexer.tok:
            return
        declspec = DeclarationSpecifiers()
        declspec.parse(lexer, symbols)
        self.append(declspec)
        if len(declspec) == 0:
            if lexer.tok == ';':
                lexer.get_token()
                # empty declaration...
                return
            self.parse_error(lexer,
                             "expected specifiers, got '%s'" % lexer.tok)
        while 1:
            decl = StructDeclarator()
            decl.parse(lexer, symbols)
            if len(decl) == 0:
                self.parse_error(lexer,
                                 "expected declarator, got '%s'" % lexer.tok)
            self.append(decl)
            ident = decl.ident
            if ident is not None:
                node = symbols[ident[0]]
                if node is not None:
                    self.parse_error(lexer,
                                     "%s already defined as %s" % (ident, node))
                else:
                    if declspec.is_typedef():
                        self.parse_error(lexer, "typedef in struct or union")
                    else:
                        symbols[ident[0]] = self
            if lexer.tok == ';':
                break
            self.consume(lexer, ',')
        self.consume(lexer, ';')


class TransUnit(Node, Parser):
    " a whole translation unit: a sequence of Declarations "
    def __init__(self, *items, **kw):
        Node.__init__(self, *items, **kw)

    def parse(self, s, verbose=0):
        """Tokenize the C source string 's' and parse all declarations."""
        self.symbols = Symbols()
        self.lexer = Lexer(s, verbose=verbose)
        node = None
        while self.lexer.tok:
            node = Declaration()
            node.parse(self.lexer, self.symbols)
            if node:
                self.append(node)

    def strip(self, files):
        " leave only the declarations from <files> "
        i = 0
        while i < len(self):
            if self[i].file in files:
                i = i + 1
            else:
                self.pop(i)

    def strip_filter(self, cb):
        " leave only the declarations such that cb(file) "
        i = 0
        while i < len(self):
            if cb(self[i].file):
                i = i + 1
            else:
                self.pop(i)

    def assert_no_dups(self):
        # verify no node object appears twice in the tree
        check = {}
        for node in self.nodes():
            # BUG FIX (py3 compat): dict.has_key was removed; use 'in'.
            assert id(node) not in check
            check[id(node)] = 1


# Optional psyco JIT: the deliberately-missing 'NoModule' import disables
# this path; the fallback stub makes the bind() calls below no-ops.
try:
    import NoModule
    import psyco
    from psyco.classes import *
except ImportError:
    class _psyco:
        def jit(self): pass
        def bind(self, f): pass
        def proxy(self, f): return f
    psyco = _psyco()
psyco.bind(Lexer.get_token)
psyco.bind(Node)
def run0():
    """Parse C source (from argv[1], else stdin) and act on the result."""
    if sys.argv[1:]:
        s = sys.argv[1]
    else:
        s = sys.stdin.read()
    if 0:
        # profiling path, disabled by default
        import profile
        profile.run('TransUnit(s)', 'prof.out')
        import pstats
        p = pstats.Stats('prof.out')
        p.strip_dirs().sort_stats(-1).print_stats()
    else:
        node = TransUnit(verbose=1)
        node.parse(s)
        node.act(1, 1, 1)


def run1():
    """Smoke-test: parse a lone abstract declarator and dump it."""
    cstr = "char *(*)() ,"
    node = AbstractDeclarator()
    node.parse(Lexer(cstr, True), Symbols())
    print(node.deepstr())


if __name__ == "__main__":
    pass