#!/usr/bin/env python
#
# Copyright 2007 Neal Norwitz
# Portions Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate an Abstract Syntax Tree (AST) for C++."""

__author__ = 'nnorwitz (at] google.com (Neal Norwitz)'


# TODO:
#  * Tokens should never be exported, need to convert to Nodes
#    (return types, parameters, etc.)
#  * Handle static class data for templatized classes
#  * Handle casts (both C++ and C-style)
#  * Handle conditions and loops (if/else, switch, for, while/do)
#
# TODO much, much later:
#  * Handle #define
#  * exceptions


try:
    # Python 3.x
    import builtins
except ImportError:
    # Python 2.x
    import __builtin__ as builtins

import sys
import traceback

from cpp import keywords
from cpp import tokenize
from cpp import utils


if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    def reversed(seq):
        for i in range(len(seq)-1, -1, -1):
            yield seq[i]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    def next(obj):
        return obj.next()


# Visibility levels for class members (public/protected/private).
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags OR'd together into Function/Method `modifiers`.
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80

"""
These are currently unused.  Should really handle these properly at some point.

TYPE_MODIFIER_INLINE   = 0x010000
TYPE_MODIFIER_EXTERN   = 0x020000
TYPE_MODIFIER_STATIC   = 0x040000
TYPE_MODIFIER_CONST    = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE  = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

# Sentinel token type/name used to pop a namespace while generating.
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'


# TODO(nnorwitz): use this as a singleton for templated_types, etc
# where we don't want to create a new empty dict each time.  It is also const.
class _NullDict(object):
    """A read-only, always-empty dict stand-in.

    BUG FIX: __contains__ must accept the item being tested
    (object.__contains__(self, item)); the previous one-argument lambda
    made `x in _NullDict()` raise TypeError.
    """
    __contains__ = lambda self, item: False
    keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()


# TODO(nnorwitz): move AST nodes into a separate module.
108 class Node(object): 109 """Base AST node.""" 110 111 def __init__(self, start, end): 112 self.start = start 113 self.end = end 114 115 def IsDeclaration(self): 116 """Returns bool if this node is a declaration.""" 117 return False 118 119 def IsDefinition(self): 120 """Returns bool if this node is a definition.""" 121 return False 122 123 def IsExportable(self): 124 """Returns bool if this node exportable from a header file.""" 125 return False 126 127 def Requires(self, node): 128 """Does this AST node require the definition of the node passed in?""" 129 return False 130 131 def XXX__str__(self): 132 return self._StringHelper(self.__class__.__name__, '') 133 134 def _StringHelper(self, name, suffix): 135 if not utils.DEBUG: 136 return '%s(%s)' % (name, suffix) 137 return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix) 138 139 def __repr__(self): 140 return str(self) 141 142 143 class Define(Node): 144 def __init__(self, start, end, name, definition): 145 Node.__init__(self, start, end) 146 self.name = name 147 self.definition = definition 148 149 def __str__(self): 150 value = '%s %s' % (self.name, self.definition) 151 return self._StringHelper(self.__class__.__name__, value) 152 153 154 class Include(Node): 155 def __init__(self, start, end, filename, system): 156 Node.__init__(self, start, end) 157 self.filename = filename 158 self.system = system 159 160 def __str__(self): 161 fmt = '"%s"' 162 if self.system: 163 fmt = '<%s>' 164 return self._StringHelper(self.__class__.__name__, fmt % self.filename) 165 166 167 class Goto(Node): 168 def __init__(self, start, end, label): 169 Node.__init__(self, start, end) 170 self.label = label 171 172 def __str__(self): 173 return self._StringHelper(self.__class__.__name__, str(self.label)) 174 175 176 class Expr(Node): 177 def __init__(self, start, end, expr): 178 Node.__init__(self, start, end) 179 self.expr = expr 180 181 def Requires(self, node): 182 # TODO(nnorwitz): impl. 
183 return False 184 185 def __str__(self): 186 return self._StringHelper(self.__class__.__name__, str(self.expr)) 187 188 189 class Return(Expr): 190 pass 191 192 193 class Delete(Expr): 194 pass 195 196 197 class Friend(Expr): 198 def __init__(self, start, end, expr, namespace): 199 Expr.__init__(self, start, end, expr) 200 self.namespace = namespace[:] 201 202 203 class Using(Node): 204 def __init__(self, start, end, names): 205 Node.__init__(self, start, end) 206 self.names = names 207 208 def __str__(self): 209 return self._StringHelper(self.__class__.__name__, str(self.names)) 210 211 212 class Parameter(Node): 213 def __init__(self, start, end, name, parameter_type, default): 214 Node.__init__(self, start, end) 215 self.name = name 216 self.type = parameter_type 217 self.default = default 218 219 def Requires(self, node): 220 # TODO(nnorwitz): handle namespaces, etc. 221 return self.type.name == node.name 222 223 def __str__(self): 224 name = str(self.type) 225 suffix = '%s %s' % (name, self.name) 226 if self.default: 227 suffix += ' = ' + ''.join([d.name for d in self.default]) 228 return self._StringHelper(self.__class__.__name__, suffix) 229 230 231 class _GenericDeclaration(Node): 232 def __init__(self, start, end, name, namespace): 233 Node.__init__(self, start, end) 234 self.name = name 235 self.namespace = namespace[:] 236 237 def FullName(self): 238 prefix = '' 239 if self.namespace and self.namespace[-1]: 240 prefix = '::'.join(self.namespace) + '::' 241 return prefix + self.name 242 243 def _TypeStringHelper(self, suffix): 244 if self.namespace: 245 names = [n or '<anonymous>' for n in self.namespace] 246 suffix += ' in ' + '::'.join(names) 247 return self._StringHelper(self.__class__.__name__, suffix) 248 249 250 # TODO(nnorwitz): merge with Parameter in some way? 
class VariableDeclaration(_GenericDeclaration):
    """A variable declaration: name, its Type, and an optional initializer."""

    def __init__(self, start, end, name, var_type, initial_value, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.type = var_type
        self.initial_value = initial_value

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        suffix = '%s %s' % (self.type, self.name)
        if self.initial_value:
            suffix += ' = ' + self.initial_value
        return suffix

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, self.ToString())


class Typedef(_GenericDeclaration):
    """A typedef: name is the new alias, alias is the aliased declaration."""

    def __init__(self, start, end, name, alias, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        # True if any token of the aliased declaration names `node`.
        name = node.name
        for token in self.alias:
            if token is not None and name == token.name:
                return True
        return False

    def __str__(self):
        suffix = '%s, %s' % (self.name, self.alias)
        return self._TypeStringHelper(suffix)


class _NestedType(_GenericDeclaration):
    """Base for enum/union: a (possibly anonymous) type with field tokens."""

    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        suffix = '%s, {%s}' % (self.name, self.fields)
        return self._TypeStringHelper(suffix)


class Union(_NestedType):
    pass


class Enum(_NestedType):
    pass


class Class(_GenericDeclaration):
    """A class declaration or definition.

    bases and body are None for a pure forward declaration; body is the
    token/node sequence of the class body for a definition.
    """

    def __init__(self, start, end, name, bases, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.bases = bases
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        # Forward declaration: no base list and no body.
        return self.bases is None and self.body is None

    def IsDefinition(self):
        return not self.IsDeclaration()

    def IsExportable(self):
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        if self.bases:
            for token_list in self.bases:
                # TODO(nnorwitz): bases are tokens, do name comparison.
                for token in token_list:
                    if token.name == node.name:
                        return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        name = self.name
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = '%s, %s, %s' % (name, self.bases, self.body)
        return self._TypeStringHelper(suffix)


class Struct(Class):
    pass


class Function(_GenericDeclaration):
    """A free function: converted return type/parameters, modifier bits, body."""

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        converter = TypeConverter(namespace)
        # return_type and parameters arrive as raw token sequences;
        # convert them to AST nodes up front.
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        self.modifiers = modifiers        # FUNCTION_* bit flags OR'd together.
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        if self.return_type and 'static' in self.return_type.modifiers:
            return False
        # NOTE(review): a None entry in the namespace stack presumably marks
        # an unnamed (anonymous) namespace, which is not exportable -- confirm.
        return None not in self.namespace

    def Requires(self, node):
        if self.parameters:
            # TODO(nnorwitz): parameters are tokens, do name comparison.
            for p in self.parameters:
                if p.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        suffix = ('%s %s(%s), 0x%02x, %s' %
                  (self.return_type, self.name, self.parameters,
                   self.modifiers, self.body))
        return self._TypeStringHelper(suffix)


class Method(Function):
    """A Function that is scoped to a class (Class::Method definitions)."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class


class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        _GenericDeclaration.__init__(self, start, end, name, [])
        self.templated_types = templated_types
        # An anonymous type falls back to its last modifier as the name.
        if not name and modifiers:
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference = reference
        self.pointer = pointer
        self.array = array

    def __str__(self):
        prefix = ''
        if self.modifiers:
            prefix = ' '.join(self.modifiers) + ' '
        name = str(self.name)
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = prefix + name
        if self.reference:
            suffix += '&'
        if self.pointer:
            suffix += '*'
        if self.array:
            suffix += '[]'
        return self._TypeStringHelper(suffix)

    # By definition, Is* are always False.  A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False

    def IsDefinition(self):
        return False

    def IsExportable(self):
        return False


class TypeConverter(object):
    """Converts raw token sequences into Type/Parameter AST nodes."""

    def __init__(self, namespace_stack):
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        """Return (tokens between <>, index just past the closing '>').

        Assumes `start` indexes the first token *after* an opening '<'
        (count starts at 1); nested templates are balanced via count.
        """
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            # Flush the accumulated name tokens into one Type node.
            result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                               name, templated_types, modifiers,
                               reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                # Recurse for the template arguments themselves.
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
                pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split declaration tokens into their components.

        Args:
          parts: [Token] tokens making up one declaration.
          needs_name_removed: bool, True when the declared name is still
              embedded in `parts` and must be split out here.

        Returns:
          (name, type_name, templated_types, modifiers, default,
           other_tokens)
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    # Array declarator: the real name sits before '['.
                    if name == ']' and parts[i-2].name == '[':
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                # No '=' found: the trailing NAME token is the declared name.
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                    p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert a parameter-list token sequence into [Parameter, ...]."""
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter():
            # Reads the enclosing function's current accumulator state.
            if default:
                del default[0]  # Remove flag.
            end = type_modifiers[-1].end
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                # Inside <...>: commas separate template args, not params.
                type_modifiers.append(s)
                continue

            if s.name == ',':
                AddParameter()
                # Reset accumulators for the next parameter.
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter()
        return result

    def CreateReturnType(self, return_type_seq):
        """Build a Type node from a return-type token sequence (or None)."""
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
            self.DeclarationToParts(return_type_seq, False)
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Return (start, end) slice indices spanning '<'...'>' in names."""
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1


class AstBuilder(object):
    """Parses a token stream into AST nodes, yielded by Generate()."""

    def __init__(self, token_stream, filename, in_class='', visibility=None,
                 namespace_stack=[]):
        # NOTE(review): the mutable default namespace_stack is safe here
        # because it is copied ([:]) below, never mutated in place.
        self.tokens = token_stream
        self.filename = filename
        # TODO(nnorwitz): use a better data structure (deque) for the queue.
        # Switching directions of the "queue" improved perf by about 25%.
        # Using a deque should be even better since we access from both sides.
        self.token_queue = []
        self.namespace_stack = namespace_stack[:]
        self.in_class = in_class
        if in_class is None:
            self.in_class_name_only = None
        else:
            # in_class may be qualified (A::Name); keep the bare name too.
            self.in_class_name_only = in_class.split('::')[-1]
        self.visibility = visibility
        self.in_function = False
        self.current_token = None
        # Keep the state whether we are currently handling a typedef or not.
        self._handling_typedef = False

        self.converter = TypeConverter(self.namespace_stack)

    def HandleError(self, msg, token):
        """Log a parse problem (with recent queue context) to stderr."""
        printable_queue = list(reversed(self.token_queue[-20:]))
        sys.stderr.write('Got %s in %s @ %s %s\n' %
                         (msg, self.filename, token, printable_queue))

    def Generate(self):
        """Yield AST nodes parsed from the token stream until exhausted."""
        while 1:
            token = self._GetNextToken()
            if not token:
                break

            # Get the next token.
            self.current_token = token

            # Dispatch on the next token type.
            if token.token_type == _INTERNAL_TOKEN:
                # Internal bookkeeping token: pop the namespace, yield nothing.
                if token.name == _NAMESPACE_POP:
                    self.namespace_stack.pop()
                continue

            try:
                result = self._GenerateOne(token)
                if result is not None:
                    yield result
            except:
                # Bare except is deliberate: report where we were, then
                # re-raise the original exception unchanged.
                self.HandleError('exception', token)
                raise

    def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
                        ref_pointer_name_seq, templated_types, value=None):
        """Build a VariableDeclaration node in the current namespace."""
        reference = '&' in ref_pointer_name_seq
        pointer = '*' in ref_pointer_name_seq
        array = '[' in ref_pointer_name_seq
        var_type = Type(pos_token.start, pos_token.end, type_name,
                        templated_types, type_modifiers,
                        reference, pointer, array)
        return VariableDeclaration(pos_token.start, pos_token.end,
                                   name, var_type, value, self.namespace_stack)

    def _GenerateOne(self, token):
        """Dispatch on one token; return an AST node or None."""
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                not keywords.IsBuiltinType(token.name)):
                # Keywords are dispatched to handle_<keyword> methods.
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
                self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    # Strip the <...> portion from the name sequence.
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                    token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                # Split "NAME VALUE" at the first whitespace run.
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                if condition.startswith('0') or condition.startswith('(0)'):
                    # Skip over dead #if 0 ... #endif blocks entirely.
                    self._SkipIf0Blocks()
        return None

    def _GetTokensUpTo(self, expected_token_type, expected_token):
        """Like _GetVarTokensUpTo but discards the terminating token."""
        return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]

    def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
        """Collect tokens until one matches; return (tokens, terminator)."""
        last_token = self._GetNextToken()
        tokens = []
        while (last_token.token_type != expected_token_type or
               last_token.name not in expected_tokens):
            tokens.append(last_token)
            last_token = self._GetNextToken()
        return tokens, last_token

    # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
    def _IgnoreUpTo(self, token_type, token):
        """Consume and discard tokens up to and including the match."""
        unused_tokens = self._GetTokensUpTo(token_type, token)

    def _SkipIf0Blocks(self):
        """Consume tokens until the #endif matching the current #if."""
        count = 1
        while 1:
            token = self._GetNextToken()
            if token.token_type != tokenize.PREPROCESSOR:
                continue

            name = token.name[1:].lstrip()
            if name.startswith('endif'):
                count -= 1
                if count == 0:
                    break
            elif name.startswith('if'):
                # Nested #if/#ifdef/#ifndef: deepen the nesting count.
                count += 1

    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
        """Yield tokens up to and including the balancing close_paren."""
        if GetNextToken is None:
            GetNextToken = self._GetNextToken
        # Assumes the current token is open_paren and we will consume
        # and return up to the close_paren.
        count = 1
        token = GetNextToken()
        while 1:
            if token.token_type == tokenize.SYNTAX:
                if token.name == open_paren:
                    count += 1
                elif token.name == close_paren:
                    count -= 1
                    if count == 0:
                        break
            yield token
            token = GetNextToken()
        # Yield the final token (the close_paren) too.
        yield token

    def _GetParameters(self):
        return self._GetMatchingChar('(', ')')

    def GetScope(self):
        return self._GetMatchingChar('{', '}')

    def _GetNextToken(self):
        # Prefer pushed-back tokens (queue) over the raw stream.
        if self.token_queue:
            return self.token_queue.pop()
        return next(self.tokens)

    def _AddBackToken(self, token):
        """Push one token back so _GetNextToken() returns it again."""
        if token.whence == tokenize.WHENCE_STREAM:
            token.whence = tokenize.WHENCE_QUEUE
            self.token_queue.insert(0, token)
        else:
            assert token.whence == tokenize.WHENCE_QUEUE, token
            self.token_queue.append(token)

    def _AddBackTokens(self, tokens):
        """Push a token sequence back, preserving their original order."""
        if tokens:
            if tokens[-1].whence == tokenize.WHENCE_STREAM:
                for token in tokens:
                    token.whence = tokenize.WHENCE_QUEUE
                self.token_queue[:0] = reversed(tokens)
            else:
                assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
                self.token_queue.extend(reversed(tokens))

    def GetName(self, seq=None):
        """Returns ([tokens], next_token_info)."""
        GetNextToken = self._GetNextToken
        if seq is not None:
            # Read from the supplied sequence instead of the live stream.
            it = iter(seq)
            GetNextToken = lambda: next(it)
        next_token = GetNextToken()
        tokens = []
        last_token_was_name = False
        while (next_token.token_type == tokenize.NAME or
               (next_token.token_type == tokenize.SYNTAX and
                next_token.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if last_token_was_name and next_token.token_type == tokenize.NAME:
                break
            last_token_was_name = next_token.token_type == tokenize.NAME
            tokens.append(next_token)
            # Handle templated names.
            if next_token.name == '<':
                tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
                last_token_was_name = True
            next_token = GetNextToken()
        return tokens, next_token

    def GetMethod(self, modifiers, templated_types):
        """Parse a method/function after its return type and name tokens."""
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        assert len(return_type_and_name) >= 1
        return self._GetMethod(return_type_and_name, modifiers, templated_types,
                               False)

    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        """Parse a function/method and return a Function/Method node.

        Also handles the declarations that merely *look* like methods
        (function pointers), returning a VariableDeclaration for those.
        """
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        name = return_type_and_name.pop()
        # Handle templatized ctors.
        if name.name == '>':
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            # operator[] -- fold 'operator' '[' ']' into one NAME token.
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]              # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]          # Remove trailing ')'.

        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz): assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]           # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            body = list(self.GetScope())
            del body[-1]                # Remove trailing '}'.
        else:
            body = None

            if token.name == '=':
                # Pure virtual: "= 0" suffix.
                token = self._GetNextToken()
                assert token.token_type == tokenize.CONSTANT, token
                assert token.name == '0', token
                modifiers |= FUNCTION_PURE_VIRTUAL
                token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)

    def _GetReturnTypeAndClassName(self, token_seq):
        # Splitting the return type from the class name in a method
        # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
        # Where is the return type and where is the class name?
        # The heuristic used is to pull the last name as the class name.
        # This includes all the templated type info.
1112 # TODO(nnorwitz): if there is only One name like in the 1113 # example above, punt and assume the last bit is the class name. 1114 1115 # Ignore a :: prefix, if exists so we can find the first real name. 1116 i = 0 1117 if token_seq[0].name == '::': 1118 i = 1 1119 # Ignore a :: suffix, if exists. 1120 end = len(token_seq) - 1 1121 if token_seq[end-1].name == '::': 1122 end -= 1 1123 1124 # Make a copy of the sequence so we can append a sentinel 1125 # value. This is required for GetName will has to have some 1126 # terminating condition beyond the last name. 1127 seq_copy = token_seq[i:end] 1128 seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0)) 1129 names = [] 1130 while i < end: 1131 # Iterate through the sequence parsing out each name. 1132 new_name, next = self.GetName(seq_copy[i:]) 1133 assert new_name, 'Got empty new_name, next=%s' % next 1134 # We got a pointer or ref. Add it to the name. 1135 if next and next.token_type == tokenize.SYNTAX: 1136 new_name.append(next) 1137 names.append(new_name) 1138 i += len(new_name) 1139 1140 # Now that we have the names, it's time to undo what we did. 1141 1142 # Remove the sentinel value. 1143 names[-1].pop() 1144 # Flatten the token sequence for the return type. 1145 return_type = [e for seq in names[:-1] for e in seq] 1146 # The class name is the last name. 
1147 class_name = names[-1] 1148 return return_type, class_name 1149 1150 def handle_bool(self): 1151 pass 1152 1153 def handle_char(self): 1154 pass 1155 1156 def handle_int(self): 1157 pass 1158 1159 def handle_long(self): 1160 pass 1161 1162 def handle_short(self): 1163 pass 1164 1165 def handle_double(self): 1166 pass 1167 1168 def handle_float(self): 1169 pass 1170 1171 def handle_void(self): 1172 pass 1173 1174 def handle_wchar_t(self): 1175 pass 1176 1177 def handle_unsigned(self): 1178 pass 1179 1180 def handle_signed(self): 1181 pass 1182 1183 def _GetNestedType(self, ctor): 1184 name = None 1185 name_tokens, token = self.GetName() 1186 if name_tokens: 1187 name = ''.join([t.name for t in name_tokens]) 1188 1189 # Handle forward declarations. 1190 if token.token_type == tokenize.SYNTAX and token.name == ';': 1191 return ctor(token.start, token.end, name, None, 1192 self.namespace_stack) 1193 1194 if token.token_type == tokenize.NAME and self._handling_typedef: 1195 self._AddBackToken(token) 1196 return ctor(token.start, token.end, name, None, 1197 self.namespace_stack) 1198 1199 # Must be the type declaration. 1200 fields = list(self._GetMatchingChar('{', '}')) 1201 del fields[-1] # Remove trailing '}'. 1202 if token.token_type == tokenize.SYNTAX and token.name == '{': 1203 next = self._GetNextToken() 1204 new_type = ctor(token.start, token.end, name, fields, 1205 self.namespace_stack) 1206 # A name means this is an anonymous type and the name 1207 # is the variable declaration. 1208 if next.token_type != tokenize.NAME: 1209 return new_type 1210 name = new_type 1211 token = next 1212 1213 # Must be variable declaration using the type prefixed with keyword. 1214 assert token.token_type == tokenize.NAME, token 1215 return self._CreateVariable(token, token.name, name, [], '', None) 1216 1217 def handle_struct(self): 1218 # Special case the handling typedef/aliasing of structs here. 1219 # It would be a pain to handle in the class code. 
        # (Continuation of handle_struct.)
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            # A '*' or '&' after the name means pointer/reference syntax;
            # a NAME followed by ';' means a plain variable declaration.
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    t0 = name_tokens[0]
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            # Not a variable declaration: push the tokens back and fall
            # through to parsing a struct definition.
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)

    def handle_union(self):
        return self._GetNestedType(Union)

    def handle_enum(self):
        return self._GetNestedType(Enum)

    def handle_auto(self):
        # TODO(nnorwitz): warn about using auto?  Probably not since it
        # will be reclaimed and useful for C++0x.
        pass

    def handle_register(self):
        pass

    def handle_const(self):
        pass

    def handle_inline(self):
        pass

    def handle_extern(self):
        pass

    def handle_static(self):
        pass

    def handle_virtual(self):
        """Handle the 'virtual' keyword.  What follows must be a method."""
        token = token2 = self._GetNextToken()
        if token.name == 'inline':
            # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
            token2 = self._GetNextToken()
        if token2.token_type == tokenize.SYNTAX and token2.name == '~':
            return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
        assert token.token_type == tokenize.NAME or token.name == '::', token
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        return_type_and_name.insert(0, token)
        if token2 is not token:
            return_type_and_name.insert(1, token2)
        return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                               None, False)

    def handle_volatile(self):
        pass

    def handle_mutable(self):
        pass

    def handle_public(self):
        # Visibility labels are only legal inside a class body.
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC

    def handle_protected(self):
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED

    def handle_private(self):
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE

    def handle_friend(self):
        """Consume a friend declaration up to ';' and return a Friend node."""
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        t0 = tokens[0]
        return Friend(t0.start, t0.end, tokens, self.namespace_stack)

    def handle_static_cast(self):
        pass

    def handle_const_cast(self):
        pass

    def handle_dynamic_cast(self):
        pass

    def handle_reinterpret_cast(self):
        pass

    def handle_new(self):
        pass

    def handle_delete(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        return Delete(tokens[0].start, tokens[0].end, tokens)

    def handle_typedef(self):
        """Parse a typedef declaration and return a Typedef node."""
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        name = tokens.pop()
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)

    def handle_typeid(self):
        pass  # Not needed yet.

    def handle_typename(self):
        pass  # Not needed yet.

    def _GetTemplatedTypes(self):
        """Parse the tokens between '<' and '>' of a template declaration.

        Returns:
          dict mapping each template parameter name to a
          (type_name_token_or_None, default_tokens_or_None) tuple.
        """
        result = {}
        tokens = list(self._GetMatchingChar('<', '>'))
        len_tokens = len(tokens) - 1    # Ignore trailing '>'.
        i = 0
        while i < len_tokens:
            key = tokens[i].name
            i += 1
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if i < len_tokens:
                i += 1
                if tokens[i-1].name == '=':
                    # Template parameter with a default value.
                    assert i < len_tokens, '%s %s' % (i, tokens)
                    default, unused_next_token = self.GetName(tokens[i:])
                    i += len(default)
                else:
                    if tokens[i-1].name != ',':
                        # We got something like: Type variable.
                        # Re-adjust the key (variable) and type_name (Type).
                        # (Continuation of _GetTemplatedTypes.)
                        key = tokens[i-1].name
                        type_name = tokens[i-2]

            result[key] = (type_name, default)
        return result

    def handle_template(self):
        """Handle a template declaration: class/struct, friend, or method."""
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '<', token
        templated_types = self._GetTemplatedTypes()
        # TODO(nnorwitz): for now, just ignore the template params.
        token = self._GetNextToken()
        if token.token_type == tokenize.NAME:
            if token.name == 'class':
                return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
            elif token.name == 'struct':
                return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
            elif token.name == 'friend':
                return self.handle_friend()
        self._AddBackToken(token)
        tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
        tokens.append(last)
        self._AddBackTokens(tokens)
        if last.name == '(':
            return self.GetMethod(FUNCTION_NONE, templated_types)
        # Must be a variable definition.
        return None

    def handle_true(self):
        pass  # Nothing to do.

    def handle_false(self):
        pass  # Nothing to do.

    def handle_asm(self):
        pass  # Not needed yet.

    def handle_class(self):
        return self._GetClass(Class, VISIBILITY_PRIVATE, None)

    def _GetBases(self):
        """Parse base classes up to the opening '{' of the class body.

        Returns:
          (bases, token) where bases is a list of AST type nodes and token
          is the '{' token that terminated the base-class list.
        """
        bases = []
        while 1:
            token = self._GetNextToken()
            assert token.token_type == tokenize.NAME, token
            # TODO(nnorwitz): store kind of inheritance...maybe.
            if token.name not in ('public', 'protected', 'private'):
                # If inheritance type is not specified, it is private.
                # Just put the token back so we can form a name.
                # TODO(nnorwitz): it would be good to warn about this.
                self._AddBackToken(token)
            else:
                # Check for virtual inheritance.
                token = self._GetNextToken()
                if token.name != 'virtual':
                    self._AddBackToken(token)
                else:
                    # TODO(nnorwitz): store that we got virtual for this base.
                    pass
            base, next_token = self.GetName()
            bases_ast = self.converter.ToType(base)
            assert len(bases_ast) == 1, bases_ast
            bases.append(bases_ast[0])
            assert next_token.token_type == tokenize.SYNTAX, next_token
            if next_token.name == '{':
                token = next_token
                break
            # Support multiple inheritance.
            assert next_token.name == ',', next_token
        return bases, token

    def _GetClass(self, class_type, visibility, templated_types):
        """Parse a class/struct: forward decl, definition, or variable use.

        Args:
          class_type: AST node factory (Class or Struct).
          visibility: default member visibility (VISIBILITY_*).
          templated_types: dict from _GetTemplatedTypes, or None.
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Recursively parse the class body with a nested builder.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # `class Foo { ... } var;` -- the class is anonymous-ish
                    # and the trailing NAME declares a variable of that type.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, None, body, self.namespace_stack)

    def handle_namespace(self):
        """Handle a namespace: push it on the stack and queue its tokens."""
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
1566 name, next_token = self.GetName() 1567 assert next_token.name == ';', next_token 1568 self._AddBackToken(internal_token) 1569 else: 1570 assert token.name == '{', token 1571 tokens = list(self.GetScope()) 1572 # Replace the trailing } with the internal namespace pop token. 1573 tokens[-1] = internal_token 1574 # Handle namespace with nothing in it. 1575 self._AddBackTokens(tokens) 1576 return None 1577 1578 def handle_using(self): 1579 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1580 assert tokens 1581 return Using(tokens[0].start, tokens[0].end, tokens) 1582 1583 def handle_explicit(self): 1584 assert self.in_class 1585 # Nothing much to do. 1586 # TODO(nnorwitz): maybe verify the method name == class name. 1587 # This must be a ctor. 1588 return self.GetMethod(FUNCTION_CTOR, None) 1589 1590 def handle_this(self): 1591 pass # Nothing to do. 1592 1593 def handle_operator(self): 1594 # Pull off the next token(s?) and make that part of the method name. 1595 pass 1596 1597 def handle_sizeof(self): 1598 pass 1599 1600 def handle_case(self): 1601 pass 1602 1603 def handle_switch(self): 1604 pass 1605 1606 def handle_default(self): 1607 token = self._GetNextToken() 1608 assert token.token_type == tokenize.SYNTAX 1609 assert token.name == ':' 1610 1611 def handle_if(self): 1612 pass 1613 1614 def handle_else(self): 1615 pass 1616 1617 def handle_return(self): 1618 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1619 if not tokens: 1620 return Return(self.current_token.start, self.current_token.end, None) 1621 return Return(tokens[0].start, tokens[0].end, tokens) 1622 1623 def handle_goto(self): 1624 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1625 assert len(tokens) == 1, str(tokens) 1626 return Goto(tokens[0].start, tokens[0].end, tokens[0].name) 1627 1628 def handle_try(self): 1629 pass # Not needed yet. 1630 1631 def handle_catch(self): 1632 pass # Not needed yet. 1633 1634 def handle_throw(self): 1635 pass # Not needed yet. 
1636 1637 def handle_while(self): 1638 pass 1639 1640 def handle_do(self): 1641 pass 1642 1643 def handle_for(self): 1644 pass 1645 1646 def handle_break(self): 1647 self._IgnoreUpTo(tokenize.SYNTAX, ';') 1648 1649 def handle_continue(self): 1650 self._IgnoreUpTo(tokenize.SYNTAX, ';') 1651 1652 1653 def BuilderFromSource(source, filename): 1654 """Utility method that returns an AstBuilder from source code. 1655 1656 Args: 1657 source: 'C++ source code' 1658 filename: 'file1' 1659 1660 Returns: 1661 AstBuilder 1662 """ 1663 return AstBuilder(tokenize.GetTokens(source), filename) 1664 1665 1666 def PrintIndentifiers(filename, should_print): 1667 """Prints all identifiers for a C++ source file. 1668 1669 Args: 1670 filename: 'file1' 1671 should_print: predicate with signature: bool Function(token) 1672 """ 1673 source = utils.ReadFile(filename, False) 1674 if source is None: 1675 sys.stderr.write('Unable to find: %s\n' % filename) 1676 return 1677 1678 #print('Processing %s' % actual_filename) 1679 builder = BuilderFromSource(source, filename) 1680 try: 1681 for node in builder.Generate(): 1682 if should_print(node): 1683 print(node.name) 1684 except KeyboardInterrupt: 1685 return 1686 except: 1687 pass 1688 1689 1690 def PrintAllIndentifiers(filenames, should_print): 1691 """Prints all identifiers for each C++ source file in filenames. 1692 1693 Args: 1694 filenames: ['file1', 'file2', ...] 1695 should_print: predicate with signature: bool Function(token) 1696 """ 1697 for path in filenames: 1698 PrintIndentifiers(path, should_print) 1699 1700 1701 def main(argv): 1702 for filename in argv[1:]: 1703 source = utils.ReadFile(filename) 1704 if source is None: 1705 continue 1706 1707 print('Processing %s' % filename) 1708 builder = BuilderFromSource(source, filename) 1709 try: 1710 entire_ast = filter(None, builder.Generate()) 1711 except KeyboardInterrupt: 1712 return 1713 except: 1714 # Already printed a warning, print the traceback and continue. 
1715 traceback.print_exc() 1716 else: 1717 if utils.DEBUG: 1718 for ast in entire_ast: 1719 print(ast) 1720 1721 1722 if __name__ == '__main__': 1723 main(sys.argv) 1724