1 #!/usr/bin/env python 2 # 3 # Copyright 2007 Neal Norwitz 4 # Portions Copyright 2007 Google Inc. 5 # 6 # Licensed under the Apache License, Version 2.0 (the "License"); 7 # you may not use this file except in compliance with the License. 8 # You may obtain a copy of the License at 9 # 10 # http://www.apache.org/licenses/LICENSE-2.0 11 # 12 # Unless required by applicable law or agreed to in writing, software 13 # distributed under the License is distributed on an "AS IS" BASIS, 14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 # See the License for the specific language governing permissions and 16 # limitations under the License. 17 18 """Generate an Abstract Syntax Tree (AST) for C++.""" 19 20 __author__ = 'nnorwitz (at] google.com (Neal Norwitz)' 21 22 23 # TODO: 24 # * Tokens should never be exported, need to convert to Nodes 25 # (return types, parameters, etc.) 26 # * Handle static class data for templatized classes 27 # * Handle casts (both C++ and C-style) 28 # * Handle conditions and loops (if/else, switch, for, while/do) 29 # 30 # TODO much, much later: 31 # * Handle #define 32 # * exceptions 33 34 35 try: 36 # Python 3.x 37 import builtins 38 except ImportError: 39 # Python 2.x 40 import __builtin__ as builtins 41 42 import sys 43 import traceback 44 45 from cpp import keywords 46 from cpp import tokenize 47 from cpp import utils 48 49 50 if not hasattr(builtins, 'reversed'): 51 # Support Python 2.3 and earlier. 52 def reversed(seq): 53 for i in range(len(seq)-1, -1, -1): 54 yield seq[i] 55 56 if not hasattr(builtins, 'next'): 57 # Support Python 2.5 and earlier. 58 def next(obj): 59 return obj.next() 60 61 62 VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3) 63 64 FUNCTION_NONE = 0x00 65 FUNCTION_CONST = 0x01 66 FUNCTION_VIRTUAL = 0x02 67 FUNCTION_PURE_VIRTUAL = 0x04 68 FUNCTION_CTOR = 0x08 69 FUNCTION_DTOR = 0x10 70 FUNCTION_ATTRIBUTE = 0x20 71 FUNCTION_UNKNOWN_ANNOTATION = 0x40 72 FUNCTION_THROW = 0x80 73 FUNCTION_OVERRIDE = 0x100 74 75 """ 76 These are currently unused. Should really handle these properly at some point. 77 78 TYPE_MODIFIER_INLINE = 0x010000 79 TYPE_MODIFIER_EXTERN = 0x020000 80 TYPE_MODIFIER_STATIC = 0x040000 81 TYPE_MODIFIER_CONST = 0x080000 82 TYPE_MODIFIER_REGISTER = 0x100000 83 TYPE_MODIFIER_VOLATILE = 0x200000 84 TYPE_MODIFIER_MUTABLE = 0x400000 85 86 TYPE_MODIFIER_MAP = { 87 'inline': TYPE_MODIFIER_INLINE, 88 'extern': TYPE_MODIFIER_EXTERN, 89 'static': TYPE_MODIFIER_STATIC, 90 'const': TYPE_MODIFIER_CONST, 91 'register': TYPE_MODIFIER_REGISTER, 92 'volatile': TYPE_MODIFIER_VOLATILE, 93 'mutable': TYPE_MODIFIER_MUTABLE, 94 } 95 """ 96 97 _INTERNAL_TOKEN = 'internal' 98 _NAMESPACE_POP = 'ns-pop' 99 100 101 # TODO(nnorwitz): use this as a singleton for templated_types, etc 102 # where we don't want to create a new empty dict each time. It is also const. 103 class _NullDict(object): 104 __contains__ = lambda self: False 105 keys = values = items = iterkeys = itervalues = iteritems = lambda self: () 106 107 108 # TODO(nnorwitz): move AST nodes into a separate module. 109 class Node(object): 110 """Base AST node.""" 111 112 def __init__(self, start, end): 113 self.start = start 114 self.end = end 115 116 def IsDeclaration(self): 117 """Returns bool if this node is a declaration.""" 118 return False 119 120 def IsDefinition(self): 121 """Returns bool if this node is a definition.""" 122 return False 123 124 def IsExportable(self): 125 """Returns bool if this node exportable from a header file.""" 126 return False 127 128 def Requires(self, node): 129 """Does this AST node require the definition of the node passed in?""" 130 return False 131 132 def XXX__str__(self): 133 return self._StringHelper(self.__class__.__name__, '') 134 135 def _StringHelper(self, name, suffix): 136 if not utils.DEBUG: 137 return '%s(%s)' % (name, suffix) 138 return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix) 139 140 def __repr__(self): 141 return str(self) 142 143 144 class Define(Node): 145 def __init__(self, start, end, name, definition): 146 Node.__init__(self, start, end) 147 self.name = name 148 self.definition = definition 149 150 def __str__(self): 151 value = '%s %s' % (self.name, self.definition) 152 return self._StringHelper(self.__class__.__name__, value) 153 154 155 class Include(Node): 156 def __init__(self, start, end, filename, system): 157 Node.__init__(self, start, end) 158 self.filename = filename 159 self.system = system 160 161 def __str__(self): 162 fmt = '"%s"' 163 if self.system: 164 fmt = '<%s>' 165 return self._StringHelper(self.__class__.__name__, fmt % self.filename) 166 167 168 class Goto(Node): 169 def __init__(self, start, end, label): 170 Node.__init__(self, start, end) 171 self.label = label 172 173 def __str__(self): 174 return self._StringHelper(self.__class__.__name__, str(self.label)) 175 176 177 class Expr(Node): 178 def __init__(self, start, end, expr): 179 Node.__init__(self, start, end) 180 self.expr = expr 181 182 def Requires(self, node): 183 # TODO(nnorwitz): impl. 184 return False 185 186 def __str__(self): 187 return self._StringHelper(self.__class__.__name__, str(self.expr)) 188 189 190 class Return(Expr): 191 pass 192 193 194 class Delete(Expr): 195 pass 196 197 198 class Friend(Expr): 199 def __init__(self, start, end, expr, namespace): 200 Expr.__init__(self, start, end, expr) 201 self.namespace = namespace[:] 202 203 204 class Using(Node): 205 def __init__(self, start, end, names): 206 Node.__init__(self, start, end) 207 self.names = names 208 209 def __str__(self): 210 return self._StringHelper(self.__class__.__name__, str(self.names)) 211 212 213 class Parameter(Node): 214 def __init__(self, start, end, name, parameter_type, default): 215 Node.__init__(self, start, end) 216 self.name = name 217 self.type = parameter_type 218 self.default = default 219 220 def Requires(self, node): 221 # TODO(nnorwitz): handle namespaces, etc. 222 return self.type.name == node.name 223 224 def __str__(self): 225 name = str(self.type) 226 suffix = '%s %s' % (name, self.name) 227 if self.default: 228 suffix += ' = ' + ''.join([d.name for d in self.default]) 229 return self._StringHelper(self.__class__.__name__, suffix) 230 231 232 class _GenericDeclaration(Node): 233 def __init__(self, start, end, name, namespace): 234 Node.__init__(self, start, end) 235 self.name = name 236 self.namespace = namespace[:] 237 238 def FullName(self): 239 prefix = '' 240 if self.namespace and self.namespace[-1]: 241 prefix = '::'.join(self.namespace) + '::' 242 return prefix + self.name 243 244 def _TypeStringHelper(self, suffix): 245 if self.namespace: 246 names = [n or '<anonymous>' for n in self.namespace] 247 suffix += ' in ' + '::'.join(names) 248 return self._StringHelper(self.__class__.__name__, suffix) 249 250 251 # TODO(nnorwitz): merge with Parameter in some way? 252 class VariableDeclaration(_GenericDeclaration): 253 def __init__(self, start, end, name, var_type, initial_value, namespace): 254 _GenericDeclaration.__init__(self, start, end, name, namespace) 255 self.type = var_type 256 self.initial_value = initial_value 257 258 def Requires(self, node): 259 # TODO(nnorwitz): handle namespaces, etc. 260 return self.type.name == node.name 261 262 def ToString(self): 263 """Return a string that tries to reconstitute the variable decl.""" 264 suffix = '%s %s' % (self.type, self.name) 265 if self.initial_value: 266 suffix += ' = ' + self.initial_value 267 return suffix 268 269 def __str__(self): 270 return self._StringHelper(self.__class__.__name__, self.ToString()) 271 272 273 class Typedef(_GenericDeclaration): 274 def __init__(self, start, end, name, alias, namespace): 275 _GenericDeclaration.__init__(self, start, end, name, namespace) 276 self.alias = alias 277 278 def IsDefinition(self): 279 return True 280 281 def IsExportable(self): 282 return True 283 284 def Requires(self, node): 285 # TODO(nnorwitz): handle namespaces, etc. 286 name = node.name 287 for token in self.alias: 288 if token is not None and name == token.name: 289 return True 290 return False 291 292 def __str__(self): 293 suffix = '%s, %s' % (self.name, self.alias) 294 return self._TypeStringHelper(suffix) 295 296 297 class _NestedType(_GenericDeclaration): 298 def __init__(self, start, end, name, fields, namespace): 299 _GenericDeclaration.__init__(self, start, end, name, namespace) 300 self.fields = fields 301 302 def IsDefinition(self): 303 return True 304 305 def IsExportable(self): 306 return True 307 308 def __str__(self): 309 suffix = '%s, {%s}' % (self.name, self.fields) 310 return self._TypeStringHelper(suffix) 311 312 313 class Union(_NestedType): 314 pass 315 316 317 class Enum(_NestedType): 318 pass 319 320 321 class Class(_GenericDeclaration): 322 def __init__(self, start, end, name, bases, templated_types, body, namespace): 323 _GenericDeclaration.__init__(self, start, end, name, namespace) 324 self.bases = bases 325 self.body = body 326 self.templated_types = templated_types 327 328 def IsDeclaration(self): 329 return self.bases is None and self.body is None 330 331 def IsDefinition(self): 332 return not self.IsDeclaration() 333 334 def IsExportable(self): 335 return not self.IsDeclaration() 336 337 def Requires(self, node): 338 # TODO(nnorwitz): handle namespaces, etc. 339 if self.bases: 340 for token_list in self.bases: 341 # TODO(nnorwitz): bases are tokens, do name comparision. 342 for token in token_list: 343 if token.name == node.name: 344 return True 345 # TODO(nnorwitz): search in body too. 346 return False 347 348 def __str__(self): 349 name = self.name 350 if self.templated_types: 351 name += '<%s>' % self.templated_types 352 suffix = '%s, %s, %s' % (name, self.bases, self.body) 353 return self._TypeStringHelper(suffix) 354 355 356 class Struct(Class): 357 pass 358 359 360 class Function(_GenericDeclaration): 361 def __init__(self, start, end, name, return_type, parameters, 362 modifiers, templated_types, body, namespace): 363 _GenericDeclaration.__init__(self, start, end, name, namespace) 364 converter = TypeConverter(namespace) 365 self.return_type = converter.CreateReturnType(return_type) 366 self.parameters = converter.ToParameters(parameters) 367 self.modifiers = modifiers 368 self.body = body 369 self.templated_types = templated_types 370 371 def IsDeclaration(self): 372 return self.body is None 373 374 def IsDefinition(self): 375 return self.body is not None 376 377 def IsExportable(self): 378 if self.return_type and 'static' in self.return_type.modifiers: 379 return False 380 return None not in self.namespace 381 382 def Requires(self, node): 383 if self.parameters: 384 # TODO(nnorwitz): parameters are tokens, do name comparision. 385 for p in self.parameters: 386 if p.name == node.name: 387 return True 388 # TODO(nnorwitz): search in body too. 389 return False 390 391 def __str__(self): 392 # TODO(nnorwitz): add templated_types. 393 suffix = ('%s %s(%s), 0x%02x, %s' % 394 (self.return_type, self.name, self.parameters, 395 self.modifiers, self.body)) 396 return self._TypeStringHelper(suffix) 397 398 399 class Method(Function): 400 def __init__(self, start, end, name, in_class, return_type, parameters, 401 modifiers, templated_types, body, namespace): 402 Function.__init__(self, start, end, name, return_type, parameters, 403 modifiers, templated_types, body, namespace) 404 # TODO(nnorwitz): in_class could also be a namespace which can 405 # mess up finding functions properly. 406 self.in_class = in_class 407 408 409 class Type(_GenericDeclaration): 410 """Type used for any variable (eg class, primitive, struct, etc).""" 411 412 def __init__(self, start, end, name, templated_types, modifiers, 413 reference, pointer, array): 414 """ 415 Args: 416 name: str name of main type 417 templated_types: [Class (Type?)] template type info between <> 418 modifiers: [str] type modifiers (keywords) eg, const, mutable, etc. 419 reference, pointer, array: bools 420 """ 421 _GenericDeclaration.__init__(self, start, end, name, []) 422 self.templated_types = templated_types 423 if not name and modifiers: 424 self.name = modifiers.pop() 425 self.modifiers = modifiers 426 self.reference = reference 427 self.pointer = pointer 428 self.array = array 429 430 def __str__(self): 431 prefix = '' 432 if self.modifiers: 433 prefix = ' '.join(self.modifiers) + ' ' 434 name = str(self.name) 435 if self.templated_types: 436 name += '<%s>' % self.templated_types 437 suffix = prefix + name 438 if self.reference: 439 suffix += '&' 440 if self.pointer: 441 suffix += '*' 442 if self.array: 443 suffix += '[]' 444 return self._TypeStringHelper(suffix) 445 446 # By definition, Is* are always False. A Type can only exist in 447 # some sort of variable declaration, parameter, or return value. 448 def IsDeclaration(self): 449 return False 450 451 def IsDefinition(self): 452 return False 453 454 def IsExportable(self): 455 return False 456 457 458 class TypeConverter(object): 459 460 def __init__(self, namespace_stack): 461 self.namespace_stack = namespace_stack 462 463 def _GetTemplateEnd(self, tokens, start): 464 count = 1 465 end = start 466 while 1: 467 token = tokens[end] 468 end += 1 469 if token.name == '<': 470 count += 1 471 elif token.name == '>': 472 count -= 1 473 if count == 0: 474 break 475 return tokens[start:end-1], end 476 477 def ToType(self, tokens): 478 """Convert [Token,...] to [Class(...), ] useful for base classes. 479 For example, code like class Foo : public Bar<x, y> { ... }; 480 the "Bar<x, y>" portion gets converted to an AST. 481 482 Returns: 483 [Class(...), ...] 484 """ 485 result = [] 486 name_tokens = [] 487 reference = pointer = array = False 488 489 def AddType(templated_types): 490 # Partition tokens into name and modifier tokens. 491 names = [] 492 modifiers = [] 493 for t in name_tokens: 494 if keywords.IsKeyword(t.name): 495 modifiers.append(t.name) 496 else: 497 names.append(t.name) 498 name = ''.join(names) 499 if name_tokens: 500 result.append(Type(name_tokens[0].start, name_tokens[-1].end, 501 name, templated_types, modifiers, 502 reference, pointer, array)) 503 del name_tokens[:] 504 505 i = 0 506 end = len(tokens) 507 while i < end: 508 token = tokens[i] 509 if token.name == '<': 510 new_tokens, new_end = self._GetTemplateEnd(tokens, i+1) 511 AddType(self.ToType(new_tokens)) 512 # If there is a comma after the template, we need to consume 513 # that here otherwise it becomes part of the name. 514 i = new_end 515 reference = pointer = array = False 516 elif token.name == ',': 517 AddType([]) 518 reference = pointer = array = False 519 elif token.name == '*': 520 pointer = True 521 elif token.name == '&': 522 reference = True 523 elif token.name == '[': 524 pointer = True 525 elif token.name == ']': 526 pass 527 else: 528 name_tokens.append(token) 529 i += 1 530 531 if name_tokens: 532 # No '<' in the tokens, just a simple name and no template. 533 AddType([]) 534 return result 535 536 def DeclarationToParts(self, parts, needs_name_removed): 537 name = None 538 default = [] 539 if needs_name_removed: 540 # Handle default (initial) values properly. 541 for i, t in enumerate(parts): 542 if t.name == '=': 543 default = parts[i+1:] 544 name = parts[i-1].name 545 if name == ']' and parts[i-2].name == '[': 546 name = parts[i-3].name 547 i -= 1 548 parts = parts[:i-1] 549 break 550 else: 551 if parts[-1].token_type == tokenize.NAME: 552 name = parts.pop().name 553 else: 554 # TODO(nnorwitz): this is a hack that happens for code like 555 # Register(Foo<T>); where it thinks this is a function call 556 # but it's actually a declaration. 557 name = '???' 558 modifiers = [] 559 type_name = [] 560 other_tokens = [] 561 templated_types = [] 562 i = 0 563 end = len(parts) 564 while i < end: 565 p = parts[i] 566 if keywords.IsKeyword(p.name): 567 modifiers.append(p.name) 568 elif p.name == '<': 569 templated_tokens, new_end = self._GetTemplateEnd(parts, i+1) 570 templated_types = self.ToType(templated_tokens) 571 i = new_end - 1 572 # Don't add a spurious :: to data members being initialized. 573 next_index = i + 1 574 if next_index < end and parts[next_index].name == '::': 575 i += 1 576 elif p.name in ('[', ']', '='): 577 # These are handled elsewhere. 578 other_tokens.append(p) 579 elif p.name not in ('*', '&', '>'): 580 # Ensure that names have a space between them. 581 if (type_name and type_name[-1].token_type == tokenize.NAME and 582 p.token_type == tokenize.NAME): 583 type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0)) 584 type_name.append(p) 585 else: 586 other_tokens.append(p) 587 i += 1 588 type_name = ''.join([t.name for t in type_name]) 589 return name, type_name, templated_types, modifiers, default, other_tokens 590 591 def ToParameters(self, tokens): 592 if not tokens: 593 return [] 594 595 result = [] 596 name = type_name = '' 597 type_modifiers = [] 598 pointer = reference = array = False 599 first_token = None 600 default = [] 601 602 def AddParameter(end): 603 if default: 604 del default[0] # Remove flag. 605 parts = self.DeclarationToParts(type_modifiers, True) 606 (name, type_name, templated_types, modifiers, 607 unused_default, unused_other_tokens) = parts 608 parameter_type = Type(first_token.start, first_token.end, 609 type_name, templated_types, modifiers, 610 reference, pointer, array) 611 p = Parameter(first_token.start, end, name, 612 parameter_type, default) 613 result.append(p) 614 615 template_count = 0 616 for s in tokens: 617 if not first_token: 618 first_token = s 619 if s.name == '<': 620 template_count += 1 621 elif s.name == '>': 622 template_count -= 1 623 if template_count > 0: 624 type_modifiers.append(s) 625 continue 626 627 if s.name == ',': 628 AddParameter(s.start) 629 name = type_name = '' 630 type_modifiers = [] 631 pointer = reference = array = False 632 first_token = None 633 default = [] 634 elif s.name == '*': 635 pointer = True 636 elif s.name == '&': 637 reference = True 638 elif s.name == '[': 639 array = True 640 elif s.name == ']': 641 pass # Just don't add to type_modifiers. 642 elif s.name == '=': 643 # Got a default value. Add any value (None) as a flag. 644 default.append(None) 645 elif default: 646 default.append(s) 647 else: 648 type_modifiers.append(s) 649 AddParameter(tokens[-1].end) 650 return result 651 652 def CreateReturnType(self, return_type_seq): 653 if not return_type_seq: 654 return None 655 start = return_type_seq[0].start 656 end = return_type_seq[-1].end 657 _, name, templated_types, modifiers, default, other_tokens = \ 658 self.DeclarationToParts(return_type_seq, False) 659 names = [n.name for n in other_tokens] 660 reference = '&' in names 661 pointer = '*' in names 662 array = '[' in names 663 return Type(start, end, name, templated_types, modifiers, 664 reference, pointer, array) 665 666 def GetTemplateIndices(self, names): 667 # names is a list of strings. 668 start = names.index('<') 669 end = len(names) - 1 670 while end > 0: 671 if names[end] == '>': 672 break 673 end -= 1 674 return start, end+1 675 676 class AstBuilder(object): 677 def __init__(self, token_stream, filename, in_class='', visibility=None, 678 namespace_stack=[]): 679 self.tokens = token_stream 680 self.filename = filename 681 # TODO(nnorwitz): use a better data structure (deque) for the queue. 682 # Switching directions of the "queue" improved perf by about 25%. 683 # Using a deque should be even better since we access from both sides. 684 self.token_queue = [] 685 self.namespace_stack = namespace_stack[:] 686 self.in_class = in_class 687 if in_class is None: 688 self.in_class_name_only = None 689 else: 690 self.in_class_name_only = in_class.split('::')[-1] 691 self.visibility = visibility 692 self.in_function = False 693 self.current_token = None 694 # Keep the state whether we are currently handling a typedef or not. 695 self._handling_typedef = False 696 697 self.converter = TypeConverter(self.namespace_stack) 698 699 def HandleError(self, msg, token): 700 printable_queue = list(reversed(self.token_queue[-20:])) 701 sys.stderr.write('Got %s in %s @ %s %s\n' % 702 (msg, self.filename, token, printable_queue)) 703 704 def Generate(self): 705 while 1: 706 token = self._GetNextToken() 707 if not token: 708 break 709 710 # Get the next token. 711 self.current_token = token 712 713 # Dispatch on the next token type. 714 if token.token_type == _INTERNAL_TOKEN: 715 if token.name == _NAMESPACE_POP: 716 self.namespace_stack.pop() 717 continue 718 719 try: 720 result = self._GenerateOne(token) 721 if result is not None: 722 yield result 723 except: 724 self.HandleError('exception', token) 725 raise 726 727 def _CreateVariable(self, pos_token, name, type_name, type_modifiers, 728 ref_pointer_name_seq, templated_types, value=None): 729 reference = '&' in ref_pointer_name_seq 730 pointer = '*' in ref_pointer_name_seq 731 array = '[' in ref_pointer_name_seq 732 var_type = Type(pos_token.start, pos_token.end, type_name, 733 templated_types, type_modifiers, 734 reference, pointer, array) 735 return VariableDeclaration(pos_token.start, pos_token.end, 736 name, var_type, value, self.namespace_stack) 737 738 def _GenerateOne(self, token): 739 if token.token_type == tokenize.NAME: 740 if (keywords.IsKeyword(token.name) and 741 not keywords.IsBuiltinType(token.name)): 742 method = getattr(self, 'handle_' + token.name) 743 return method() 744 elif token.name == self.in_class_name_only: 745 # The token name is the same as the class, must be a ctor if 746 # there is a paren. Otherwise, it's the return type. 747 # Peek ahead to get the next token to figure out which. 748 next = self._GetNextToken() 749 self._AddBackToken(next) 750 if next.token_type == tokenize.SYNTAX and next.name == '(': 751 return self._GetMethod([token], FUNCTION_CTOR, None, True) 752 # Fall through--handle like any other method. 753 754 # Handle data or function declaration/definition. 755 syntax = tokenize.SYNTAX 756 temp_tokens, last_token = \ 757 self._GetVarTokensUpTo(syntax, '(', ';', '{', '[') 758 temp_tokens.insert(0, token) 759 if last_token.name == '(': 760 # If there is an assignment before the paren, 761 # this is an expression, not a method. 762 expr = bool([e for e in temp_tokens if e.name == '=']) 763 if expr: 764 new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';') 765 temp_tokens.append(last_token) 766 temp_tokens.extend(new_temp) 767 last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0) 768 769 if last_token.name == '[': 770 # Handle array, this isn't a method, unless it's an operator. 771 # TODO(nnorwitz): keep the size somewhere. 772 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']') 773 temp_tokens.append(last_token) 774 if temp_tokens[-2].name == 'operator': 775 temp_tokens.append(self._GetNextToken()) 776 else: 777 temp_tokens2, last_token = \ 778 self._GetVarTokensUpTo(tokenize.SYNTAX, ';') 779 temp_tokens.extend(temp_tokens2) 780 781 if last_token.name == ';': 782 # Handle data, this isn't a method. 783 parts = self.converter.DeclarationToParts(temp_tokens, True) 784 (name, type_name, templated_types, modifiers, default, 785 unused_other_tokens) = parts 786 787 t0 = temp_tokens[0] 788 names = [t.name for t in temp_tokens] 789 if templated_types: 790 start, end = self.converter.GetTemplateIndices(names) 791 names = names[:start] + names[end:] 792 default = ''.join([t.name for t in default]) 793 return self._CreateVariable(t0, name, type_name, modifiers, 794 names, templated_types, default) 795 if last_token.name == '{': 796 self._AddBackTokens(temp_tokens[1:]) 797 self._AddBackToken(last_token) 798 method_name = temp_tokens[0].name 799 method = getattr(self, 'handle_' + method_name, None) 800 if not method: 801 # Must be declaring a variable. 802 # TODO(nnorwitz): handle the declaration. 803 return None 804 return method() 805 return self._GetMethod(temp_tokens, 0, None, False) 806 elif token.token_type == tokenize.SYNTAX: 807 if token.name == '~' and self.in_class: 808 # Must be a dtor (probably not in method body). 809 token = self._GetNextToken() 810 # self.in_class can contain A::Name, but the dtor will only 811 # be Name. Make sure to compare against the right value. 812 if (token.token_type == tokenize.NAME and 813 token.name == self.in_class_name_only): 814 return self._GetMethod([token], FUNCTION_DTOR, None, True) 815 # TODO(nnorwitz): handle a lot more syntax. 816 elif token.token_type == tokenize.PREPROCESSOR: 817 # TODO(nnorwitz): handle more preprocessor directives. 818 # token starts with a #, so remove it and strip whitespace. 819 name = token.name[1:].lstrip() 820 if name.startswith('include'): 821 # Remove "include". 822 name = name[7:].strip() 823 assert name 824 # Handle #include \<newline> "header-on-second-line.h". 825 if name.startswith('\\'): 826 name = name[1:].strip() 827 assert name[0] in '<"', token 828 assert name[-1] in '>"', token 829 system = name[0] == '<' 830 filename = name[1:-1] 831 return Include(token.start, token.end, filename, system) 832 if name.startswith('define'): 833 # Remove "define". 834 name = name[6:].strip() 835 assert name 836 value = '' 837 for i, c in enumerate(name): 838 if c.isspace(): 839 value = name[i:].lstrip() 840 name = name[:i] 841 break 842 return Define(token.start, token.end, name, value) 843 if name.startswith('if') and name[2:3].isspace(): 844 condition = name[3:].strip() 845 if condition.startswith('0') or condition.startswith('(0)'): 846 self._SkipIf0Blocks() 847 return None 848 849 def _GetTokensUpTo(self, expected_token_type, expected_token): 850 return self._GetVarTokensUpTo(expected_token_type, expected_token)[0] 851 852 def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens): 853 last_token = self._GetNextToken() 854 tokens = [] 855 while (last_token.token_type != expected_token_type or 856 last_token.name not in expected_tokens): 857 tokens.append(last_token) 858 last_token = self._GetNextToken() 859 return tokens, last_token 860 861 # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary. 862 def _IgnoreUpTo(self, token_type, token): 863 unused_tokens = self._GetTokensUpTo(token_type, token) 864 865 def _SkipIf0Blocks(self): 866 count = 1 867 while 1: 868 token = self._GetNextToken() 869 if token.token_type != tokenize.PREPROCESSOR: 870 continue 871 872 name = token.name[1:].lstrip() 873 if name.startswith('endif'): 874 count -= 1 875 if count == 0: 876 break 877 elif name.startswith('if'): 878 count += 1 879 880 def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None): 881 if GetNextToken is None: 882 GetNextToken = self._GetNextToken 883 # Assumes the current token is open_paren and we will consume 884 # and return up to the close_paren. 885 count = 1 886 token = GetNextToken() 887 while 1: 888 if token.token_type == tokenize.SYNTAX: 889 if token.name == open_paren: 890 count += 1 891 elif token.name == close_paren: 892 count -= 1 893 if count == 0: 894 break 895 yield token 896 token = GetNextToken() 897 yield token 898 899 def _GetParameters(self): 900 return self._GetMatchingChar('(', ')') 901 902 def GetScope(self): 903 return self._GetMatchingChar('{', '}') 904 905 def _GetNextToken(self): 906 if self.token_queue: 907 return self.token_queue.pop() 908 return next(self.tokens) 909 910 def _AddBackToken(self, token): 911 if token.whence == tokenize.WHENCE_STREAM: 912 token.whence = tokenize.WHENCE_QUEUE 913 self.token_queue.insert(0, token) 914 else: 915 assert token.whence == tokenize.WHENCE_QUEUE, token 916 self.token_queue.append(token) 917 918 def _AddBackTokens(self, tokens): 919 if tokens: 920 if tokens[-1].whence == tokenize.WHENCE_STREAM: 921 for token in tokens: 922 token.whence = tokenize.WHENCE_QUEUE 923 self.token_queue[:0] = reversed(tokens) 924 else: 925 assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens 926 self.token_queue.extend(reversed(tokens)) 927 928 def GetName(self, seq=None): 929 """Returns ([tokens], next_token_info).""" 930 GetNextToken = self._GetNextToken 931 if seq is not None: 932 it = iter(seq) 933 GetNextToken = lambda: next(it) 934 next_token = GetNextToken() 935 tokens = [] 936 last_token_was_name = False 937 while (next_token.token_type == tokenize.NAME or 938 (next_token.token_type == tokenize.SYNTAX and 939 next_token.name in ('::', '<'))): 940 # Two NAMEs in a row means the identifier should terminate. 941 # It's probably some sort of variable declaration. 942 if last_token_was_name and next_token.token_type == tokenize.NAME: 943 break 944 last_token_was_name = next_token.token_type == tokenize.NAME 945 tokens.append(next_token) 946 # Handle templated names. 947 if next_token.name == '<': 948 tokens.extend(self._GetMatchingChar('<', '>', GetNextToken)) 949 last_token_was_name = True 950 next_token = GetNextToken() 951 return tokens, next_token 952 953 def GetMethod(self, modifiers, templated_types): 954 return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(') 955 assert len(return_type_and_name) >= 1 956 return self._GetMethod(return_type_and_name, modifiers, templated_types, 957 False) 958 959 def _GetMethod(self, return_type_and_name, modifiers, templated_types, 960 get_paren): 961 template_portion = None 962 if get_paren: 963 token = self._GetNextToken() 964 assert token.token_type == tokenize.SYNTAX, token 965 if token.name == '<': 966 # Handle templatized dtors. 967 template_portion = [token] 968 template_portion.extend(self._GetMatchingChar('<', '>')) 969 token = self._GetNextToken() 970 assert token.token_type == tokenize.SYNTAX, token 971 assert token.name == '(', token 972 973 name = return_type_and_name.pop() 974 # Handle templatized ctors. 975 if name.name == '>': 976 index = 1 977 while return_type_and_name[index].name != '<': 978 index += 1 979 template_portion = return_type_and_name[index:] + [name] 980 del return_type_and_name[index:] 981 name = return_type_and_name.pop() 982 elif name.name == ']': 983 rt = return_type_and_name 984 assert rt[-1].name == '[', return_type_and_name 985 assert rt[-2].name == 'operator', return_type_and_name 986 name_seq = return_type_and_name[-2:] 987 del return_type_and_name[-2:] 988 name = tokenize.Token(tokenize.NAME, 'operator[]', 989 name_seq[0].start, name.end) 990 # Get the open paren so _GetParameters() below works. 991 unused_open_paren = self._GetNextToken() 992 993 # TODO(nnorwitz): store template_portion. 994 return_type = return_type_and_name 995 indices = name 996 if return_type: 997 indices = return_type[0] 998 999 # Force ctor for templatized ctors. 1000 if name.name == self.in_class and not modifiers: 1001 modifiers |= FUNCTION_CTOR 1002 parameters = list(self._GetParameters()) 1003 del parameters[-1] # Remove trailing ')'. 1004 1005 # Handling operator() is especially weird. 1006 if name.name == 'operator' and not parameters: 1007 token = self._GetNextToken() 1008 assert token.name == '(', token 1009 parameters = list(self._GetParameters()) 1010 del parameters[-1] # Remove trailing ')'. 1011 1012 token = self._GetNextToken() 1013 while token.token_type == tokenize.NAME: 1014 modifier_token = token 1015 token = self._GetNextToken() 1016 if modifier_token.name == 'const': 1017 modifiers |= FUNCTION_CONST 1018 elif modifier_token.name == '__attribute__': 1019 # TODO(nnorwitz): handle more __attribute__ details. 1020 modifiers |= FUNCTION_ATTRIBUTE 1021 assert token.name == '(', token 1022 # Consume everything between the (parens). 1023 unused_tokens = list(self._GetMatchingChar('(', ')')) 1024 token = self._GetNextToken() 1025 elif modifier_token.name == 'throw': 1026 modifiers |= FUNCTION_THROW 1027 assert token.name == '(', token 1028 # Consume everything between the (parens). 1029 unused_tokens = list(self._GetMatchingChar('(', ')')) 1030 token = self._GetNextToken() 1031 elif modifier_token.name == 'override': 1032 modifiers |= FUNCTION_OVERRIDE 1033 elif modifier_token.name == modifier_token.name.upper(): 1034 # HACK(nnorwitz): assume that all upper-case names 1035 # are some macro we aren't expanding. 1036 modifiers |= FUNCTION_UNKNOWN_ANNOTATION 1037 else: 1038 self.HandleError('unexpected token', modifier_token) 1039 1040 assert token.token_type == tokenize.SYNTAX, token 1041 # Handle ctor initializers. 1042 if token.name == ':': 1043 # TODO(nnorwitz): anything else to handle for initializer list? 1044 while token.name != ';' and token.name != '{': 1045 token = self._GetNextToken() 1046 1047 # Handle pointer to functions that are really data but look 1048 # like method declarations. 1049 if token.name == '(': 1050 if parameters[0].name == '*': 1051 # name contains the return type. 1052 name = parameters.pop() 1053 # parameters contains the name of the data. 1054 modifiers = [p.name for p in parameters] 1055 # Already at the ( to open the parameter list. 1056 function_parameters = list(self._GetMatchingChar('(', ')')) 1057 del function_parameters[-1] # Remove trailing ')'. 1058 # TODO(nnorwitz): store the function_parameters. 1059 token = self._GetNextToken() 1060 assert token.token_type == tokenize.SYNTAX, token 1061 assert token.name == ';', token 1062 return self._CreateVariable(indices, name.name, indices.name, 1063 modifiers, '', None) 1064 # At this point, we got something like: 1065 # return_type (type::*name_)(params); 1066 # This is a data member called name_ that is a function pointer. 1067 # With this code: void (sq_type::*field_)(string&); 1068 # We get: name=void return_type=[] parameters=sq_type ... field_ 1069 # TODO(nnorwitz): is return_type always empty? 1070 # TODO(nnorwitz): this isn't even close to being correct. 1071 # Just put in something so we don't crash and can move on. 1072 real_name = parameters[-1] 1073 modifiers = [p.name for p in self._GetParameters()] 1074 del modifiers[-1] # Remove trailing ')'. 1075 return self._CreateVariable(indices, real_name.name, indices.name, 1076 modifiers, '', None) 1077 1078 if token.name == '{': 1079 body = list(self.GetScope()) 1080 del body[-1] # Remove trailing '}'. 1081 else: 1082 body = None 1083 if token.name == '=': 1084 token = self._GetNextToken() 1085 1086 if token.name == 'default' or token.name == 'delete': 1087 # Ignore explicitly defaulted and deleted special members 1088 # in C++11. 1089 token = self._GetNextToken() 1090 else: 1091 # Handle pure-virtual declarations. 1092 assert token.token_type == tokenize.CONSTANT, token 1093 assert token.name == '0', token 1094 modifiers |= FUNCTION_PURE_VIRTUAL 1095 token = self._GetNextToken() 1096 1097 if token.name == '[': 1098 # TODO(nnorwitz): store tokens and improve parsing. 1099 # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N]; 1100 tokens = list(self._GetMatchingChar('[', ']')) 1101 token = self._GetNextToken() 1102 1103 assert token.name == ';', (token, return_type_and_name, parameters) 1104 1105 # Looks like we got a method, not a function. 1106 if len(return_type) > 2 and return_type[-1].name == '::': 1107 return_type, in_class = \ 1108 self._GetReturnTypeAndClassName(return_type) 1109 return Method(indices.start, indices.end, name.name, in_class, 1110 return_type, parameters, modifiers, templated_types, 1111 body, self.namespace_stack) 1112 return Function(indices.start, indices.end, name.name, return_type, 1113 parameters, modifiers, templated_types, body, 1114 self.namespace_stack) 1115 1116 def _GetReturnTypeAndClassName(self, token_seq): 1117 # Splitting the return type from the class name in a method 1118 # can be tricky. For example, Return::Type::Is::Hard::To::Find(). 1119 # Where is the return type and where is the class name? 1120 # The heuristic used is to pull the last name as the class name. 1121 # This includes all the templated type info. 1122 # TODO(nnorwitz): if there is only One name like in the 1123 # example above, punt and assume the last bit is the class name. 1124 1125 # Ignore a :: prefix, if exists so we can find the first real name. 1126 i = 0 1127 if token_seq[0].name == '::': 1128 i = 1 1129 # Ignore a :: suffix, if exists. 1130 end = len(token_seq) - 1 1131 if token_seq[end-1].name == '::': 1132 end -= 1 1133 1134 # Make a copy of the sequence so we can append a sentinel 1135 # value. This is required for GetName will has to have some 1136 # terminating condition beyond the last name. 1137 seq_copy = token_seq[i:end] 1138 seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0)) 1139 names = [] 1140 while i < end: 1141 # Iterate through the sequence parsing out each name. 1142 new_name, next = self.GetName(seq_copy[i:]) 1143 assert new_name, 'Got empty new_name, next=%s' % next 1144 # We got a pointer or ref. Add it to the name. 1145 if next and next.token_type == tokenize.SYNTAX: 1146 new_name.append(next) 1147 names.append(new_name) 1148 i += len(new_name) 1149 1150 # Now that we have the names, it's time to undo what we did. 1151 1152 # Remove the sentinel value. 1153 names[-1].pop() 1154 # Flatten the token sequence for the return type. 1155 return_type = [e for seq in names[:-1] for e in seq] 1156 # The class name is the last name. 1157 class_name = names[-1] 1158 return return_type, class_name 1159 1160 def handle_bool(self): 1161 pass 1162 1163 def handle_char(self): 1164 pass 1165 1166 def handle_int(self): 1167 pass 1168 1169 def handle_long(self): 1170 pass 1171 1172 def handle_short(self): 1173 pass 1174 1175 def handle_double(self): 1176 pass 1177 1178 def handle_float(self): 1179 pass 1180 1181 def handle_void(self): 1182 pass 1183 1184 def handle_wchar_t(self): 1185 pass 1186 1187 def handle_unsigned(self): 1188 pass 1189 1190 def handle_signed(self): 1191 pass 1192 1193 def _GetNestedType(self, ctor): 1194 name = None 1195 name_tokens, token = self.GetName() 1196 if name_tokens: 1197 name = ''.join([t.name for t in name_tokens]) 1198 1199 # Handle forward declarations. 1200 if token.token_type == tokenize.SYNTAX and token.name == ';': 1201 return ctor(token.start, token.end, name, None, 1202 self.namespace_stack) 1203 1204 if token.token_type == tokenize.NAME and self._handling_typedef: 1205 self._AddBackToken(token) 1206 return ctor(token.start, token.end, name, None, 1207 self.namespace_stack) 1208 1209 # Must be the type declaration. 1210 fields = list(self._GetMatchingChar('{', '}')) 1211 del fields[-1] # Remove trailing '}'. 1212 if token.token_type == tokenize.SYNTAX and token.name == '{': 1213 next = self._GetNextToken() 1214 new_type = ctor(token.start, token.end, name, fields, 1215 self.namespace_stack) 1216 # A name means this is an anonymous type and the name 1217 # is the variable declaration. 1218 if next.token_type != tokenize.NAME: 1219 return new_type 1220 name = new_type 1221 token = next 1222 1223 # Must be variable declaration using the type prefixed with keyword. 1224 assert token.token_type == tokenize.NAME, token 1225 return self._CreateVariable(token, token.name, name, [], '', None) 1226 1227 def handle_struct(self): 1228 # Special case the handling typedef/aliasing of structs here. 1229 # It would be a pain to handle in the class code. 1230 name_tokens, var_token = self.GetName() 1231 if name_tokens: 1232 next_token = self._GetNextToken() 1233 is_syntax = (var_token.token_type == tokenize.SYNTAX and 1234 var_token.name[0] in '*&') 1235 is_variable = (var_token.token_type == tokenize.NAME and 1236 next_token.name == ';') 1237 variable = var_token 1238 if is_syntax and not is_variable: 1239 variable = next_token 1240 temp = self._GetNextToken() 1241 if temp.token_type == tokenize.SYNTAX and temp.name == '(': 1242 # Handle methods declared to return a struct. 1243 t0 = name_tokens[0] 1244 struct = tokenize.Token(tokenize.NAME, 'struct', 1245 t0.start-7, t0.start-2) 1246 type_and_name = [struct] 1247 type_and_name.extend(name_tokens) 1248 type_and_name.extend((var_token, next_token)) 1249 return self._GetMethod(type_and_name, 0, None, False) 1250 assert temp.name == ';', (temp, name_tokens, var_token) 1251 if is_syntax or (is_variable and not self._handling_typedef): 1252 modifiers = ['struct'] 1253 type_name = ''.join([t.name for t in name_tokens]) 1254 position = name_tokens[0] 1255 return self._CreateVariable(position, variable.name, type_name, 1256 modifiers, var_token.name, None) 1257 name_tokens.extend((var_token, next_token)) 1258 self._AddBackTokens(name_tokens) 1259 else: 1260 self._AddBackToken(var_token) 1261 return self._GetClass(Struct, VISIBILITY_PUBLIC, None) 1262 1263 def handle_union(self): 1264 return self._GetNestedType(Union) 1265 1266 def handle_enum(self): 1267 return self._GetNestedType(Enum) 1268 1269 def handle_auto(self): 1270 # TODO(nnorwitz): warn about using auto? Probably not since it 1271 # will be reclaimed and useful for C++0x. 1272 pass 1273 1274 def handle_register(self): 1275 pass 1276 1277 def handle_const(self): 1278 pass 1279 1280 def handle_inline(self): 1281 pass 1282 1283 def handle_extern(self): 1284 pass 1285 1286 def handle_static(self): 1287 pass 1288 1289 def handle_virtual(self): 1290 # What follows must be a method. 1291 token = token2 = self._GetNextToken() 1292 if token.name == 'inline': 1293 # HACK(nnorwitz): handle inline dtors by ignoring 'inline'. 1294 token2 = self._GetNextToken() 1295 if token2.token_type == tokenize.SYNTAX and token2.name == '~': 1296 return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None) 1297 assert token.token_type == tokenize.NAME or token.name == '::', token 1298 return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(') # ) 1299 return_type_and_name.insert(0, token) 1300 if token2 is not token: 1301 return_type_and_name.insert(1, token2) 1302 return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL, 1303 None, False) 1304 1305 def handle_volatile(self): 1306 pass 1307 1308 def handle_mutable(self): 1309 pass 1310 1311 def handle_public(self): 1312 assert self.in_class 1313 self.visibility = VISIBILITY_PUBLIC 1314 1315 def handle_protected(self): 1316 assert self.in_class 1317 self.visibility = VISIBILITY_PROTECTED 1318 1319 def handle_private(self): 1320 assert self.in_class 1321 self.visibility = VISIBILITY_PRIVATE 1322 1323 def handle_friend(self): 1324 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1325 assert tokens 1326 t0 = tokens[0] 1327 return Friend(t0.start, t0.end, tokens, self.namespace_stack) 1328 1329 def handle_static_cast(self): 1330 pass 1331 1332 def handle_const_cast(self): 1333 pass 1334 1335 def handle_dynamic_cast(self): 1336 pass 1337 1338 def handle_reinterpret_cast(self): 1339 pass 1340 1341 def handle_new(self): 1342 pass 1343 1344 def handle_delete(self): 1345 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1346 assert tokens 1347 return Delete(tokens[0].start, tokens[0].end, tokens) 1348 1349 def handle_typedef(self): 1350 token = self._GetNextToken() 1351 if (token.token_type == tokenize.NAME and 1352 keywords.IsKeyword(token.name)): 1353 # Token must be struct/enum/union/class. 1354 method = getattr(self, 'handle_' + token.name) 1355 self._handling_typedef = True 1356 tokens = [method()] 1357 self._handling_typedef = False 1358 else: 1359 tokens = [token] 1360 1361 # Get the remainder of the typedef up to the semi-colon. 1362 tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';')) 1363 1364 # TODO(nnorwitz): clean all this up. 1365 assert tokens 1366 name = tokens.pop() 1367 indices = name 1368 if tokens: 1369 indices = tokens[0] 1370 if not indices: 1371 indices = token 1372 if name.name == ')': 1373 # HACK(nnorwitz): Handle pointers to functions "properly". 1374 if (len(tokens) >= 4 and 1375 tokens[1].name == '(' and tokens[2].name == '*'): 1376 tokens.append(name) 1377 name = tokens[3] 1378 elif name.name == ']': 1379 # HACK(nnorwitz): Handle arrays properly. 1380 if len(tokens) >= 2: 1381 tokens.append(name) 1382 name = tokens[1] 1383 new_type = tokens 1384 if tokens and isinstance(tokens[0], tokenize.Token): 1385 new_type = self.converter.ToType(tokens)[0] 1386 return Typedef(indices.start, indices.end, name.name, 1387 new_type, self.namespace_stack) 1388 1389 def handle_typeid(self): 1390 pass # Not needed yet. 1391 1392 def handle_typename(self): 1393 pass # Not needed yet. 1394 1395 def _GetTemplatedTypes(self): 1396 result = {} 1397 tokens = list(self._GetMatchingChar('<', '>')) 1398 len_tokens = len(tokens) - 1 # Ignore trailing '>'. 1399 i = 0 1400 while i < len_tokens: 1401 key = tokens[i].name 1402 i += 1 1403 if keywords.IsKeyword(key) or key == ',': 1404 continue 1405 type_name = default = None 1406 if i < len_tokens: 1407 i += 1 1408 if tokens[i-1].name == '=': 1409 assert i < len_tokens, '%s %s' % (i, tokens) 1410 default, unused_next_token = self.GetName(tokens[i:]) 1411 i += len(default) 1412 else: 1413 if tokens[i-1].name != ',': 1414 # We got something like: Type variable. 1415 # Re-adjust the key (variable) and type_name (Type). 1416 key = tokens[i-1].name 1417 type_name = tokens[i-2] 1418 1419 result[key] = (type_name, default) 1420 return result 1421 1422 def handle_template(self): 1423 token = self._GetNextToken() 1424 assert token.token_type == tokenize.SYNTAX, token 1425 assert token.name == '<', token 1426 templated_types = self._GetTemplatedTypes() 1427 # TODO(nnorwitz): for now, just ignore the template params. 1428 token = self._GetNextToken() 1429 if token.token_type == tokenize.NAME: 1430 if token.name == 'class': 1431 return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types) 1432 elif token.name == 'struct': 1433 return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types) 1434 elif token.name == 'friend': 1435 return self.handle_friend() 1436 self._AddBackToken(token) 1437 tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';') 1438 tokens.append(last) 1439 self._AddBackTokens(tokens) 1440 if last.name == '(': 1441 return self.GetMethod(FUNCTION_NONE, templated_types) 1442 # Must be a variable definition. 1443 return None 1444 1445 def handle_true(self): 1446 pass # Nothing to do. 1447 1448 def handle_false(self): 1449 pass # Nothing to do. 1450 1451 def handle_asm(self): 1452 pass # Not needed yet. 1453 1454 def handle_class(self): 1455 return self._GetClass(Class, VISIBILITY_PRIVATE, None) 1456 1457 def _GetBases(self): 1458 # Get base classes. 1459 bases = [] 1460 while 1: 1461 token = self._GetNextToken() 1462 assert token.token_type == tokenize.NAME, token 1463 # TODO(nnorwitz): store kind of inheritance...maybe. 1464 if token.name not in ('public', 'protected', 'private'): 1465 # If inheritance type is not specified, it is private. 1466 # Just put the token back so we can form a name. 1467 # TODO(nnorwitz): it would be good to warn about this. 1468 self._AddBackToken(token) 1469 else: 1470 # Check for virtual inheritance. 1471 token = self._GetNextToken() 1472 if token.name != 'virtual': 1473 self._AddBackToken(token) 1474 else: 1475 # TODO(nnorwitz): store that we got virtual for this base. 1476 pass 1477 base, next_token = self.GetName() 1478 bases_ast = self.converter.ToType(base) 1479 assert len(bases_ast) == 1, bases_ast 1480 bases.append(bases_ast[0]) 1481 assert next_token.token_type == tokenize.SYNTAX, next_token 1482 if next_token.name == '{': 1483 token = next_token 1484 break 1485 # Support multiple inheritance. 1486 assert next_token.name == ',', next_token 1487 return bases, token 1488 1489 def _GetClass(self, class_type, visibility, templated_types): 1490 class_name = None 1491 class_token = self._GetNextToken() 1492 if class_token.token_type != tokenize.NAME: 1493 assert class_token.token_type == tokenize.SYNTAX, class_token 1494 token = class_token 1495 else: 1496 # Skip any macro (e.g. storage class specifiers) after the 1497 # 'class' keyword. 1498 next_token = self._GetNextToken() 1499 if next_token.token_type == tokenize.NAME: 1500 self._AddBackToken(next_token) 1501 else: 1502 self._AddBackTokens([class_token, next_token]) 1503 name_tokens, token = self.GetName() 1504 class_name = ''.join([t.name for t in name_tokens]) 1505 bases = None 1506 if token.token_type == tokenize.SYNTAX: 1507 if token.name == ';': 1508 # Forward declaration. 1509 return class_type(class_token.start, class_token.end, 1510 class_name, None, templated_types, None, 1511 self.namespace_stack) 1512 if token.name in '*&': 1513 # Inline forward declaration. Could be method or data. 1514 name_token = self._GetNextToken() 1515 next_token = self._GetNextToken() 1516 if next_token.name == ';': 1517 # Handle data 1518 modifiers = ['class'] 1519 return self._CreateVariable(class_token, name_token.name, 1520 class_name, 1521 modifiers, token.name, None) 1522 else: 1523 # Assume this is a method. 1524 tokens = (class_token, token, name_token, next_token) 1525 self._AddBackTokens(tokens) 1526 return self.GetMethod(FUNCTION_NONE, None) 1527 if token.name == ':': 1528 bases, token = self._GetBases() 1529 1530 body = None 1531 if token.token_type == tokenize.SYNTAX and token.name == '{': 1532 assert token.token_type == tokenize.SYNTAX, token 1533 assert token.name == '{', token 1534 1535 ast = AstBuilder(self.GetScope(), self.filename, class_name, 1536 visibility, self.namespace_stack) 1537 body = list(ast.Generate()) 1538 1539 if not self._handling_typedef: 1540 token = self._GetNextToken() 1541 if token.token_type != tokenize.NAME: 1542 assert token.token_type == tokenize.SYNTAX, token 1543 assert token.name == ';', token 1544 else: 1545 new_class = class_type(class_token.start, class_token.end, 1546 class_name, bases, None, 1547 body, self.namespace_stack) 1548 1549 modifiers = [] 1550 return self._CreateVariable(class_token, 1551 token.name, new_class, 1552 modifiers, token.name, None) 1553 else: 1554 if not self._handling_typedef: 1555 self.HandleError('non-typedef token', token) 1556 self._AddBackToken(token) 1557 1558 return class_type(class_token.start, class_token.end, class_name, 1559 bases, templated_types, body, self.namespace_stack) 1560 1561 def handle_namespace(self): 1562 token = self._GetNextToken() 1563 # Support anonymous namespaces. 1564 name = None 1565 if token.token_type == tokenize.NAME: 1566 name = token.name 1567 token = self._GetNextToken() 1568 self.namespace_stack.append(name) 1569 assert token.token_type == tokenize.SYNTAX, token 1570 # Create an internal token that denotes when the namespace is complete. 1571 internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP, 1572 None, None) 1573 internal_token.whence = token.whence 1574 if token.name == '=': 1575 # TODO(nnorwitz): handle aliasing namespaces. 1576 name, next_token = self.GetName() 1577 assert next_token.name == ';', next_token 1578 self._AddBackToken(internal_token) 1579 else: 1580 assert token.name == '{', token 1581 tokens = list(self.GetScope()) 1582 # Replace the trailing } with the internal namespace pop token. 1583 tokens[-1] = internal_token 1584 # Handle namespace with nothing in it. 1585 self._AddBackTokens(tokens) 1586 return None 1587 1588 def handle_using(self): 1589 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1590 assert tokens 1591 return Using(tokens[0].start, tokens[0].end, tokens) 1592 1593 def handle_explicit(self): 1594 assert self.in_class 1595 # Nothing much to do. 1596 # TODO(nnorwitz): maybe verify the method name == class name. 1597 # This must be a ctor. 1598 return self.GetMethod(FUNCTION_CTOR, None) 1599 1600 def handle_this(self): 1601 pass # Nothing to do. 1602 1603 def handle_operator(self): 1604 # Pull off the next token(s?) and make that part of the method name. 1605 pass 1606 1607 def handle_sizeof(self): 1608 pass 1609 1610 def handle_case(self): 1611 pass 1612 1613 def handle_switch(self): 1614 pass 1615 1616 def handle_default(self): 1617 token = self._GetNextToken() 1618 assert token.token_type == tokenize.SYNTAX 1619 assert token.name == ':' 1620 1621 def handle_if(self): 1622 pass 1623 1624 def handle_else(self): 1625 pass 1626 1627 def handle_return(self): 1628 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1629 if not tokens: 1630 return Return(self.current_token.start, self.current_token.end, None) 1631 return Return(tokens[0].start, tokens[0].end, tokens) 1632 1633 def handle_goto(self): 1634 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') 1635 assert len(tokens) == 1, str(tokens) 1636 return Goto(tokens[0].start, tokens[0].end, tokens[0].name) 1637 1638 def handle_try(self): 1639 pass # Not needed yet. 1640 1641 def handle_catch(self): 1642 pass # Not needed yet. 1643 1644 def handle_throw(self): 1645 pass # Not needed yet. 1646 1647 def handle_while(self): 1648 pass 1649 1650 def handle_do(self): 1651 pass 1652 1653 def handle_for(self): 1654 pass 1655 1656 def handle_break(self): 1657 self._IgnoreUpTo(tokenize.SYNTAX, ';') 1658 1659 def handle_continue(self): 1660 self._IgnoreUpTo(tokenize.SYNTAX, ';') 1661 1662 1663 def BuilderFromSource(source, filename): 1664 """Utility method that returns an AstBuilder from source code. 1665 1666 Args: 1667 source: 'C++ source code' 1668 filename: 'file1' 1669 1670 Returns: 1671 AstBuilder 1672 """ 1673 return AstBuilder(tokenize.GetTokens(source), filename) 1674 1675 1676 def PrintIndentifiers(filename, should_print): 1677 """Prints all identifiers for a C++ source file. 1678 1679 Args: 1680 filename: 'file1' 1681 should_print: predicate with signature: bool Function(token) 1682 """ 1683 source = utils.ReadFile(filename, False) 1684 if source is None: 1685 sys.stderr.write('Unable to find: %s\n' % filename) 1686 return 1687 1688 #print('Processing %s' % actual_filename) 1689 builder = BuilderFromSource(source, filename) 1690 try: 1691 for node in builder.Generate(): 1692 if should_print(node): 1693 print(node.name) 1694 except KeyboardInterrupt: 1695 return 1696 except: 1697 pass 1698 1699 1700 def PrintAllIndentifiers(filenames, should_print): 1701 """Prints all identifiers for each C++ source file in filenames. 1702 1703 Args: 1704 filenames: ['file1', 'file2', ...] 1705 should_print: predicate with signature: bool Function(token) 1706 """ 1707 for path in filenames: 1708 PrintIndentifiers(path, should_print) 1709 1710 1711 def main(argv): 1712 for filename in argv[1:]: 1713 source = utils.ReadFile(filename) 1714 if source is None: 1715 continue 1716 1717 print('Processing %s' % filename) 1718 builder = BuilderFromSource(source, filename) 1719 try: 1720 entire_ast = filter(None, builder.Generate()) 1721 except KeyboardInterrupt: 1722 return 1723 except: 1724 # Already printed a warning, print the traceback and continue. 1725 traceback.print_exc() 1726 else: 1727 if utils.DEBUG: 1728 for ast in entire_ast: 1729 print(ast) 1730 1731 1732 if __name__ == '__main__': 1733 main(sys.argv) 1734