Home | History | Annotate | Download | only in cpp
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2007 Neal Norwitz
      4 # Portions Copyright 2007 Google Inc.
      5 #
      6 # Licensed under the Apache License, Version 2.0 (the "License");
      7 # you may not use this file except in compliance with the License.
      8 # You may obtain a copy of the License at
      9 #
     10 #      http://www.apache.org/licenses/LICENSE-2.0
     11 #
     12 # Unless required by applicable law or agreed to in writing, software
     13 # distributed under the License is distributed on an "AS IS" BASIS,
     14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 # See the License for the specific language governing permissions and
     16 # limitations under the License.
     17 
     18 """Generate an Abstract Syntax Tree (AST) for C++."""
     19 
     20 __author__ = 'nnorwitz (at] google.com (Neal Norwitz)'
     21 
     22 
     23 # TODO:
     24 #  * Tokens should never be exported, need to convert to Nodes
     25 #    (return types, parameters, etc.)
     26 #  * Handle static class data for templatized classes
     27 #  * Handle casts (both C++ and C-style)
     28 #  * Handle conditions and loops (if/else, switch, for, while/do)
     29 #
     30 # TODO much, much later:
     31 #  * Handle #define
     32 #  * exceptions
     33 
     34 
     35 try:
     36     # Python 3.x
     37     import builtins
     38 except ImportError:
     39     # Python 2.x
     40     import __builtin__ as builtins
     41 
     42 import sys
     43 import traceback
     44 
     45 from cpp import keywords
     46 from cpp import tokenize
     47 from cpp import utils
     48 
     49 
if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    # Generator fallback for the builtin reversed() (added in 2.4):
    # yields seq's elements from last index down to 0.
    def reversed(seq):
        for i in range(len(seq)-1, -1, -1):
            yield seq[i]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    # Fallback for the builtin next() (added in 2.6); delegates to the
    # Python 2 iterator protocol's .next() method.
    def next(obj):
        return obj.next()
     61 
# Member access specifiers, in declaration order (public=0, protected=1,
# private=2).
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags describing a function/method declaration; OR'd together into the
# `modifiers` field of Function/Method nodes.
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80

"""
These are currently unused.  Should really handle these properly at some point.

TYPE_MODIFIER_INLINE   = 0x010000
TYPE_MODIFIER_EXTERN   = 0x020000
TYPE_MODIFIER_STATIC   = 0x040000
TYPE_MODIFIER_CONST    = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE  = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

# Sentinel token type/name pair used for bookkeeping tokens the builder
# injects into its own queue (e.g. to pop a namespace at the right time).
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'
     98 
     99 
    100 # TODO(nnorwitz): use this as a singleton for templated_types, etc
    101 # where we don't want to create a new empty dict each time.  It is also const.
    102 class _NullDict(object):
    103     __contains__ = lambda self: False
    104     keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
    105 
    106 
    107 # TODO(nnorwitz): move AST nodes into a separate module.
    108 class Node(object):
    109     """Base AST node."""
    110 
    111     def __init__(self, start, end):
    112         self.start = start
    113         self.end = end
    114 
    115     def IsDeclaration(self):
    116         """Returns bool if this node is a declaration."""
    117         return False
    118 
    119     def IsDefinition(self):
    120         """Returns bool if this node is a definition."""
    121         return False
    122 
    123     def IsExportable(self):
    124         """Returns bool if this node exportable from a header file."""
    125         return False
    126 
    127     def Requires(self, node):
    128         """Does this AST node require the definition of the node passed in?"""
    129         return False
    130 
    131     def XXX__str__(self):
    132         return self._StringHelper(self.__class__.__name__, '')
    133 
    134     def _StringHelper(self, name, suffix):
    135         if not utils.DEBUG:
    136             return '%s(%s)' % (name, suffix)
    137         return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)
    138 
    139     def __repr__(self):
    140         return str(self)
    141 
    142 
    143 class Define(Node):
    144     def __init__(self, start, end, name, definition):
    145         Node.__init__(self, start, end)
    146         self.name = name
    147         self.definition = definition
    148 
    149     def __str__(self):
    150         value = '%s %s' % (self.name, self.definition)
    151         return self._StringHelper(self.__class__.__name__, value)
    152 
    153 
    154 class Include(Node):
    155     def __init__(self, start, end, filename, system):
    156         Node.__init__(self, start, end)
    157         self.filename = filename
    158         self.system = system
    159 
    160     def __str__(self):
    161         fmt = '"%s"'
    162         if self.system:
    163             fmt = '<%s>'
    164         return self._StringHelper(self.__class__.__name__, fmt % self.filename)
    165 
    166 
    167 class Goto(Node):
    168     def __init__(self, start, end, label):
    169         Node.__init__(self, start, end)
    170         self.label = label
    171 
    172     def __str__(self):
    173         return self._StringHelper(self.__class__.__name__, str(self.label))
    174 
    175 
    176 class Expr(Node):
    177     def __init__(self, start, end, expr):
    178         Node.__init__(self, start, end)
    179         self.expr = expr
    180 
    181     def Requires(self, node):
    182         # TODO(nnorwitz): impl.
    183         return False
    184 
    185     def __str__(self):
    186         return self._StringHelper(self.__class__.__name__, str(self.expr))
    187 
    188 
class Return(Expr):
    # A return statement; the returned expression is held by Expr.expr.
    pass
    191 
    192 
class Delete(Expr):
    # A delete expression; the operand tokens are held by Expr.expr.
    pass
    195 
    196 
class Friend(Expr):
    """Node for a friend declaration inside a class."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Snapshot the namespace stack so later pushes/pops don't affect us.
        self.namespace = namespace[:]
    201 
    202 
    203 class Using(Node):
    204     def __init__(self, start, end, names):
    205         Node.__init__(self, start, end)
    206         self.names = names
    207 
    208     def __str__(self):
    209         return self._StringHelper(self.__class__.__name__, str(self.names))
    210 
    211 
    212 class Parameter(Node):
    213     def __init__(self, start, end, name, parameter_type, default):
    214         Node.__init__(self, start, end)
    215         self.name = name
    216         self.type = parameter_type
    217         self.default = default
    218 
    219     def Requires(self, node):
    220         # TODO(nnorwitz): handle namespaces, etc.
    221         return self.type.name == node.name
    222 
    223     def __str__(self):
    224         name = str(self.type)
    225         suffix = '%s %s' % (name, self.name)
    226         if self.default:
    227             suffix += ' = ' + ''.join([d.name for d in self.default])
    228         return self._StringHelper(self.__class__.__name__, suffix)
    229 
    230 
    231 class _GenericDeclaration(Node):
    232     def __init__(self, start, end, name, namespace):
    233         Node.__init__(self, start, end)
    234         self.name = name
    235         self.namespace = namespace[:]
    236 
    237     def FullName(self):
    238         prefix = ''
    239         if self.namespace and self.namespace[-1]:
    240             prefix = '::'.join(self.namespace) + '::'
    241         return prefix + self.name
    242 
    243     def _TypeStringHelper(self, suffix):
    244         if self.namespace:
    245             names = [n or '<anonymous>' for n in self.namespace]
    246             suffix += ' in ' + '::'.join(names)
    247         return self._StringHelper(self.__class__.__name__, suffix)
    248 
    249 
    250 # TODO(nnorwitz): merge with Parameter in some way?
    251 class VariableDeclaration(_GenericDeclaration):
    252     def __init__(self, start, end, name, var_type, initial_value, namespace):
    253         _GenericDeclaration.__init__(self, start, end, name, namespace)
    254         self.type = var_type
    255         self.initial_value = initial_value
    256 
    257     def Requires(self, node):
    258         # TODO(nnorwitz): handle namespaces, etc.
    259         return self.type.name == node.name
    260 
    261     def ToString(self):
    262         """Return a string that tries to reconstitute the variable decl."""
    263         suffix = '%s %s' % (self.type, self.name)
    264         if self.initial_value:
    265             suffix += ' = ' + self.initial_value
    266         return suffix
    267 
    268     def __str__(self):
    269         return self._StringHelper(self.__class__.__name__, self.ToString())
    270 
    271 
    272 class Typedef(_GenericDeclaration):
    273     def __init__(self, start, end, name, alias, namespace):
    274         _GenericDeclaration.__init__(self, start, end, name, namespace)
    275         self.alias = alias
    276 
    277     def IsDefinition(self):
    278         return True
    279 
    280     def IsExportable(self):
    281         return True
    282 
    283     def Requires(self, node):
    284         # TODO(nnorwitz): handle namespaces, etc.
    285         name = node.name
    286         for token in self.alias:
    287             if token is not None and name == token.name:
    288                 return True
    289         return False
    290 
    291     def __str__(self):
    292         suffix = '%s, %s' % (self.name, self.alias)
    293         return self._TypeStringHelper(suffix)
    294 
    295 
    296 class _NestedType(_GenericDeclaration):
    297     def __init__(self, start, end, name, fields, namespace):
    298         _GenericDeclaration.__init__(self, start, end, name, namespace)
    299         self.fields = fields
    300 
    301     def IsDefinition(self):
    302         return True
    303 
    304     def IsExportable(self):
    305         return True
    306 
    307     def __str__(self):
    308         suffix = '%s, {%s}' % (self.name, self.fields)
    309         return self._TypeStringHelper(suffix)
    310 
    311 
class Union(_NestedType):
    # A union; fields and printing behavior come from _NestedType.
    pass
    314 
    315 
class Enum(_NestedType):
    # An enum; fields and printing behavior come from _NestedType.
    pass
    318 
    319 
    320 class Class(_GenericDeclaration):
    321     def __init__(self, start, end, name, bases, templated_types, body, namespace):
    322         _GenericDeclaration.__init__(self, start, end, name, namespace)
    323         self.bases = bases
    324         self.body = body
    325         self.templated_types = templated_types
    326 
    327     def IsDeclaration(self):
    328         return self.bases is None and self.body is None
    329 
    330     def IsDefinition(self):
    331         return not self.IsDeclaration()
    332 
    333     def IsExportable(self):
    334         return not self.IsDeclaration()
    335 
    336     def Requires(self, node):
    337         # TODO(nnorwitz): handle namespaces, etc.
    338         if self.bases:
    339             for token_list in self.bases:
    340                 # TODO(nnorwitz): bases are tokens, do name comparision.
    341                 for token in token_list:
    342                     if token.name == node.name:
    343                         return True
    344         # TODO(nnorwitz): search in body too.
    345         return False
    346 
    347     def __str__(self):
    348         name = self.name
    349         if self.templated_types:
    350             name += '<%s>' % self.templated_types
    351         suffix = '%s, %s, %s' % (name, self.bases, self.body)
    352         return self._TypeStringHelper(suffix)
    353 
    354 
class Struct(Class):
    # A struct; identical to Class apart from the node's type name.
    pass
    357 
    358 
    359 class Function(_GenericDeclaration):
    360     def __init__(self, start, end, name, return_type, parameters,
    361                  modifiers, templated_types, body, namespace):
    362         _GenericDeclaration.__init__(self, start, end, name, namespace)
    363         converter = TypeConverter(namespace)
    364         self.return_type = converter.CreateReturnType(return_type)
    365         self.parameters = converter.ToParameters(parameters)
    366         self.modifiers = modifiers
    367         self.body = body
    368         self.templated_types = templated_types
    369 
    370     def IsDeclaration(self):
    371         return self.body is None
    372 
    373     def IsDefinition(self):
    374         return self.body is not None
    375 
    376     def IsExportable(self):
    377         if self.return_type and 'static' in self.return_type.modifiers:
    378             return False
    379         return None not in self.namespace
    380 
    381     def Requires(self, node):
    382         if self.parameters:
    383             # TODO(nnorwitz): parameters are tokens, do name comparision.
    384             for p in self.parameters:
    385                 if p.name == node.name:
    386                     return True
    387         # TODO(nnorwitz): search in body too.
    388         return False
    389 
    390     def __str__(self):
    391         # TODO(nnorwitz): add templated_types.
    392         suffix = ('%s %s(%s), 0x%02x, %s' %
    393                   (self.return_type, self.name, self.parameters,
    394                    self.modifiers, self.body))
    395         return self._TypeStringHelper(suffix)
    396 
    397 
class Method(Function):
    """A function declared within a class; `in_class` names that class."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
    406 
    407 
    408 class Type(_GenericDeclaration):
    409     """Type used for any variable (eg class, primitive, struct, etc)."""
    410 
    411     def __init__(self, start, end, name, templated_types, modifiers,
    412                  reference, pointer, array):
    413         """
    414         Args:
    415           name: str name of main type
    416           templated_types: [Class (Type?)] template type info between <>
    417           modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
    418           reference, pointer, array: bools
    419         """
    420         _GenericDeclaration.__init__(self, start, end, name, [])
    421         self.templated_types = templated_types
    422         if not name and modifiers:
    423             self.name = modifiers.pop()
    424         self.modifiers = modifiers
    425         self.reference = reference
    426         self.pointer = pointer
    427         self.array = array
    428 
    429     def __str__(self):
    430         prefix = ''
    431         if self.modifiers:
    432             prefix = ' '.join(self.modifiers) + ' '
    433         name = str(self.name)
    434         if self.templated_types:
    435             name += '<%s>' % self.templated_types
    436         suffix = prefix + name
    437         if self.reference:
    438             suffix += '&'
    439         if self.pointer:
    440             suffix += '*'
    441         if self.array:
    442             suffix += '[]'
    443         return self._TypeStringHelper(suffix)
    444 
    445     # By definition, Is* are always False.  A Type can only exist in
    446     # some sort of variable declaration, parameter, or return value.
    447     def IsDeclaration(self):
    448         return False
    449 
    450     def IsDefinition(self):
    451         return False
    452 
    453     def IsExportable(self):
    454         return False
    455 
    456 
class TypeConverter(object):
    """Converts raw token sequences into Type/Parameter AST nodes."""

    def __init__(self, namespace_stack):
        # Namespace stack shared with the AstBuilder that created us.
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        # Scan from `start` (just past an already-consumed '<') to the
        # matching '>'.  Returns (tokens strictly inside the brackets,
        # index just past the closing '>').
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                               name, templated_types, modifiers,
                               reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                # Recurse to build the AST of the template arguments.
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
               pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        # Split a declaration's tokens into (name, type_name,
        # templated_types, modifiers, default, other_tokens).  When
        # needs_name_removed is true, the declared name is first stripped
        # from `parts` (it precedes '=' or is the trailing NAME token).
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    if name == ']' and parts[i-2].name == '[':
                        # Array declarator: the real name sits before '['.
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                    p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        # Convert a parameter-list token sequence into [Parameter, ...].
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter():
            # Flush the accumulated per-parameter state into a Parameter.
            if default:
                del default[0]  # Remove flag.
            end = type_modifiers[-1].end
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                # Inside template brackets a ',' separates template args,
                # not parameters, so keep accumulating tokens.
                type_modifiers.append(s)
                continue

            if s.name == ',':
                # End of one parameter; emit it and reset all state.
                AddParameter()
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter()
        return result

    def CreateReturnType(self, return_type_seq):
        # Build a Type node for a return type, or None when there isn't one.
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
           self.DeclarationToParts(return_type_seq, False)
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        # names is a list of strings.
        # Returns (index of the first '<', index just past the last '>').
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1
    674 
    675 class AstBuilder(object):
    676     def __init__(self, token_stream, filename, in_class='', visibility=None,
    677                  namespace_stack=[]):
    678         self.tokens = token_stream
    679         self.filename = filename
    680         # TODO(nnorwitz): use a better data structure (deque) for the queue.
    681         # Switching directions of the "queue" improved perf by about 25%.
    682         # Using a deque should be even better since we access from both sides.
    683         self.token_queue = []
    684         self.namespace_stack = namespace_stack[:]
    685         self.in_class = in_class
    686         if in_class is None:
    687             self.in_class_name_only = None
    688         else:
    689             self.in_class_name_only = in_class.split('::')[-1]
    690         self.visibility = visibility
    691         self.in_function = False
    692         self.current_token = None
    693         # Keep the state whether we are currently handling a typedef or not.
    694         self._handling_typedef = False
    695 
    696         self.converter = TypeConverter(self.namespace_stack)
    697 
    698     def HandleError(self, msg, token):
    699         printable_queue = list(reversed(self.token_queue[-20:]))
    700         sys.stderr.write('Got %s in %s @ %s %s\n' %
    701                          (msg, self.filename, token, printable_queue))
    702 
    703     def Generate(self):
    704         while 1:
    705             token = self._GetNextToken()
    706             if not token:
    707                 break
    708 
    709             # Get the next token.
    710             self.current_token = token
    711 
    712             # Dispatch on the next token type.
    713             if token.token_type == _INTERNAL_TOKEN:
    714                 if token.name == _NAMESPACE_POP:
    715                     self.namespace_stack.pop()
    716                 continue
    717 
    718             try:
    719                 result = self._GenerateOne(token)
    720                 if result is not None:
    721                     yield result
    722             except:
    723                 self.HandleError('exception', token)
    724                 raise
    725 
    726     def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
    727                         ref_pointer_name_seq, templated_types, value=None):
    728         reference = '&' in ref_pointer_name_seq
    729         pointer = '*' in ref_pointer_name_seq
    730         array = '[' in ref_pointer_name_seq
    731         var_type = Type(pos_token.start, pos_token.end, type_name,
    732                         templated_types, type_modifiers,
    733                         reference, pointer, array)
    734         return VariableDeclaration(pos_token.start, pos_token.end,
    735                                    name, var_type, value, self.namespace_stack)
    736 
    737     def _GenerateOne(self, token):
    738         if token.token_type == tokenize.NAME:
    739             if (keywords.IsKeyword(token.name) and
    740                 not keywords.IsBuiltinType(token.name)):
    741                 method = getattr(self, 'handle_' + token.name)
    742                 return method()
    743             elif token.name == self.in_class_name_only:
    744                 # The token name is the same as the class, must be a ctor if
    745                 # there is a paren.  Otherwise, it's the return type.
    746                 # Peek ahead to get the next token to figure out which.
    747                 next = self._GetNextToken()
    748                 self._AddBackToken(next)
    749                 if next.token_type == tokenize.SYNTAX and next.name == '(':
    750                     return self._GetMethod([token], FUNCTION_CTOR, None, True)
    751                 # Fall through--handle like any other method.
    752 
    753             # Handle data or function declaration/definition.
    754             syntax = tokenize.SYNTAX
    755             temp_tokens, last_token = \
    756                 self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
    757             temp_tokens.insert(0, token)
    758             if last_token.name == '(':
    759                 # If there is an assignment before the paren,
    760                 # this is an expression, not a method.
    761                 expr = bool([e for e in temp_tokens if e.name == '='])
    762                 if expr:
    763                     new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    764                     temp_tokens.append(last_token)
    765                     temp_tokens.extend(new_temp)
    766                     last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)
    767 
    768             if last_token.name == '[':
    769                 # Handle array, this isn't a method, unless it's an operator.
    770                 # TODO(nnorwitz): keep the size somewhere.
    771                 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
    772                 temp_tokens.append(last_token)
    773                 if temp_tokens[-2].name == 'operator':
    774                     temp_tokens.append(self._GetNextToken())
    775                 else:
    776                     temp_tokens2, last_token = \
    777                         self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
    778                     temp_tokens.extend(temp_tokens2)
    779 
    780             if last_token.name == ';':
    781                 # Handle data, this isn't a method.
    782                 parts = self.converter.DeclarationToParts(temp_tokens, True)
    783                 (name, type_name, templated_types, modifiers, default,
    784                  unused_other_tokens) = parts
    785 
    786                 t0 = temp_tokens[0]
    787                 names = [t.name for t in temp_tokens]
    788                 if templated_types:
    789                     start, end = self.converter.GetTemplateIndices(names)
    790                     names = names[:start] + names[end:]
    791                 default = ''.join([t.name for t in default])
    792                 return self._CreateVariable(t0, name, type_name, modifiers,
    793                                             names, templated_types, default)
    794             if last_token.name == '{':
    795                 self._AddBackTokens(temp_tokens[1:])
    796                 self._AddBackToken(last_token)
    797                 method_name = temp_tokens[0].name
    798                 method = getattr(self, 'handle_' + method_name, None)
    799                 if not method:
    800                     # Must be declaring a variable.
    801                     # TODO(nnorwitz): handle the declaration.
    802                     return None
    803                 return method()
    804             return self._GetMethod(temp_tokens, 0, None, False)
    805         elif token.token_type == tokenize.SYNTAX:
    806             if token.name == '~' and self.in_class:
    807                 # Must be a dtor (probably not in method body).
    808                 token = self._GetNextToken()
    809                 # self.in_class can contain A::Name, but the dtor will only
    810                 # be Name.  Make sure to compare against the right value.
    811                 if (token.token_type == tokenize.NAME and
    812                     token.name == self.in_class_name_only):
    813                     return self._GetMethod([token], FUNCTION_DTOR, None, True)
    814             # TODO(nnorwitz): handle a lot more syntax.
    815         elif token.token_type == tokenize.PREPROCESSOR:
    816             # TODO(nnorwitz): handle more preprocessor directives.
    817             # token starts with a #, so remove it and strip whitespace.
    818             name = token.name[1:].lstrip()
    819             if name.startswith('include'):
    820                 # Remove "include".
    821                 name = name[7:].strip()
    822                 assert name
    823                 # Handle #include \<newline> "header-on-second-line.h".
    824                 if name.startswith('\\'):
    825                     name = name[1:].strip()
    826                 assert name[0] in '<"', token
    827                 assert name[-1] in '>"', token
    828                 system = name[0] == '<'
    829                 filename = name[1:-1]
    830                 return Include(token.start, token.end, filename, system)
    831             if name.startswith('define'):
    832                 # Remove "define".
    833                 name = name[6:].strip()
    834                 assert name
    835                 value = ''
    836                 for i, c in enumerate(name):
    837                     if c.isspace():
    838                         value = name[i:].lstrip()
    839                         name = name[:i]
    840                         break
    841                 return Define(token.start, token.end, name, value)
    842             if name.startswith('if') and name[2:3].isspace():
    843                 condition = name[3:].strip()
    844                 if condition.startswith('0') or condition.startswith('(0)'):
    845                     self._SkipIf0Blocks()
    846         return None
    847 
    848     def _GetTokensUpTo(self, expected_token_type, expected_token):
    849         return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
    850 
    851     def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
    852         last_token = self._GetNextToken()
    853         tokens = []
    854         while (last_token.token_type != expected_token_type or
    855                last_token.name not in expected_tokens):
    856             tokens.append(last_token)
    857             last_token = self._GetNextToken()
    858         return tokens, last_token
    859 
    # TODO(nnorwitz): remove _IgnoreUpTo(); it shouldn't be necessary.
    861     def _IgnoreUpTo(self, token_type, token):
    862         unused_tokens = self._GetTokensUpTo(token_type, token)
    863 
    def _SkipIf0Blocks(self):
        """Consume tokens until the #endif matching the current #if 0."""
        depth = 1
        while True:
            token = self._GetNextToken()
            if token.token_type != tokenize.PREPROCESSOR:
                continue

            directive = token.name[1:].lstrip()
            if directive.startswith('endif'):
                depth -= 1
                if not depth:
                    break
            elif directive.startswith('if'):
                # Any nested #if/#ifdef/#ifndef adds one level.
                depth += 1
    878 
    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
        """Yield tokens through (and including) the balanced close_paren.

        Assumes the opening char has already been consumed; tracks nesting
        so inner open/close pairs are passed through.
        """
        if GetNextToken is None:
            GetNextToken = self._GetNextToken
        depth = 1
        while True:
            tok = GetNextToken()
            if tok.token_type == tokenize.SYNTAX:
                if tok.name == open_paren:
                    depth += 1
                elif tok.name == close_paren:
                    depth -= 1
                    if not depth:
                        # Yield the final close paren, then stop.
                        yield tok
                        return
            yield tok
    897 
    898     def _GetParameters(self):
    899         return self._GetMatchingChar('(', ')')
    900 
    def GetScope(self):
        """Yield the tokens of a brace-delimited scope, including the '}'."""
        return self._GetMatchingChar('{', '}')
    903 
    904     def _GetNextToken(self):
    905         if self.token_queue:
    906             return self.token_queue.pop()
    907         return next(self.tokens)
    908 
    def _AddBackToken(self, token):
        """Push a single token back so _GetNextToken will return it again."""
        if token.whence == tokenize.WHENCE_STREAM:
            # First push-back of a stream token: queue it behind any tokens
            # that were already pushed back (pop() takes from the end).
            token.whence = tokenize.WHENCE_QUEUE
            self.token_queue.insert(0, token)
            return
        assert token.whence == tokenize.WHENCE_QUEUE, token
        self.token_queue.append(token)
    916 
    def _AddBackTokens(self, tokens):
        """Push a sequence of tokens back, preserving their original order."""
        if not tokens:
            return
        if tokens[-1].whence == tokenize.WHENCE_STREAM:
            # Stream tokens go to the front so already-queued tokens
            # are still returned first.
            for token in tokens:
                token.whence = tokenize.WHENCE_QUEUE
            self.token_queue[:0] = reversed(tokens)
        else:
            assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
            self.token_queue.extend(reversed(tokens))
    926 
    def GetName(self, seq=None):
        """Returns ([name tokens], token that terminated the name).

        Reads from seq if given, otherwise from the builder's token stream.
        A name is NAME tokens optionally joined by '::' and '<...>' template
        argument lists.
        """
        if seq is None:
            get_token = self._GetNextToken
        else:
            seq_iter = iter(seq)
            def get_token():
                return next(seq_iter)
        name_tokens = []
        prev_was_name = False
        tok = get_token()
        while (tok.token_type == tokenize.NAME or
               (tok.token_type == tokenize.SYNTAX and
                tok.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if prev_was_name and tok.token_type == tokenize.NAME:
                break
            prev_was_name = tok.token_type == tokenize.NAME
            name_tokens.append(tok)
            # Handle templated names.
            if tok.name == '<':
                name_tokens.extend(self._GetMatchingChar('<', '>', get_token))
                prev_was_name = True
            tok = get_token()
        return name_tokens, tok
    951 
    def GetMethod(self, modifiers, templated_types):
        """Parse a method whose return type and name precede the next '('."""
        signature = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        assert len(signature) >= 1
        return self._GetMethod(signature, modifiers, templated_types, False)
    957 
    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        """Parse a function/method declaration or definition into an AST node.

        Args:
          return_type_and_name: [Token] covering the return type followed by
            the function name (the name is the last element).
          modifiers: int bitmask of FUNCTION_* flags accumulated so far.
          templated_types: template parameter info or None.
          get_paren: bool; if True, consume tokens up to the opening '('
            (used for ctors/dtors where the caller stopped at the name).

        Returns:
          a Method (when the name is qualified with '::'), a Function, or the
          result of _CreateVariable for function-pointer data members.
        """
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        name = return_type_and_name.pop()
        # Handle templatized ctors.
        if name.name == '>':
            # Name ends in '>': scan back to the matching '<' and split the
            # template arguments off the real name token.
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            # 'operator[]' tokenizes as ..., 'operator', '[', ']'; fuse the
            # three tokens back into a single synthetic NAME token.
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        # indices is only used as a source of (start, end) positions below.
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]              # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]          # Remove trailing ')'.

        # Consume trailing NAME tokens after the parameter list:
        # const, throw(...), __attribute__((...)), or macro annotations.
        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz):  assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                # NOTE: modifiers is rebound from an int bitmask to a list of
                # names here; _CreateVariable accepts that form.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]           # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            body = list(self.GetScope())
            del body[-1]                # Remove trailing '}'.
        else:
            # Declaration only: no body.
            body = None
            if token.name == '=':
                # Pure virtual: '= 0;'.
                token = self._GetNextToken()
                assert token.token_type == tokenize.CONSTANT, token
                assert token.name == '0', token
                modifiers |= FUNCTION_PURE_VIRTUAL
                token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                         self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)
   1105 
    def _GetReturnTypeAndClassName(self, token_seq):
        """Split qualified method tokens into (return type tokens, class name tokens).

        Heuristic: the last '::'-separated name in token_seq is the class
        name (including its template arguments); everything before it is
        the return type.
        """
        # Splitting the return type from the class name in a method
        # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
        # Where is the return type and where is the class name?
        # The heuristic used is to pull the last name as the class name.
        # This includes all the templated type info.
        # TODO(nnorwitz): if there is only One name like in the
        # example above, punt and assume the last bit is the class name.

        # Ignore a :: prefix, if exists so we can find the first real name.
        i = 0
        if token_seq[0].name == '::':
            i = 1
        # Ignore a :: suffix, if exists.
        # NOTE(review): end indexes the last element and the slice below
        # already excludes it; the end-1 check presumably handles a second
        # trailing '::' -- confirm against how _GetMethod builds token_seq.
        end = len(token_seq) - 1
        if token_seq[end-1].name == '::':
            end -= 1

        # Make a copy of the sequence so we can append a sentinel
        # value. This is required because GetName has to have some
        # terminating condition beyond the last name.
        seq_copy = token_seq[i:end]
        seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
        names = []
        while i < end:
            # Iterate through the sequence parsing out each name.
            new_name, next = self.GetName(seq_copy[i:])
            assert new_name, 'Got empty new_name, next=%s' % next
            # We got a pointer or ref.  Add it to the name.
            if next and next.token_type == tokenize.SYNTAX:
                new_name.append(next)
            names.append(new_name)
            i += len(new_name)

        # Now that we have the names, it's time to undo what we did.

        # Remove the sentinel value.
        names[-1].pop()
        # Flatten the token sequence for the return type.
        return_type = [e for seq in names[:-1] for e in seq]
        # The class name is the last name.
        class_name = names[-1]
        return return_type, class_name
   1149 
   1150     def handle_bool(self):
   1151         pass
   1152 
   1153     def handle_char(self):
   1154         pass
   1155 
   1156     def handle_int(self):
   1157         pass
   1158 
   1159     def handle_long(self):
   1160         pass
   1161 
   1162     def handle_short(self):
   1163         pass
   1164 
   1165     def handle_double(self):
   1166         pass
   1167 
   1168     def handle_float(self):
   1169         pass
   1170 
   1171     def handle_void(self):
   1172         pass
   1173 
   1174     def handle_wchar_t(self):
   1175         pass
   1176 
   1177     def handle_unsigned(self):
   1178         pass
   1179 
   1180     def handle_signed(self):
   1181         pass
   1182 
    def _GetNestedType(self, ctor):
        """Parse a nested type whose keyword was just consumed (union/enum).

        Args:
          ctor: AST node constructor called as
            ctor(start, end, name, fields_or_None, namespace_stack).

        Returns:
          the constructed type node, or a variable declaration using it
          (for 'enum Foo x;'-style statements).
        """
        name = None
        name_tokens, token = self.GetName()
        if name_tokens:
            name = ''.join([t.name for t in name_tokens])

        # Handle forward declarations.
        if token.token_type == tokenize.SYNTAX and token.name == ';':
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        # Inside a typedef, a NAME token after the type name belongs to the
        # typedef; push it back and return just the type.
        if token.token_type == tokenize.NAME and self._handling_typedef:
            self._AddBackToken(token)
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        # Must be the type declaration.
        fields = list(self._GetMatchingChar('{', '}'))
        del fields[-1]                  # Remove trailing '}'.
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            next = self._GetNextToken()
            new_type = ctor(token.start, token.end, name, fields,
                            self.namespace_stack)
            # A name means this is an anonymous type and the name
            # is the variable declaration.
            if next.token_type != tokenize.NAME:
                return new_type
            name = new_type
            token = next

        # Must be variable declaration using the type prefixed with keyword.
        assert token.token_type == tokenize.NAME, token
        return self._CreateVariable(token, token.name, name, [], '', None)
   1216 
    def handle_struct(self):
        """Parse a 'struct' use: declaration, definition, variable, or method.

        Distinguishes 'struct X x;' / 'struct X* x;' (variable),
        'struct X f(...)' (method returning a struct), and a plain
        struct definition, which is delegated to _GetClass.
        """
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            # var_token is '*' or '&': pointer/reference declarator.
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            # 'struct X x;' -- a plain variable of struct type.
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    t0 = name_tokens[0]
                    # Synthesize a 'struct' token positioned just before the
                    # name so _GetMethod sees the full return type.
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            # Not a variable or method: push everything back and parse the
            # struct definition below.
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
   1252 
   1253     def handle_union(self):
   1254         return self._GetNestedType(Union)
   1255 
   1256     def handle_enum(self):
   1257         return self._GetNestedType(Enum)
   1258 
   1259     def handle_auto(self):
   1260         # TODO(nnorwitz): warn about using auto?  Probably not since it
   1261         # will be reclaimed and useful for C++0x.
   1262         pass
   1263 
   1264     def handle_register(self):
   1265         pass
   1266 
   1267     def handle_const(self):
   1268         pass
   1269 
   1270     def handle_inline(self):
   1271         pass
   1272 
   1273     def handle_extern(self):
   1274         pass
   1275 
   1276     def handle_static(self):
   1277         pass
   1278 
   1279     def handle_virtual(self):
   1280         # What follows must be a method.
   1281         token = token2 = self._GetNextToken()
   1282         if token.name == 'inline':
   1283             # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
   1284             token2 = self._GetNextToken()
   1285         if token2.token_type == tokenize.SYNTAX and token2.name == '~':
   1286             return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
   1287         assert token.token_type == tokenize.NAME or token.name == '::', token
   1288         return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
   1289         return_type_and_name.insert(0, token)
   1290         if token2 is not token:
   1291             return_type_and_name.insert(1, token2)
   1292         return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
   1293                                None, False)
   1294 
   1295     def handle_volatile(self):
   1296         pass
   1297 
   1298     def handle_mutable(self):
   1299         pass
   1300 
   1301     def handle_public(self):
   1302         assert self.in_class
   1303         self.visibility = VISIBILITY_PUBLIC
   1304 
   1305     def handle_protected(self):
   1306         assert self.in_class
   1307         self.visibility = VISIBILITY_PROTECTED
   1308 
   1309     def handle_private(self):
   1310         assert self.in_class
   1311         self.visibility = VISIBILITY_PRIVATE
   1312 
   1313     def handle_friend(self):
   1314         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
   1315         assert tokens
   1316         t0 = tokens[0]
   1317         return Friend(t0.start, t0.end, tokens, self.namespace_stack)
   1318 
   1319     def handle_static_cast(self):
   1320         pass
   1321 
   1322     def handle_const_cast(self):
   1323         pass
   1324 
   1325     def handle_dynamic_cast(self):
   1326         pass
   1327 
   1328     def handle_reinterpret_cast(self):
   1329         pass
   1330 
   1331     def handle_new(self):
   1332         pass
   1333 
   1334     def handle_delete(self):
   1335         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
   1336         assert tokens
   1337         return Delete(tokens[0].start, tokens[0].end, tokens)
   1338 
    def handle_typedef(self):
        """Parse a typedef statement and return a Typedef node.

        Handles 'typedef struct/enum/union/class ...' by delegating to the
        corresponding handle_* method, plus hacks for function-pointer and
        array typedefs.
        """
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            # _handling_typedef tells the nested handler not to consume the
            # typedef's alias name as a variable declaration.
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        # The alias name is normally the last token before ';'.
        name = tokens.pop()
        # indices is only used for its (start, end) source position.
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)
   1378 
   1379     def handle_typeid(self):
   1380         pass  # Not needed yet.
   1381 
   1382     def handle_typename(self):
   1383         pass  # Not needed yet.
   1384 
    def _GetTemplatedTypes(self):
        """Parse a template parameter list (the '<' is already consumed).

        Returns:
          dict mapping parameter name -> (type_name token or None,
          default-value token list or None).
        """
        result = {}
        tokens = list(self._GetMatchingChar('<', '>'))
        len_tokens = len(tokens) - 1    # Ignore trailing '>'.
        i = 0
        while i < len_tokens:
            key = tokens[i].name
            i += 1
            # Skip 'typename'/'class' keywords and separating commas.
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if i < len_tokens:
                i += 1
                if tokens[i-1].name == '=':
                    # 'name = default': collect the default value tokens.
                    assert i < len_tokens, '%s %s' % (i, tokens)
                    default, unused_next_token = self.GetName(tokens[i:])
                    i += len(default)
                else:
                    if tokens[i-1].name != ',':
                        # We got something like: Type variable.
                        # Re-adjust the key (variable) and type_name (Type).
                        key = tokens[i-1].name
                        type_name = tokens[i-2]

            result[key] = (type_name, default)
        return result
   1411 
   1412     def handle_template(self):
   1413         token = self._GetNextToken()
   1414         assert token.token_type == tokenize.SYNTAX, token
   1415         assert token.name == '<', token
   1416         templated_types = self._GetTemplatedTypes()
   1417         # TODO(nnorwitz): for now, just ignore the template params.
   1418         token = self._GetNextToken()
   1419         if token.token_type == tokenize.NAME:
   1420             if token.name == 'class':
   1421                 return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
   1422             elif token.name == 'struct':
   1423                 return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
   1424             elif token.name == 'friend':
   1425                 return self.handle_friend()
   1426         self._AddBackToken(token)
   1427         tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
   1428         tokens.append(last)
   1429         self._AddBackTokens(tokens)
   1430         if last.name == '(':
   1431             return self.GetMethod(FUNCTION_NONE, templated_types)
   1432         # Must be a variable definition.
   1433         return None
   1434 
   1435     def handle_true(self):
   1436         pass  # Nothing to do.
   1437 
   1438     def handle_false(self):
   1439         pass  # Nothing to do.
   1440 
   1441     def handle_asm(self):
   1442         pass  # Not needed yet.
   1443 
    def handle_class(self):
        """Handle a 'class' keyword by parsing the full class declaration."""
        return self._GetClass(Class, VISIBILITY_PRIVATE, None)
   1446 
   1447     def _GetBases(self):
   1448         # Get base classes.
   1449         bases = []
   1450         while 1:
   1451             token = self._GetNextToken()
   1452             assert token.token_type == tokenize.NAME, token
   1453             # TODO(nnorwitz): store kind of inheritance...maybe.
   1454             if token.name not in ('public', 'protected', 'private'):
   1455                 # If inheritance type is not specified, it is private.
   1456                 # Just put the token back so we can form a name.
   1457                 # TODO(nnorwitz): it would be good to warn about this.
   1458                 self._AddBackToken(token)
   1459             else:
   1460                 # Check for virtual inheritance.
   1461                 token = self._GetNextToken()
   1462                 if token.name != 'virtual':
   1463                     self._AddBackToken(token)
   1464                 else:
   1465                     # TODO(nnorwitz): store that we got virtual for this base.
   1466                     pass
   1467             base, next_token = self.GetName()
   1468             bases_ast = self.converter.ToType(base)
   1469             assert len(bases_ast) == 1, bases_ast
   1470             bases.append(bases_ast[0])
   1471             assert next_token.token_type == tokenize.SYNTAX, next_token
   1472             if next_token.name == '{':
   1473                 token = next_token
   1474                 break
   1475             # Support multiple inheritance.
   1476             assert next_token.name == ',', next_token
   1477         return bases, token
   1478 
    def _GetClass(self, class_type, visibility, templated_types):
        """Parse a class/struct declaration or definition.

        Args:
          class_type: the node class to instantiate (e.g. Class or Struct).
          visibility: default member visibility for the class body.
          templated_types: template parameters, or None if not templated.

        Returns:
          A class_type node, a variable node (for 'class C v;' style
          declarations), or the result of GetMethod().
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            # Anonymous class/struct: the next token is already syntax.
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            # NOTE(review): these two asserts restate the guarding 'if'
            # condition and can never fire; kept for parity with upstream.
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Recursively parse the class body with a nested builder.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # 'class C { ... } instance_name;' — wrap the class in a
                    # variable declaration.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    # NOTE(review): token.name is passed both as the variable
                    # name and as the ref/pointer argument — presumably
                    # intentional upstream; confirm against _CreateVariable.
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, None, body, self.namespace_stack)
   1550 
    def handle_namespace(self):
        """Handle a 'namespace' keyword: named, anonymous, or alias ('=') form.

        Pushes the namespace name (None for anonymous) onto namespace_stack
        and injects an internal pop token into the stream so the stack is
        popped when the namespace body has been fully consumed.
        """
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, next_token = self.GetName()
            assert next_token.name == ';', next_token
            self._AddBackToken(internal_token)
        else:
            assert token.name == '{', token
            tokens = list(self.GetScope())
            # Replace the trailing } with the internal namespace pop token.
            tokens[-1] = internal_token
            # Handle namespace with nothing in it.
            self._AddBackTokens(tokens)
        return None
   1577 
   1578     def handle_using(self):
   1579         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
   1580         assert tokens
   1581         return Using(tokens[0].start, tokens[0].end, tokens)
   1582 
    def handle_explicit(self):
        """Handle an 'explicit' keyword, which must introduce a constructor."""
        assert self.in_class
        # Nothing much to do.
        # TODO(nnorwitz): maybe verify the method name == class name.
        # This must be a ctor.
        return self.GetMethod(FUNCTION_CTOR, None)
   1589 
   1590     def handle_this(self):
   1591         pass  # Nothing to do.
   1592 
   1593     def handle_operator(self):
   1594         # Pull off the next token(s?) and make that part of the method name.
   1595         pass
   1596 
   1597     def handle_sizeof(self):
   1598         pass
   1599 
   1600     def handle_case(self):
   1601         pass
   1602 
   1603     def handle_switch(self):
   1604         pass
   1605 
   1606     def handle_default(self):
   1607         token = self._GetNextToken()
   1608         assert token.token_type == tokenize.SYNTAX
   1609         assert token.name == ':'
   1610 
   1611     def handle_if(self):
   1612         pass
   1613 
   1614     def handle_else(self):
   1615         pass
   1616 
   1617     def handle_return(self):
   1618         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
   1619         if not tokens:
   1620             return Return(self.current_token.start, self.current_token.end, None)
   1621         return Return(tokens[0].start, tokens[0].end, tokens)
   1622 
   1623     def handle_goto(self):
   1624         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
   1625         assert len(tokens) == 1, str(tokens)
   1626         return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
   1627 
   1628     def handle_try(self):
   1629         pass  # Not needed yet.
   1630 
   1631     def handle_catch(self):
   1632         pass  # Not needed yet.
   1633 
   1634     def handle_throw(self):
   1635         pass  # Not needed yet.
   1636 
   1637     def handle_while(self):
   1638         pass
   1639 
   1640     def handle_do(self):
   1641         pass
   1642 
   1643     def handle_for(self):
   1644         pass
   1645 
    def handle_break(self):
        """'break' statement: discard tokens through the terminating ';'."""
        self._IgnoreUpTo(tokenize.SYNTAX, ';')
   1648 
    def handle_continue(self):
        """'continue' statement: discard tokens through the terminating ';'."""
        self._IgnoreUpTo(tokenize.SYNTAX, ';')
   1651 
   1652 
   1653 def BuilderFromSource(source, filename):
   1654     """Utility method that returns an AstBuilder from source code.
   1655 
   1656     Args:
   1657       source: 'C++ source code'
   1658       filename: 'file1'
   1659 
   1660     Returns:
   1661       AstBuilder
   1662     """
   1663     return AstBuilder(tokenize.GetTokens(source), filename)
   1664 
   1665 
   1666 def PrintIndentifiers(filename, should_print):
   1667     """Prints all identifiers for a C++ source file.
   1668 
   1669     Args:
   1670       filename: 'file1'
   1671       should_print: predicate with signature: bool Function(token)
   1672     """
   1673     source = utils.ReadFile(filename, False)
   1674     if source is None:
   1675         sys.stderr.write('Unable to find: %s\n' % filename)
   1676         return
   1677 
   1678     #print('Processing %s' % actual_filename)
   1679     builder = BuilderFromSource(source, filename)
   1680     try:
   1681         for node in builder.Generate():
   1682             if should_print(node):
   1683                 print(node.name)
   1684     except KeyboardInterrupt:
   1685         return
   1686     except:
   1687         pass
   1688 
   1689 
   1690 def PrintAllIndentifiers(filenames, should_print):
   1691     """Prints all identifiers for each C++ source file in filenames.
   1692 
   1693     Args:
   1694       filenames: ['file1', 'file2', ...]
   1695       should_print: predicate with signature: bool Function(token)
   1696     """
   1697     for path in filenames:
   1698         PrintIndentifiers(path, should_print)
   1699 
   1700 
   1701 def main(argv):
   1702     for filename in argv[1:]:
   1703         source = utils.ReadFile(filename)
   1704         if source is None:
   1705             continue
   1706 
   1707         print('Processing %s' % filename)
   1708         builder = BuilderFromSource(source, filename)
   1709         try:
   1710             entire_ast = filter(None, builder.Generate())
   1711         except KeyboardInterrupt:
   1712             return
   1713         except:
   1714             # Already printed a warning, print the traceback and continue.
   1715             traceback.print_exc()
   1716         else:
   1717             if utils.DEBUG:
   1718                 for ast in entire_ast:
   1719                     print(ast)
   1720 
   1721 
# Script entry point: parse the files named on the command line.
if __name__ == '__main__':
    main(sys.argv)
   1724