# cpp/ast.py  (code-hosting navigation header removed)
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2007 Neal Norwitz
      4 # Portions Copyright 2007 Google Inc.
      5 #
      6 # Licensed under the Apache License, Version 2.0 (the "License");
      7 # you may not use this file except in compliance with the License.
      8 # You may obtain a copy of the License at
      9 #
     10 #      http://www.apache.org/licenses/LICENSE-2.0
     11 #
     12 # Unless required by applicable law or agreed to in writing, software
     13 # distributed under the License is distributed on an "AS IS" BASIS,
     14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 # See the License for the specific language governing permissions and
     16 # limitations under the License.
     17 
     18 """Generate an Abstract Syntax Tree (AST) for C++."""
     19 
__author__ = 'nnorwitz@google.com (Neal Norwitz)'
     21 
     22 
     23 # TODO:
     24 #  * Tokens should never be exported, need to convert to Nodes
     25 #    (return types, parameters, etc.)
     26 #  * Handle static class data for templatized classes
     27 #  * Handle casts (both C++ and C-style)
     28 #  * Handle conditions and loops (if/else, switch, for, while/do)
     29 #
     30 # TODO much, much later:
     31 #  * Handle #define
     32 #  * exceptions
     33 
     34 
     35 try:
     36     # Python 3.x
     37     import builtins
     38 except ImportError:
     39     # Python 2.x
     40     import __builtin__ as builtins
     41 
     42 import sys
     43 import traceback
     44 
     45 from cpp import keywords
     46 from cpp import tokenize
     47 from cpp import utils
     48 
     49 
if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    # Fallback generator equivalent to the builtin: yields seq items
    # from the last index down to 0.
    def reversed(seq):
        for i in range(len(seq)-1, -1, -1):
            yield seq[i]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    # Fallback that calls the old-style iterator protocol method.
    def next(obj):
        return obj.next()
     60 
     61 
# Access-specifier constants for class members.
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Power-of-two flags describing a parsed function; combined into a single
# int bitmask (Function.__str__ formats it as hex).
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80
FUNCTION_OVERRIDE = 0x100

"""
These are currently unused.  Should really handle these properly at some point.

TYPE_MODIFIER_INLINE   = 0x010000
TYPE_MODIFIER_EXTERN   = 0x020000
TYPE_MODIFIER_STATIC   = 0x040000
TYPE_MODIFIER_CONST    = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE  = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

# Sentinel token type/name pushed onto the token queue to mark namespace
# scope exits; AstBuilder.Generate pops the namespace stack when it sees them.
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'
     99 
    100 
    101 # TODO(nnorwitz): use this as a singleton for templated_types, etc
    102 # where we don't want to create a new empty dict each time.  It is also const.
    103 class _NullDict(object):
    104     __contains__ = lambda self: False
    105     keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
    106 
    107 
    108 # TODO(nnorwitz): move AST nodes into a separate module.
    109 class Node(object):
    110     """Base AST node."""
    111 
    112     def __init__(self, start, end):
    113         self.start = start
    114         self.end = end
    115 
    116     def IsDeclaration(self):
    117         """Returns bool if this node is a declaration."""
    118         return False
    119 
    120     def IsDefinition(self):
    121         """Returns bool if this node is a definition."""
    122         return False
    123 
    124     def IsExportable(self):
    125         """Returns bool if this node exportable from a header file."""
    126         return False
    127 
    128     def Requires(self, node):
    129         """Does this AST node require the definition of the node passed in?"""
    130         return False
    131 
    132     def XXX__str__(self):
    133         return self._StringHelper(self.__class__.__name__, '')
    134 
    135     def _StringHelper(self, name, suffix):
    136         if not utils.DEBUG:
    137             return '%s(%s)' % (name, suffix)
    138         return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)
    139 
    140     def __repr__(self):
    141         return str(self)
    142 
    143 
    144 class Define(Node):
    145     def __init__(self, start, end, name, definition):
    146         Node.__init__(self, start, end)
    147         self.name = name
    148         self.definition = definition
    149 
    150     def __str__(self):
    151         value = '%s %s' % (self.name, self.definition)
    152         return self._StringHelper(self.__class__.__name__, value)
    153 
    154 
    155 class Include(Node):
    156     def __init__(self, start, end, filename, system):
    157         Node.__init__(self, start, end)
    158         self.filename = filename
    159         self.system = system
    160 
    161     def __str__(self):
    162         fmt = '"%s"'
    163         if self.system:
    164             fmt = '<%s>'
    165         return self._StringHelper(self.__class__.__name__, fmt % self.filename)
    166 
    167 
    168 class Goto(Node):
    169     def __init__(self, start, end, label):
    170         Node.__init__(self, start, end)
    171         self.label = label
    172 
    173     def __str__(self):
    174         return self._StringHelper(self.__class__.__name__, str(self.label))
    175 
    176 
    177 class Expr(Node):
    178     def __init__(self, start, end, expr):
    179         Node.__init__(self, start, end)
    180         self.expr = expr
    181 
    182     def Requires(self, node):
    183         # TODO(nnorwitz): impl.
    184         return False
    185 
    186     def __str__(self):
    187         return self._StringHelper(self.__class__.__name__, str(self.expr))
    188 
    189 
class Return(Expr):
    """A return statement; the returned expression is held in self.expr."""
    pass
    192 
    193 
class Delete(Expr):
    """A delete expression; the deleted operand is held in self.expr."""
    pass
    196 
    197 
    198 class Friend(Expr):
    199     def __init__(self, start, end, expr, namespace):
    200         Expr.__init__(self, start, end, expr)
    201         self.namespace = namespace[:]
    202 
    203 
    204 class Using(Node):
    205     def __init__(self, start, end, names):
    206         Node.__init__(self, start, end)
    207         self.names = names
    208 
    209     def __str__(self):
    210         return self._StringHelper(self.__class__.__name__, str(self.names))
    211 
    212 
    213 class Parameter(Node):
    214     def __init__(self, start, end, name, parameter_type, default):
    215         Node.__init__(self, start, end)
    216         self.name = name
    217         self.type = parameter_type
    218         self.default = default
    219 
    220     def Requires(self, node):
    221         # TODO(nnorwitz): handle namespaces, etc.
    222         return self.type.name == node.name
    223 
    224     def __str__(self):
    225         name = str(self.type)
    226         suffix = '%s %s' % (name, self.name)
    227         if self.default:
    228             suffix += ' = ' + ''.join([d.name for d in self.default])
    229         return self._StringHelper(self.__class__.__name__, suffix)
    230 
    231 
    232 class _GenericDeclaration(Node):
    233     def __init__(self, start, end, name, namespace):
    234         Node.__init__(self, start, end)
    235         self.name = name
    236         self.namespace = namespace[:]
    237 
    238     def FullName(self):
    239         prefix = ''
    240         if self.namespace and self.namespace[-1]:
    241             prefix = '::'.join(self.namespace) + '::'
    242         return prefix + self.name
    243 
    244     def _TypeStringHelper(self, suffix):
    245         if self.namespace:
    246             names = [n or '<anonymous>' for n in self.namespace]
    247             suffix += ' in ' + '::'.join(names)
    248         return self._StringHelper(self.__class__.__name__, suffix)
    249 
    250 
    251 # TODO(nnorwitz): merge with Parameter in some way?
    252 class VariableDeclaration(_GenericDeclaration):
    253     def __init__(self, start, end, name, var_type, initial_value, namespace):
    254         _GenericDeclaration.__init__(self, start, end, name, namespace)
    255         self.type = var_type
    256         self.initial_value = initial_value
    257 
    258     def Requires(self, node):
    259         # TODO(nnorwitz): handle namespaces, etc.
    260         return self.type.name == node.name
    261 
    262     def ToString(self):
    263         """Return a string that tries to reconstitute the variable decl."""
    264         suffix = '%s %s' % (self.type, self.name)
    265         if self.initial_value:
    266             suffix += ' = ' + self.initial_value
    267         return suffix
    268 
    269     def __str__(self):
    270         return self._StringHelper(self.__class__.__name__, self.ToString())
    271 
    272 
    273 class Typedef(_GenericDeclaration):
    274     def __init__(self, start, end, name, alias, namespace):
    275         _GenericDeclaration.__init__(self, start, end, name, namespace)
    276         self.alias = alias
    277 
    278     def IsDefinition(self):
    279         return True
    280 
    281     def IsExportable(self):
    282         return True
    283 
    284     def Requires(self, node):
    285         # TODO(nnorwitz): handle namespaces, etc.
    286         name = node.name
    287         for token in self.alias:
    288             if token is not None and name == token.name:
    289                 return True
    290         return False
    291 
    292     def __str__(self):
    293         suffix = '%s, %s' % (self.name, self.alias)
    294         return self._TypeStringHelper(suffix)
    295 
    296 
    297 class _NestedType(_GenericDeclaration):
    298     def __init__(self, start, end, name, fields, namespace):
    299         _GenericDeclaration.__init__(self, start, end, name, namespace)
    300         self.fields = fields
    301 
    302     def IsDefinition(self):
    303         return True
    304 
    305     def IsExportable(self):
    306         return True
    307 
    308     def __str__(self):
    309         suffix = '%s, {%s}' % (self.name, self.fields)
    310         return self._TypeStringHelper(suffix)
    311 
    312 
class Union(_NestedType):
    """A union definition; fields come from _NestedType."""
    pass
    315 
    316 
class Enum(_NestedType):
    """An enum definition; enumerators come from _NestedType.fields."""
    pass
    319 
    320 
    321 class Class(_GenericDeclaration):
    322     def __init__(self, start, end, name, bases, templated_types, body, namespace):
    323         _GenericDeclaration.__init__(self, start, end, name, namespace)
    324         self.bases = bases
    325         self.body = body
    326         self.templated_types = templated_types
    327 
    328     def IsDeclaration(self):
    329         return self.bases is None and self.body is None
    330 
    331     def IsDefinition(self):
    332         return not self.IsDeclaration()
    333 
    334     def IsExportable(self):
    335         return not self.IsDeclaration()
    336 
    337     def Requires(self, node):
    338         # TODO(nnorwitz): handle namespaces, etc.
    339         if self.bases:
    340             for token_list in self.bases:
    341                 # TODO(nnorwitz): bases are tokens, do name comparision.
    342                 for token in token_list:
    343                     if token.name == node.name:
    344                         return True
    345         # TODO(nnorwitz): search in body too.
    346         return False
    347 
    348     def __str__(self):
    349         name = self.name
    350         if self.templated_types:
    351             name += '<%s>' % self.templated_types
    352         suffix = '%s, %s, %s' % (name, self.bases, self.body)
    353         return self._TypeStringHelper(suffix)
    354 
    355 
class Struct(Class):
    """A struct; treated identically to Class."""
    pass
    358 
    359 
    360 class Function(_GenericDeclaration):
    361     def __init__(self, start, end, name, return_type, parameters,
    362                  modifiers, templated_types, body, namespace):
    363         _GenericDeclaration.__init__(self, start, end, name, namespace)
    364         converter = TypeConverter(namespace)
    365         self.return_type = converter.CreateReturnType(return_type)
    366         self.parameters = converter.ToParameters(parameters)
    367         self.modifiers = modifiers
    368         self.body = body
    369         self.templated_types = templated_types
    370 
    371     def IsDeclaration(self):
    372         return self.body is None
    373 
    374     def IsDefinition(self):
    375         return self.body is not None
    376 
    377     def IsExportable(self):
    378         if self.return_type and 'static' in self.return_type.modifiers:
    379             return False
    380         return None not in self.namespace
    381 
    382     def Requires(self, node):
    383         if self.parameters:
    384             # TODO(nnorwitz): parameters are tokens, do name comparision.
    385             for p in self.parameters:
    386                 if p.name == node.name:
    387                     return True
    388         # TODO(nnorwitz): search in body too.
    389         return False
    390 
    391     def __str__(self):
    392         # TODO(nnorwitz): add templated_types.
    393         suffix = ('%s %s(%s), 0x%02x, %s' %
    394                   (self.return_type, self.name, self.parameters,
    395                    self.modifiers, self.body))
    396         return self._TypeStringHelper(suffix)
    397 
    398 
class Method(Function):
    """A member function; in_class names the class declaring it."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
    407 
    408 
    409 class Type(_GenericDeclaration):
    410     """Type used for any variable (eg class, primitive, struct, etc)."""
    411 
    412     def __init__(self, start, end, name, templated_types, modifiers,
    413                  reference, pointer, array):
    414         """
    415         Args:
    416           name: str name of main type
    417           templated_types: [Class (Type?)] template type info between <>
    418           modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
    419           reference, pointer, array: bools
    420         """
    421         _GenericDeclaration.__init__(self, start, end, name, [])
    422         self.templated_types = templated_types
    423         if not name and modifiers:
    424             self.name = modifiers.pop()
    425         self.modifiers = modifiers
    426         self.reference = reference
    427         self.pointer = pointer
    428         self.array = array
    429 
    430     def __str__(self):
    431         prefix = ''
    432         if self.modifiers:
    433             prefix = ' '.join(self.modifiers) + ' '
    434         name = str(self.name)
    435         if self.templated_types:
    436             name += '<%s>' % self.templated_types
    437         suffix = prefix + name
    438         if self.reference:
    439             suffix += '&'
    440         if self.pointer:
    441             suffix += '*'
    442         if self.array:
    443             suffix += '[]'
    444         return self._TypeStringHelper(suffix)
    445 
    446     # By definition, Is* are always False.  A Type can only exist in
    447     # some sort of variable declaration, parameter, or return value.
    448     def IsDeclaration(self):
    449         return False
    450 
    451     def IsDefinition(self):
    452         return False
    453 
    454     def IsExportable(self):
    455         return False
    456 
    457 
class TypeConverter(object):
    """Converts raw token sequences into Type/Parameter AST nodes.

    The namespace_stack is stored for callers; the conversion methods
    themselves operate purely on the token names/types they are given.
    """

    def __init__(self, namespace_stack):
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        """Find the '>' matching an already-consumed '<'.

        Args:
          tokens: token sequence; tokens[start] is the first token after
            the opening '<'.
          start: int index just past the opening '<'.

        Returns:
          (tokens strictly between the brackets, index just past the '>').
        """
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            if name_tokens:
                result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                                   name, templated_types, modifiers,
                                   reference, pointer, array))
            # Reset for the next type; clear in place so the closure sees it.
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            # NOTE(review): '[' sets pointer rather than array here,
            # presumably because arrays decay to pointers -- confirm.
            elif token.name == '[':
               pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split one declaration's tokens into its constituent pieces.

        Args:
          parts: [Token] making up a single declaration.
          needs_name_removed: bool; True when the trailing declared name
            (and any '=' default value) should be stripped out of parts.

        Returns:
          (name, type_name, templated_types, modifiers, default,
           other_tokens): name is str or None; type_name is a str joined
          from the name tokens; templated_types is the ToType() result of
          any <...> section; modifiers is [str] of keywords; default is
          the tokens following '='; other_tokens holds leftover
          punctuation tokens ('*', '&', '[', ']', '=', '>').
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    # An array declarator: the name precedes the '[...]'.
                    if name == ']' and parts[i-2].name == '[':
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                # No '=' found: the declared name is the last NAME token.
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                    p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert a parameter-list token sequence into [Parameter].

        Commas at template depth 0 separate parameters.  '*', '&', and '['
        set the pointer/reference/array flags, and '=' begins a default
        value (a leading None is used internally as a flag).
        """
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter(end):
            # Build a Parameter from the tokens accumulated since the
            # previous comma (or the start of the list).
            if default:
                del default[0]  # Remove flag.
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                # Inside <...>: keep everything verbatim, including commas.
                type_modifiers.append(s)
                continue

            if s.name == ',':
                AddParameter(s.start)
                # Reset per-parameter state for the next parameter.
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter(tokens[-1].end)
        return result

    def CreateReturnType(self, return_type_seq):
        """Build a Type node from return-type tokens; None if empty."""
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
           self.DeclarationToParts(return_type_seq, False)
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Return (start, end) slice bounds spanning '<'...'>' in names."""
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1
    675 
    676 class AstBuilder(object):
    def __init__(self, token_stream, filename, in_class='', visibility=None,
                 namespace_stack=None):
        """Initialize the builder over a token stream.

        Args:
          token_stream: iterable of tokens to parse.
          filename: str name of the file being parsed (for error messages).
          in_class: str fully-qualified name of the enclosing class, '' for
            file scope, or None.
          visibility: current member visibility, or None.
          namespace_stack: list of enclosing namespace names; copied, so the
            caller's list is never aliased.  Defaults to an empty stack.
        """
        self.tokens = token_stream
        self.filename = filename
        # TODO(nnorwitz): use a better data structure (deque) for the queue.
        # Switching directions of the "queue" improved perf by about 25%.
        # Using a deque should be even better since we access from both sides.
        self.token_queue = []
        # Fix: avoid a mutable default argument; None now stands in for [].
        if namespace_stack is None:
            namespace_stack = []
        self.namespace_stack = namespace_stack[:]
        self.in_class = in_class
        if in_class is None:
            self.in_class_name_only = None
        else:
            # Only the last component matters for ctor/dtor detection.
            self.in_class_name_only = in_class.split('::')[-1]
        self.visibility = visibility
        self.in_function = False
        self.current_token = None
        # Keep the state whether we are currently handling a typedef or not.
        self._handling_typedef = False

        self.converter = TypeConverter(self.namespace_stack)
    698 
    699     def HandleError(self, msg, token):
    700         printable_queue = list(reversed(self.token_queue[-20:]))
    701         sys.stderr.write('Got %s in %s @ %s %s\n' %
    702                          (msg, self.filename, token, printable_queue))
    703 
    def Generate(self):
        """Yield AST nodes one at a time until the token stream runs out."""
        while True:
            token = self._GetNextToken()
            if not token:
                return

            self.current_token = token

            # Internal bookkeeping tokens adjust state but yield nothing.
            if token.token_type == _INTERNAL_TOKEN:
                if token.name == _NAMESPACE_POP:
                    self.namespace_stack.pop()
                continue

            try:
                result = self._GenerateOne(token)
                if result is not None:
                    yield result
            except:
                # Bare except is deliberate: report the failing token for
                # any error, then propagate it unchanged.
                self.HandleError('exception', token)
                raise
    726 
    def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
                        ref_pointer_name_seq, templated_types, value=None):
        """Build a VariableDeclaration node anchored at pos_token."""
        is_reference = '&' in ref_pointer_name_seq
        is_pointer = '*' in ref_pointer_name_seq
        is_array = '[' in ref_pointer_name_seq
        var_type = Type(pos_token.start, pos_token.end, type_name,
                        templated_types, type_modifiers,
                        is_reference, is_pointer, is_array)
        return VariableDeclaration(pos_token.start, pos_token.end,
                                   name, var_type, value, self.namespace_stack)
    737 
    738     def _GenerateOne(self, token):
    739         if token.token_type == tokenize.NAME:
    740             if (keywords.IsKeyword(token.name) and
    741                 not keywords.IsBuiltinType(token.name)):
    742                 method = getattr(self, 'handle_' + token.name)
    743                 return method()
    744             elif token.name == self.in_class_name_only:
    745                 # The token name is the same as the class, must be a ctor if
    746                 # there is a paren.  Otherwise, it's the return type.
    747                 # Peek ahead to get the next token to figure out which.
    748                 next = self._GetNextToken()
    749                 self._AddBackToken(next)
    750                 if next.token_type == tokenize.SYNTAX and next.name == '(':
    751                     return self._GetMethod([token], FUNCTION_CTOR, None, True)
    752                 # Fall through--handle like any other method.
    753 
    754             # Handle data or function declaration/definition.
    755             syntax = tokenize.SYNTAX
    756             temp_tokens, last_token = \
    757                 self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
    758             temp_tokens.insert(0, token)
    759             if last_token.name == '(':
    760                 # If there is an assignment before the paren,
    761                 # this is an expression, not a method.
    762                 expr = bool([e for e in temp_tokens if e.name == '='])
    763                 if expr:
    764                     new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    765                     temp_tokens.append(last_token)
    766                     temp_tokens.extend(new_temp)
    767                     last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)
    768 
    769             if last_token.name == '[':
    770                 # Handle array, this isn't a method, unless it's an operator.
    771                 # TODO(nnorwitz): keep the size somewhere.
    772                 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
    773                 temp_tokens.append(last_token)
    774                 if temp_tokens[-2].name == 'operator':
    775                     temp_tokens.append(self._GetNextToken())
    776                 else:
    777                     temp_tokens2, last_token = \
    778                         self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
    779                     temp_tokens.extend(temp_tokens2)
    780 
    781             if last_token.name == ';':
    782                 # Handle data, this isn't a method.
    783                 parts = self.converter.DeclarationToParts(temp_tokens, True)
    784                 (name, type_name, templated_types, modifiers, default,
    785                  unused_other_tokens) = parts
    786 
    787                 t0 = temp_tokens[0]
    788                 names = [t.name for t in temp_tokens]
    789                 if templated_types:
    790                     start, end = self.converter.GetTemplateIndices(names)
    791                     names = names[:start] + names[end:]
    792                 default = ''.join([t.name for t in default])
    793                 return self._CreateVariable(t0, name, type_name, modifiers,
    794                                             names, templated_types, default)
    795             if last_token.name == '{':
    796                 self._AddBackTokens(temp_tokens[1:])
    797                 self._AddBackToken(last_token)
    798                 method_name = temp_tokens[0].name
    799                 method = getattr(self, 'handle_' + method_name, None)
    800                 if not method:
    801                     # Must be declaring a variable.
    802                     # TODO(nnorwitz): handle the declaration.
    803                     return None
    804                 return method()
    805             return self._GetMethod(temp_tokens, 0, None, False)
    806         elif token.token_type == tokenize.SYNTAX:
    807             if token.name == '~' and self.in_class:
    808                 # Must be a dtor (probably not in method body).
    809                 token = self._GetNextToken()
    810                 # self.in_class can contain A::Name, but the dtor will only
    811                 # be Name.  Make sure to compare against the right value.
    812                 if (token.token_type == tokenize.NAME and
    813                     token.name == self.in_class_name_only):
    814                     return self._GetMethod([token], FUNCTION_DTOR, None, True)
    815             # TODO(nnorwitz): handle a lot more syntax.
    816         elif token.token_type == tokenize.PREPROCESSOR:
    817             # TODO(nnorwitz): handle more preprocessor directives.
    818             # token starts with a #, so remove it and strip whitespace.
    819             name = token.name[1:].lstrip()
    820             if name.startswith('include'):
    821                 # Remove "include".
    822                 name = name[7:].strip()
    823                 assert name
    824                 # Handle #include \<newline> "header-on-second-line.h".
    825                 if name.startswith('\\'):
    826                     name = name[1:].strip()
    827                 assert name[0] in '<"', token
    828                 assert name[-1] in '>"', token
    829                 system = name[0] == '<'
    830                 filename = name[1:-1]
    831                 return Include(token.start, token.end, filename, system)
    832             if name.startswith('define'):
    833                 # Remove "define".
    834                 name = name[6:].strip()
    835                 assert name
    836                 value = ''
    837                 for i, c in enumerate(name):
    838                     if c.isspace():
    839                         value = name[i:].lstrip()
    840                         name = name[:i]
    841                         break
    842                 return Define(token.start, token.end, name, value)
    843             if name.startswith('if') and name[2:3].isspace():
    844                 condition = name[3:].strip()
    845                 if condition.startswith('0') or condition.startswith('(0)'):
    846                     self._SkipIf0Blocks()
    847         return None
    848 
    def _GetTokensUpTo(self, expected_token_type, expected_token):
        """Returns the tokens consumed before the expected token is seen."""
        tokens, _ = self._GetVarTokensUpTo(expected_token_type, expected_token)
        return tokens
    851 
    def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
        """Consumes tokens until one matches the expected type and name.

        Returns ([tokens before the match], matching_token).
        """
        collected = []
        current = self._GetNextToken()
        while not (current.token_type == expected_token_type and
                   current.name in expected_tokens):
            collected.append(current)
            current = self._GetNextToken()
        return collected, current
    860 
    # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
    def _IgnoreUpTo(self, token_type, token):
        """Consumes and discards tokens up to the expected token."""
        self._GetTokensUpTo(token_type, token)
    864 
    def _SkipIf0Blocks(self):
        """Skips tokens until the #endif matching the current #if is found.

        Nested #if* directives are tracked so inner blocks are skipped too.
        """
        depth = 1
        while depth:
            token = self._GetNextToken()
            if token.token_type != tokenize.PREPROCESSOR:
                continue
            # Strip the leading '#' and whitespace to get the directive.
            directive = token.name[1:].lstrip()
            if directive.startswith('endif'):
                depth -= 1
            elif directive.startswith('if'):
                # Covers #if, #ifdef, and #ifndef.
                depth += 1
    879 
    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
        """Yields tokens up to and including the matching close_paren.

        Assumes the open_paren token was already consumed.  Nested
        open/close pairs are tracked and passed through.
        """
        if GetNextToken is None:
            GetNextToken = self._GetNextToken
        depth = 1
        while True:
            token = GetNextToken()
            if token.token_type == tokenize.SYNTAX:
                if token.name == open_paren:
                    depth += 1
                elif token.name == close_paren:
                    depth -= 1
                    if depth == 0:
                        # Yield the final close_paren and stop.
                        yield token
                        return
            yield token
    898 
    def _GetParameters(self):
        """Returns a generator over the tokens of a '(...)' parameter list.

        The trailing ')' is included; the '(' must already be consumed.
        """
        return self._GetMatchingChar('(', ')')

    def GetScope(self):
        """Returns a generator over the tokens of a '{...}' scope body.

        The trailing '}' is included; the '{' must already be consumed.
        """
        return self._GetMatchingChar('{', '}')
    904 
    def _GetNextToken(self):
        """Returns the next token, preferring pushed-back tokens."""
        queue = self.token_queue
        if queue:
            return queue.pop()
        return next(self.tokens)
    909 
    def _AddBackToken(self, token):
        """Pushes one token back so _GetNextToken() will return it again."""
        if token.whence == tokenize.WHENCE_STREAM:
            # A token fresh from the stream goes behind any tokens that
            # were already queued (front of the list is popped last).
            token.whence = tokenize.WHENCE_QUEUE
            self.token_queue.insert(0, token)
            return
        assert token.whence == tokenize.WHENCE_QUEUE, token
        self.token_queue.append(token)
    917 
    def _AddBackTokens(self, tokens):
        """Pushes a sequence of tokens back, preserving their order."""
        if not tokens:
            return
        if tokens[-1].whence == tokenize.WHENCE_STREAM:
            # Stream tokens: mark as queued and prepend (reversed so the
            # first token of the sequence is popped first).
            for token in tokens:
                token.whence = tokenize.WHENCE_QUEUE
            self.token_queue[:0] = reversed(tokens)
        else:
            assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
            self.token_queue.extend(reversed(tokens))
    927 
    def GetName(self, seq=None):
        """Returns ([name tokens], token that terminated the name).

        Reads a possibly qualified/templated name such as ns::Foo<T>.
        Tokens come from seq when provided, otherwise from the stream.
        """
        if seq is None:
            get_token = self._GetNextToken
        else:
            iterator = iter(seq)
            get_token = lambda: next(iterator)
        tokens = []
        prev_was_name = False
        token = get_token()
        while (token.token_type == tokenize.NAME or
               (token.token_type == tokenize.SYNTAX and
                token.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if prev_was_name and token.token_type == tokenize.NAME:
                break
            prev_was_name = token.token_type == tokenize.NAME
            tokens.append(token)
            if token.name == '<':
                # Pull in the entire <...> template-argument portion.
                tokens.extend(self._GetMatchingChar('<', '>', get_token))
                prev_was_name = True
            token = get_token()
        return tokens, token
    952 
    def GetMethod(self, modifiers, templated_types):
        """Parses a method whose tokens (through the '(') are in the stream."""
        signature = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        assert len(signature) >= 1
        return self._GetMethod(signature, modifiers, templated_types, False)
    958 
    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        """Parses the rest of a function/method declaration or definition.

        Args:
          return_type_and_name: [Token, ...] already-consumed tokens that
            hold the return type followed by the function name.
          modifiers: int, bitmask of FUNCTION_* flags accumulated so far.
          templated_types: template parameter info (or None) stored on the
            resulting node.
          get_paren: bool, when True the opening '(' (and, for templatized
            dtors, the <...> portion) is still in the stream and is
            consumed here.

        Returns:
          A Method node (when the name is qualified with '::'), a Function
          node, or a node from self._CreateVariable when the declaration
          turns out to be function-pointer data.
        """
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        # The last token collected so far is the function's name.
        name = return_type_and_name.pop()
        # Handle templatized ctors.
        if name.name == '>':
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            # Reassemble 'operator' '[' ']' into a single operator[] token.
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        # indices is the token used for start/end positions of the node.
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]              # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]          # Remove trailing ')'.

        # Consume trailing NAME tokens: const/throw/override/attributes.
        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'override':
                modifiers |= FUNCTION_OVERRIDE
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz):  assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]           # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            body = list(self.GetScope())
            del body[-1]                # Remove trailing '}'.
        else:
            body = None
            if token.name == '=':
                token = self._GetNextToken()

                if token.name == 'default' or token.name == 'delete':
                    # Ignore explicitly defaulted and deleted special members
                    # in C++11.
                    token = self._GetNextToken()
                else:
                    # Handle pure-virtual declarations.
                    assert token.token_type == tokenize.CONSTANT, token
                    assert token.name == '0', token
                    modifiers |= FUNCTION_PURE_VIRTUAL
                    token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                         self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)
   1115 
    def _GetReturnTypeAndClassName(self, token_seq):
        """Splits tokens like A::B::C::D into (return type tokens, class name).

        Heuristic: the last name (including its template portion) is the
        class name; everything before it is the return type.
        """
        # Splitting the return type from the class name in a method
        # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
        # Where is the return type and where is the class name?
        # The heuristic used is to pull the last name as the class name.
        # This includes all the templated type info.
        # TODO(nnorwitz): if there is only One name like in the
        # example above, punt and assume the last bit is the class name.

        # Ignore a :: prefix, if exists so we can find the first real name.
        i = 0
        if token_seq[0].name == '::':
            i = 1
        # Ignore a :: suffix, if exists.
        end = len(token_seq) - 1
        if token_seq[end-1].name == '::':
            end -= 1

        # Make a copy of the sequence so we can append a sentinel
        # value.  This is required because GetName has to have some
        # terminating condition beyond the last name.
        seq_copy = token_seq[i:end]
        seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
        names = []
        while i < end:
            # Iterate through the sequence parsing out each name.
            new_name, next = self.GetName(seq_copy[i:])
            assert new_name, 'Got empty new_name, next=%s' % next
            # We got a pointer or ref.  Add it to the name.
            if next and next.token_type == tokenize.SYNTAX:
                new_name.append(next)
            names.append(new_name)
            i += len(new_name)

        # Now that we have the names, it's time to undo what we did.

        # Remove the sentinel value.
        names[-1].pop()
        # Flatten the token sequence for the return type.
        return_type = [e for seq in names[:-1] for e in seq]
        # The class name is the last name.
        class_name = names[-1]
        return return_type, class_name
   1159 
    # The handle_* methods below are dispatched by keyword name when a
    # declaration starts with that keyword.  Primitive type keywords are
    # intentionally no-ops here: nothing needs to be recorded for the
    # keyword itself.
    def handle_bool(self):
        pass

    def handle_char(self):
        pass

    def handle_int(self):
        pass

    def handle_long(self):
        pass

    def handle_short(self):
        pass

    def handle_double(self):
        pass

    def handle_float(self):
        pass

    def handle_void(self):
        pass

    def handle_wchar_t(self):
        pass

    def handle_unsigned(self):
        pass

    def handle_signed(self):
        pass
   1192 
   1193     def _GetNestedType(self, ctor):
   1194         name = None
   1195         name_tokens, token = self.GetName()
   1196         if name_tokens:
   1197             name = ''.join([t.name for t in name_tokens])
   1198 
   1199         # Handle forward declarations.
   1200         if token.token_type == tokenize.SYNTAX and token.name == ';':
   1201             return ctor(token.start, token.end, name, None,
   1202                         self.namespace_stack)
   1203 
   1204         if token.token_type == tokenize.NAME and self._handling_typedef:
   1205             self._AddBackToken(token)
   1206             return ctor(token.start, token.end, name, None,
   1207                         self.namespace_stack)
   1208 
   1209         # Must be the type declaration.
   1210         fields = list(self._GetMatchingChar('{', '}'))
   1211         del fields[-1]                  # Remove trailing '}'.
   1212         if token.token_type == tokenize.SYNTAX and token.name == '{':
   1213             next = self._GetNextToken()
   1214             new_type = ctor(token.start, token.end, name, fields,
   1215                             self.namespace_stack)
   1216             # A name means this is an anonymous type and the name
   1217             # is the variable declaration.
   1218             if next.token_type != tokenize.NAME:
   1219                 return new_type
   1220             name = new_type
   1221             token = next
   1222 
   1223         # Must be variable declaration using the type prefixed with keyword.
   1224         assert token.token_type == tokenize.NAME, token
   1225         return self._CreateVariable(token, token.name, name, [], '', None)
   1226 
    def handle_struct(self):
        """Parses a 'struct' keyword.

        Distinguishes between a struct definition, a variable declared
        with a struct type, and a method declared to return a struct.
        """
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    t0 = name_tokens[0]
                    # Fabricate a 'struct' token positioned just before the
                    # name so the declaration tokens read naturally.
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
   1262 
    def handle_union(self):
        """Parses a union definition or forward declaration."""
        return self._GetNestedType(Union)

    def handle_enum(self):
        """Parses an enum definition or forward declaration."""
        return self._GetNestedType(Enum)
   1268 
    # Storage-class and related keywords are currently ignored; they do
    # not change the nodes generated for the declaration that follows.
    def handle_auto(self):
        # TODO(nnorwitz): warn about using auto?  Probably not since it
        # will be reclaimed and useful for C++0x.
        pass

    def handle_register(self):
        pass

    def handle_const(self):
        pass

    def handle_inline(self):
        pass

    def handle_extern(self):
        pass

    def handle_static(self):
        pass
   1288 
    def handle_virtual(self):
        """Parses the method that follows a 'virtual' keyword.

        Returns a node with FUNCTION_VIRTUAL set (plus FUNCTION_DTOR for
        virtual destructors).
        """
        # What follows must be a method.
        token = token2 = self._GetNextToken()
        if token.name == 'inline':
            # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
            token2 = self._GetNextToken()
        if token2.token_type == tokenize.SYNTAX and token2.name == '~':
            return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
        assert token.token_type == tokenize.NAME or token.name == '::', token
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')  # )
        return_type_and_name.insert(0, token)
        if token2 is not token:
            # Re-insert the token that followed 'inline'.
            return_type_and_name.insert(1, token2)
        return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                               None, False)
   1304 
    def handle_volatile(self):
        pass

    def handle_mutable(self):
        pass

    # Visibility labels are only meaningful inside a class body; they
    # update the current visibility used for subsequent members.
    def handle_public(self):
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC

    def handle_protected(self):
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED

    def handle_private(self):
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE
   1322 
   1323     def handle_friend(self):
   1324         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
   1325         assert tokens
   1326         t0 = tokens[0]
   1327         return Friend(t0.start, t0.end, tokens, self.namespace_stack)
   1328 
    # Cast keywords and 'new' appear inside expressions, which this
    # parser does not handle yet (see the TODO at the top of the file),
    # so they are no-ops.
    def handle_static_cast(self):
        pass

    def handle_const_cast(self):
        pass

    def handle_dynamic_cast(self):
        pass

    def handle_reinterpret_cast(self):
        pass

    def handle_new(self):
        pass
   1343 
   1344     def handle_delete(self):
   1345         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
   1346         assert tokens
   1347         return Delete(tokens[0].start, tokens[0].end, tokens)
   1348 
    def handle_typedef(self):
        """Parses a typedef declaration and returns a Typedef node."""
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        name = tokens.pop()
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)
   1388 
    def handle_typeid(self):
        pass  # Not needed yet.

    def handle_typename(self):
        pass  # Not needed yet.
   1394 
    def _GetTemplatedTypes(self):
        """Parses template parameters; the '<' must already be consumed.

        Returns:
          dict mapping each template parameter name to a
          (type_name, default) pair, where either element may be None.
        """
        result = {}
        tokens = list(self._GetMatchingChar('<', '>'))
        len_tokens = len(tokens) - 1    # Ignore trailing '>'.
        i = 0
        while i < len_tokens:
            key = tokens[i].name
            i += 1
            # Skip 'typename'/'class' keywords and separators.
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if i < len_tokens:
                i += 1
                if tokens[i-1].name == '=':
                    assert i < len_tokens, '%s %s' % (i, tokens)
                    default, unused_next_token = self.GetName(tokens[i:])
                    i += len(default)
                else:
                    if tokens[i-1].name != ',':
                        # We got something like: Type variable.
                        # Re-adjust the key (variable) and type_name (Type).
                        key = tokens[i-1].name
                        type_name = tokens[i-2]

            result[key] = (type_name, default)
        return result
   1421 
    def handle_template(self):
        """Parses a template declaration: class, struct, friend, or method.

        Returns the parsed node, or None for a templated variable
        definition (currently not modeled).
        """
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '<', token
        templated_types = self._GetTemplatedTypes()
        # TODO(nnorwitz): for now, just ignore the template params.
        token = self._GetNextToken()
        if token.token_type == tokenize.NAME:
            if token.name == 'class':
                return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
            elif token.name == 'struct':
                return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
            elif token.name == 'friend':
                return self.handle_friend()
        self._AddBackToken(token)
        # Peek ahead: a '(' before ';' means a templated function/method.
        tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
        tokens.append(last)
        self._AddBackTokens(tokens)
        if last.name == '(':
            return self.GetMethod(FUNCTION_NONE, templated_types)
        # Must be a variable definition.
        return None
   1444 
   1445     def handle_true(self):
   1446         pass  # Nothing to do.
   1447 
   1448     def handle_false(self):
   1449         pass  # Nothing to do.
   1450 
   1451     def handle_asm(self):
   1452         pass  # Not needed yet.
   1453 
   1454     def handle_class(self):
   1455         return self._GetClass(Class, VISIBILITY_PRIVATE, None)
   1456 
   1457     def _GetBases(self):
   1458         # Get base classes.
   1459         bases = []
   1460         while 1:
   1461             token = self._GetNextToken()
   1462             assert token.token_type == tokenize.NAME, token
   1463             # TODO(nnorwitz): store kind of inheritance...maybe.
   1464             if token.name not in ('public', 'protected', 'private'):
   1465                 # If inheritance type is not specified, it is private.
   1466                 # Just put the token back so we can form a name.
   1467                 # TODO(nnorwitz): it would be good to warn about this.
   1468                 self._AddBackToken(token)
   1469             else:
   1470                 # Check for virtual inheritance.
   1471                 token = self._GetNextToken()
   1472                 if token.name != 'virtual':
   1473                     self._AddBackToken(token)
   1474                 else:
   1475                     # TODO(nnorwitz): store that we got virtual for this base.
   1476                     pass
   1477             base, next_token = self.GetName()
   1478             bases_ast = self.converter.ToType(base)
   1479             assert len(bases_ast) == 1, bases_ast
   1480             bases.append(bases_ast[0])
   1481             assert next_token.token_type == tokenize.SYNTAX, next_token
   1482             if next_token.name == '{':
   1483                 token = next_token
   1484                 break
   1485             # Support multiple inheritance.
   1486             assert next_token.name == ',', next_token
   1487         return bases, token
   1488 
    def _GetClass(self, class_type, visibility, templated_types):
        """Parse a class/struct declaration or definition.

        Args:
          class_type: Class or Struct, the node type to create.
          visibility: default member visibility (VISIBILITY_* constant).
          templated_types: template parameter dict from _GetTemplatedTypes,
              or None when the class is not templated.

        Returns:
          A class_type node for declarations/definitions, or the result of
          _CreateVariable/GetMethod for inline declarations such as
          'class Foo* p;' or 'class Foo& Method(...)'.
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Parse the class body with a nested AstBuilder scoped to the
            # class's tokens, name, and default visibility.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # A name follows the closing '}': this is a combined
                    # definition and variable declaration, e.g.
                    # 'class Foo { ... } instance;'.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, templated_types, body, self.namespace_stack)
   1560 
    def handle_namespace(self):
        """Handle a 'namespace' declaration (named, anonymous, or alias).

        Pushes the namespace name onto namespace_stack and arranges for an
        internal _NAMESPACE_POP token to pop it when the namespace ends.

        Returns:
          None; the namespace's contents are re-queued for normal parsing.
        """
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, next_token = self.GetName()
            assert next_token.name == ';', next_token
            self._AddBackToken(internal_token)
        else:
            assert token.name == '{', token
            tokens = list(self.GetScope())
            # Replace the trailing } with the internal namespace pop token.
            tokens[-1] = internal_token
            # Handle namespace with nothing in it.
            self._AddBackTokens(tokens)
        return None
   1587 
   1588     def handle_using(self):
   1589         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
   1590         assert tokens
   1591         return Using(tokens[0].start, tokens[0].end, tokens)
   1592 
   1593     def handle_explicit(self):
   1594         assert self.in_class
   1595         # Nothing much to do.
   1596         # TODO(nnorwitz): maybe verify the method name == class name.
   1597         # This must be a ctor.
   1598         return self.GetMethod(FUNCTION_CTOR, None)
   1599 
   1600     def handle_this(self):
   1601         pass  # Nothing to do.
   1602 
   1603     def handle_operator(self):
   1604         # Pull off the next token(s?) and make that part of the method name.
   1605         pass
   1606 
   1607     def handle_sizeof(self):
   1608         pass
   1609 
   1610     def handle_case(self):
   1611         pass
   1612 
   1613     def handle_switch(self):
   1614         pass
   1615 
   1616     def handle_default(self):
   1617         token = self._GetNextToken()
   1618         assert token.token_type == tokenize.SYNTAX
   1619         assert token.name == ':'
   1620 
   1621     def handle_if(self):
   1622         pass
   1623 
   1624     def handle_else(self):
   1625         pass
   1626 
   1627     def handle_return(self):
   1628         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
   1629         if not tokens:
   1630             return Return(self.current_token.start, self.current_token.end, None)
   1631         return Return(tokens[0].start, tokens[0].end, tokens)
   1632 
   1633     def handle_goto(self):
   1634         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
   1635         assert len(tokens) == 1, str(tokens)
   1636         return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
   1637 
   1638     def handle_try(self):
   1639         pass  # Not needed yet.
   1640 
   1641     def handle_catch(self):
   1642         pass  # Not needed yet.
   1643 
   1644     def handle_throw(self):
   1645         pass  # Not needed yet.
   1646 
   1647     def handle_while(self):
   1648         pass
   1649 
   1650     def handle_do(self):
   1651         pass
   1652 
   1653     def handle_for(self):
   1654         pass
   1655 
   1656     def handle_break(self):
   1657         self._IgnoreUpTo(tokenize.SYNTAX, ';')
   1658 
   1659     def handle_continue(self):
   1660         self._IgnoreUpTo(tokenize.SYNTAX, ';')
   1661 
   1662 
   1663 def BuilderFromSource(source, filename):
   1664     """Utility method that returns an AstBuilder from source code.
   1665 
   1666     Args:
   1667       source: 'C++ source code'
   1668       filename: 'file1'
   1669 
   1670     Returns:
   1671       AstBuilder
   1672     """
   1673     return AstBuilder(tokenize.GetTokens(source), filename)
   1674 
   1675 
   1676 def PrintIndentifiers(filename, should_print):
   1677     """Prints all identifiers for a C++ source file.
   1678 
   1679     Args:
   1680       filename: 'file1'
   1681       should_print: predicate with signature: bool Function(token)
   1682     """
   1683     source = utils.ReadFile(filename, False)
   1684     if source is None:
   1685         sys.stderr.write('Unable to find: %s\n' % filename)
   1686         return
   1687 
   1688     #print('Processing %s' % actual_filename)
   1689     builder = BuilderFromSource(source, filename)
   1690     try:
   1691         for node in builder.Generate():
   1692             if should_print(node):
   1693                 print(node.name)
   1694     except KeyboardInterrupt:
   1695         return
   1696     except:
   1697         pass
   1698 
   1699 
   1700 def PrintAllIndentifiers(filenames, should_print):
   1701     """Prints all identifiers for each C++ source file in filenames.
   1702 
   1703     Args:
   1704       filenames: ['file1', 'file2', ...]
   1705       should_print: predicate with signature: bool Function(token)
   1706     """
   1707     for path in filenames:
   1708         PrintIndentifiers(path, should_print)
   1709 
   1710 
   1711 def main(argv):
   1712     for filename in argv[1:]:
   1713         source = utils.ReadFile(filename)
   1714         if source is None:
   1715             continue
   1716 
   1717         print('Processing %s' % filename)
   1718         builder = BuilderFromSource(source, filename)
   1719         try:
   1720             entire_ast = filter(None, builder.Generate())
   1721         except KeyboardInterrupt:
   1722             return
   1723         except:
   1724             # Already printed a warning, print the traceback and continue.
   1725             traceback.print_exc()
   1726         else:
   1727             if utils.DEBUG:
   1728                 for ast in entire_ast:
   1729                     print(ast)
   1730 
   1731 
   1732 if __name__ == '__main__':
   1733     main(sys.argv)
   1734