      1 #!/usr/bin/env python
      2 """A glorified C pre-processor parser."""
      3 
      4 import ctypes
      5 import logging
      6 import os
      7 import re
      8 import site
      9 import unittest
     10 import utils
     11 
     12 top = os.getenv('ANDROID_BUILD_TOP')
     13 if top is None:
     14     utils.panic('ANDROID_BUILD_TOP not set.\n')
     15 
     16 # Set up the env vars for libclang.
     17 site.addsitedir(os.path.join(top, 'external/clang/bindings/python'))
     18 
     19 import clang.cindex
     20 from clang.cindex import conf
     21 from clang.cindex import Cursor
     22 from clang.cindex import CursorKind
     23 from clang.cindex import SourceLocation
     24 from clang.cindex import SourceRange
     25 from clang.cindex import TokenGroup
     26 from clang.cindex import TokenKind
     27 from clang.cindex import TranslationUnit
     28 
     29 # Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, etc.
     30 # Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
     31 clang.cindex.Config.set_library_file(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64/libclang_android.so'))
     32 
     33 from defaults import kCppUndefinedMacro
     34 from defaults import kernel_remove_config_macros
     35 from defaults import kernel_struct_replacements
     36 from defaults import kernel_token_replacements
     37 
     38 
     39 debugBlockParser = False
     40 debugCppExpr = False
     41 debugOptimIf01 = False
     42 
     43 ###############################################################################
     44 ###############################################################################
     45 #####                                                                     #####
     46 #####           C P P   T O K E N S                                       #####
     47 #####                                                                     #####
     48 ###############################################################################
     49 ###############################################################################
     50 
     51 # The list of supported C-preprocessor tokens,
     52 # plus a couple of C tokens as well.
     53 tokEOF = "\0"
     54 tokLN = "\n"
     55 tokSTRINGIFY = "#"
     56 tokCONCAT = "##"
     57 tokLOGICAND = "&&"
     58 tokLOGICOR = "||"
     59 tokSHL = "<<"
     60 tokSHR = ">>"
     61 tokEQUAL = "=="
     62 tokNEQUAL = "!="
     63 tokLT = "<"
     64 tokLTE = "<="
     65 tokGT = ">"
     66 tokGTE = ">="
     67 tokELLIPSIS = "..."
     68 tokSPACE = " "
     69 tokDEFINED = "defined"
     70 tokLPAREN = "("
     71 tokRPAREN = ")"
     72 tokNOT = "!"
     73 tokPLUS = "+"
     74 tokMINUS = "-"
     75 tokMULTIPLY = "*"
     76 tokDIVIDE = "/"
     77 tokMODULUS = "%"
     78 tokBINAND = "&"
     79 tokBINOR = "|"
     80 tokBINXOR = "^"
     81 tokCOMMA = ","
     82 tokLBRACE = "{"
     83 tokRBRACE = "}"
     84 tokARROW = "->"
     85 tokINCREMENT = "++"
     86 tokDECREMENT = "--"
     87 tokNUMBER = "<number>"
     88 tokIDENT = "<ident>"
     89 tokSTRING = "<string>"
     90 
     91 
     92 class Token(clang.cindex.Token):
     93     """A class that represents one token after parsing.
     94 
     95     It inherits the Token class in libclang and adds an extra id property to
     96     hold the new spelling of the token, since the spelling property in the
     97     base class is read-only. New names after macro instantiation are saved
     98     in the id, which also makes directive rewrites such as replacing
     99     'ifndef X' with 'if !defined(X)' easier.
    100 
    101     It also overrides the cursor property of the base class, because the one
    102     in libclang is queried from a single token and usually doesn't hold
    103     useful information. The cursor in this class can be set by calling
    104     CppTokenizer._getTokensWithCursors(); otherwise it falls back to the one
    105     in the base class.
    106     """
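            # Illustrative example: renames are recorded through the id property,
            # while the read-only libclang spelling stays untouched, e.g.
            #   tok = Token()
            #   tok.id = '&&'    # hypothetical new spelling
            #   str(tok)         # -> '&&'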
    107 
    108     def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
    109                  cursor=None):
    110         clang.cindex.Token.__init__(self)
    111         self._id = None
    112         self._tu = tu
    113         self._group = group
    114         self._cursor = cursor
    115         # self.int_data and self.ptr_data are from the base class. But
    116         # self.int_data doesn't accept a None value.
    117         if int_data is not None:
    118             self.int_data = int_data
    119         self.ptr_data = ptr_data
    120 
    121     @property
    122     def id(self):
    123         """Name of the token."""
    124         if self._id is None:
    125             return self.spelling
    126         else:
    127             return self._id
    128 
    129     @id.setter
    130     def id(self, new_id):
    131         """Setting name of the token."""
    132         self._id = new_id
    133 
    134     @property
    135     def cursor(self):
    136         if self._cursor is None:
    137             self._cursor = clang.cindex.Token.cursor.fget(self)
    138         return self._cursor
    139 
    140     @cursor.setter
    141     def cursor(self, new_cursor):
    142         self._cursor = new_cursor
    143 
    144     def __repr__(self):
    145         if self.id == 'defined':
    146             return self.id
    147         elif self.kind == TokenKind.IDENTIFIER:
    148             return "(ident %s)" % self.id
    149 
    150         return self.id
    151 
    152     def __str__(self):
    153         return self.id
    154 
    155 
    156 class BadExpectedToken(Exception):
    157     """An exception that will be raised for unexpected tokens."""
    158     pass
    159 
    160 
    161 # The __contains__ function in libclang's SourceRange class contains a bug:
    162 # it gives the wrong result when dealing with a single-line range.
    163 # Bug filed with upstream:
    164 # http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
    165 def SourceRange__contains__(self, other):
    166     """Determine if a given location is inside the range."""
    167     if not isinstance(other, SourceLocation):
    168         return False
    169     if other.file is None and self.start.file is None:
    170         pass
    171     elif (self.start.file.name != other.file.name or
    172           other.file.name != self.end.file.name):
    173         # not all in the same file
    174         return False
    175     # same file, in between lines
    176     if self.start.line < other.line < self.end.line:
    177         return True
    178     # same file, same line
    179     elif self.start.line == other.line == self.end.line:
    180         if self.start.column <= other.column <= self.end.column:
    181             return True
    182     elif self.start.line == other.line:
    183         # same file first line
    184         if self.start.column <= other.column:
    185             return True
    186     elif other.line == self.end.line:
    187         # same file last line
    188         if other.column <= self.end.column:
    189             return True
    190     return False
    191 
    192 
    193 SourceRange.__contains__ = SourceRange__contains__
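        # Illustrative behaviour of the patched operator (file name and positions
        # are hypothetical): a SourceLocation at foo.h:3:5 is contained both in a
        # range spanning foo.h lines 2-5 and in the single-line range
        # foo.h:3:1 through foo.h:3:40, which the unpatched version got wrong.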
    194 
    195 
    196 ################################################################################
    197 ################################################################################
    198 #####                                                                      #####
    199 #####           C P P   T O K E N I Z E R                                  #####
    200 #####                                                                      #####
    201 ################################################################################
    202 ################################################################################
    203 
    204 
    205 class CppTokenizer(object):
    206     """A tokenizer that converts some input text into a list of tokens.
    207 
    208     It calls libclang's tokenizer to get the parsed tokens. In addition, it
    209     updates the cursor property in each token after parsing, by calling
    210     getTokensWithCursors().
    211     """
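            # Illustrative usage (mirroring the unit tests below):
            #   tokenizer = CppStringTokenizer("FOO(BAR) && defined(BAZ)")
            #   tokenizer.nextToken().id   # -> 'FOO'
            # Subsequent calls yield '(', 'BAR', ')', '&&', ... and finally None.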
    212 
    213     clang_flags = ['-E', '-x', 'c']
    214     options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
    215 
    216     def __init__(self):
    217         """Initialize a new CppTokenizer object."""
    218         self._indexer = clang.cindex.Index.create()
    219         self._tu = None
    220         self._index = 0
    221         self.tokens = None
    222 
    223     def _getTokensWithCursors(self):
    224         """Helper method to return all tokens with their cursors.
    225 
    226         The cursor property in a clang Token doesn't provide enough
    227         information, because it is queried one token at a time without any
    228         context, i.e. by calling conf.lib.clang_annotateTokens() with only
    229         one token given. As a result we often see 'INVALID_FILE' in a
    230         token's cursor. This function instead passes all the available
    231         tokens at once to get more informative cursors.
    232         """
    233 
    234         tokens_memory = ctypes.POINTER(clang.cindex.Token)()
    235         tokens_count = ctypes.c_uint()
    236 
    237         conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
    238                                 ctypes.byref(tokens_memory),
    239                                 ctypes.byref(tokens_count))
    240 
    241         count = int(tokens_count.value)
    242 
    243         # If we get no tokens, no memory was allocated. Be sure not to return
    244         # anything and potentially call a destructor on nothing.
    245         if count < 1:
    246             return
    247 
    248         cursors = (Cursor * count)()
    249         cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))
    250 
    251         conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
    252                                       cursors_memory)
    253 
    254         tokens_array = ctypes.cast(
    255             tokens_memory,
    256             ctypes.POINTER(clang.cindex.Token * count)).contents
    257         token_group = TokenGroup(self._tu, tokens_memory, tokens_count)
    258 
    259         tokens = []
    260         for i in xrange(0, count):
    261             token = Token(self._tu, token_group,
    262                           int_data=tokens_array[i].int_data,
    263                           ptr_data=tokens_array[i].ptr_data,
    264                           cursor=cursors[i])
    265             # We only want non-comment tokens.
    266             if token.kind != TokenKind.COMMENT:
    267                 tokens.append(token)
    268 
    269         return tokens
    270 
    271     def parseString(self, lines):
    272         """Parse a string of source text and store the tokens in self.tokens."""
    273         file_ = 'dummy.c'
    274         self._tu = self._indexer.parse(file_, self.clang_flags,
    275                                        unsaved_files=[(file_, lines)],
    276                                        options=self.options)
    277         self.tokens = self._getTokensWithCursors()
    278 
    279     def parseFile(self, file_):
    280         """Parse a file and store the tokens in self.tokens."""
    281         self._tu = self._indexer.parse(file_, self.clang_flags,
    282                                        options=self.options)
    283         self.tokens = self._getTokensWithCursors()
    284 
    285     def nextToken(self):
    286         """Return next token from the list."""
    287         if self._index < len(self.tokens):
    288             t = self.tokens[self._index]
    289             self._index += 1
    290             return t
    291         else:
    292             return None
    293 
    294 
    295 class CppStringTokenizer(CppTokenizer):
    296     """A CppTokenizer derived class that accepts a string of text as input."""
    297 
    298     def __init__(self, line):
    299         CppTokenizer.__init__(self)
    300         self.parseString(line)
    301 
    302 
    303 class CppFileTokenizer(CppTokenizer):
    304     """A CppTokenizer derived class that accepts a file as input."""
    305 
    306     def __init__(self, file_):
    307         CppTokenizer.__init__(self)
    308         self.parseFile(file_)
    309 
    310 
    311 # Unit testing
    312 #
    313 class CppTokenizerTests(unittest.TestCase):
    314     """CppTokenizer tests."""
    315 
    316     def get_tokens(self, token_string, line_col=False):
    317         tokens = CppStringTokenizer(token_string)
    318         token_list = []
    319         while True:
    320             token = tokens.nextToken()
    321             if not token:
    322                 break
    323             if line_col:
    324                 token_list.append((token.id, token.location.line,
    325                                    token.location.column))
    326             else:
    327                 token_list.append(token.id)
    328         return token_list
    329 
    330     def test_hash(self):
    331         self.assertEqual(self.get_tokens("#an/example  && (01923_xy)"),
    332                          ["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
    333                           "01923_xy", tokRPAREN])
    334 
    335     def test_parens(self):
    336         self.assertEqual(self.get_tokens("FOO(BAR) && defined(BAZ)"),
    337                          ["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
    338                           "defined", tokLPAREN, "BAZ", tokRPAREN])
    339 
    340     def test_comment(self):
    341         self.assertEqual(self.get_tokens("/*\n#\n*/"), [])
    342 
    343     def test_line_cross(self):
    344         self.assertEqual(self.get_tokens("first\nsecond"), ["first", "second"])
    345 
    346     def test_line_cross_line_col(self):
    347         self.assertEqual(self.get_tokens("first second\n  third", True),
    348                          [("first", 1, 1), ("second", 1, 7), ("third", 2, 3)])
    349 
    350     def test_comment_line_col(self):
    351         self.assertEqual(self.get_tokens("boo /* what the\nhell */", True),
    352                          [("boo", 1, 1)])
    353 
    354     def test_escapes(self):
    355         self.assertEqual(self.get_tokens("an \\\n example", True),
    356                          [("an", 1, 1), ("example", 2, 2)])
    357 
    358 
    359 ################################################################################
    360 ################################################################################
    361 #####                                                                      #####
    362 #####           C P P   E X P R E S S I O N S                              #####
    363 #####                                                                      #####
    364 ################################################################################
    365 ################################################################################
    366 
    367 
    368 class CppExpr(object):
    369     """A class that models the condition of #if directives into an expr tree.
    370 
    371     Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
    372     "op" is a string describing the operation.
    373     """
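            # For example (mirroring the unit tests below), "A == 1 || defined(B)"
            # is parsed into the tree:
            #   ("||", ("==", ("ident", "A"), ("int", 1)), ("defined", "B"))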
    374 
    375     unaries = ["!", "~"]
    376     binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
    377                 "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
    378     precedences = {
    379         "?": 1, ":": 1,
    380         "||": 2,
    381         "&&": 3,
    382         "|": 4,
    383         "^": 5,
    384         "&": 6,
    385         "==": 7, "!=": 7,
    386         "<": 8, "<=": 8, ">": 8, ">=": 8,
    387         "<<": 9, ">>": 9,
    388         "+": 10, "-": 10,
    389         "*": 11, "/": 11, "%": 11,
    390         "!": 12, "~": 12
    391     }
    392 
    393     def __init__(self, tokens):
    394         """Initialize a CppExpr. 'tokens' must be a CppToken list."""
    395         self.tokens = tokens
    396         self._num_tokens = len(tokens)
    397         self._index = 0
    398 
    399         if debugCppExpr:
    400             print "CppExpr: trying to parse %s" % repr(tokens)
    401         self.expr = self.parseExpression(0)
    402         if debugCppExpr:
    403             print "CppExpr: got " + repr(self.expr)
    404         if self._index != self._num_tokens:
    405             self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
    406                        % (self._index, self._num_tokens, repr(tokens)))
    407 
    408     def throw(self, exception, msg):
    409         if self._index < self._num_tokens:
    410             tok = self.tokens[self._index]
    411             print "%d:%d: %s" % (tok.location.line, tok.location.column, msg)
    412         else:
    413             print "EOF: %s" % msg
    414         raise exception(msg)
    415 
    416     def expectId(self, id):
    417         """Check that a given token id is at the current position."""
    418         if (self._index >= self._num_tokens or
    419             self.tokens[self._index].id != id):
    420             self.throw(BadExpectedToken,
    421                        "### expecting '%s' in expression, got '%s'" % (
    422                            id, self.token()))
    423         self._index += 1
    424 
    425     def is_decimal(self):
    426         token = self.tokens[self._index].id
    427         if token[-1] in "ULul":
    428             token = token[:-1]
    429         try:
    430             val = int(token, 10)
    431             self._index += 1
    432             return ('int', val)
    433         except ValueError:
    434             return None
    435 
    436     def is_octal(self):
    437         token = self.tokens[self._index].id
    438         if token[-1] in "ULul":
    439             token = token[:-1]
    440         if len(token) < 2 or token[0] != '0':
    441             return None
    442         try:
    443             val = int(token, 8)
    444             self._index += 1
    445             return ('oct', val)
    446         except ValueError:
    447             return None
    448 
    449     def is_hexadecimal(self):
    450         token = self.tokens[self._index].id
    451         if token[-1] in "ULul":
    452             token = token[:-1]
    453         if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
    454             return None
    455         try:
    456             val = int(token, 16)
    457             self._index += 1
    458             return ('hex', val)
    459         except ValueError:
    460             return None
    461 
    462     def is_integer(self):
    463         if self.tokens[self._index].kind != TokenKind.LITERAL:
    464             return None
    465 
    466         c = self.is_hexadecimal()
    467         if c:
    468             return c
    469 
    470         c = self.is_octal()
    471         if c:
    472             return c
    473 
    474         c = self.is_decimal()
    475         if c:
    476             return c
    477 
    478         return None
    479 
    480     def is_number(self):
    481         t = self.tokens[self._index]
    482         if t.id == tokMINUS and self._index + 1 < self._num_tokens:
    483             self._index += 1
    484             c = self.is_integer()
    485             if c:
    486                 op, val = c
    487                 return (op, -val)
    488         if t.id == tokPLUS and self._index + 1 < self._num_tokens:
    489             self._index += 1
    490             c = self.is_integer()
    491             if c:
    492                 return c
    493 
    494         return self.is_integer()
    495 
    496     def is_defined(self):
    497         t = self.tokens[self._index]
    498         if t.id != tokDEFINED:
    499             return None
    500 
    501         # We have the defined keyword, check the rest.
    502         self._index += 1
    503         used_parens = False
    504         if (self._index < self._num_tokens and
    505             self.tokens[self._index].id == tokLPAREN):
    506             used_parens = True
    507             self._index += 1
    508 
    509         if self._index >= self._num_tokens:
    510             self.throw(BadExpectedToken,
    511                        "### 'defined' must be followed by macro name or left "
    512                        "paren")
    513 
    514         t = self.tokens[self._index]
    515         if t.kind != TokenKind.IDENTIFIER:
    516             self.throw(BadExpectedToken,
    517                        "### 'defined' must be followed by macro name")
    518 
    519         self._index += 1
    520         if used_parens:
    521             self.expectId(tokRPAREN)
    522 
    523         return ("defined", t.id)
    524 
    525     def is_call_or_ident(self):
    526         if self._index >= self._num_tokens:
    527             return None
    528 
    529         t = self.tokens[self._index]
    530         if t.kind != TokenKind.IDENTIFIER:
    531             return None
    532 
    533         name = t.id
    534 
    535         self._index += 1
    536         if (self._index >= self._num_tokens or
    537             self.tokens[self._index].id != tokLPAREN):
    538             return ("ident", name)
    539 
    540         params = []
    541         depth = 1
    542         self._index += 1
    543         j = self._index
    544         while self._index < self._num_tokens:
    545             id = self.tokens[self._index].id
    546             if id == tokLPAREN:
    547                 depth += 1
    548             elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
    549                 k = self._index
    550                 param = self.tokens[j:k]
    551                 params.append(param)
    552                 if id == tokRPAREN:
    553                     break
    554                 j = self._index + 1
    555             elif id == tokRPAREN:
    556                 depth -= 1
    557             self._index += 1
    558 
    559         if self._index >= self._num_tokens:
    560             return None
    561 
    562         self._index += 1
    563         return ("call", (name, params))
    564 
    565     # Implements the "precedence climbing" algorithm from
    566     # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
    567     # The "classic" algorithm would be fine if we were using a tool to
    568     # generate the parser, but we're not. Dijkstra's "shunting yard"
    569     # algorithm hasn't been necessary yet.
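            #
            # For example (illustrative): given the tokens of "A || B && C",
            # parseExpression(0) reads "A", sees "||" (precedence 2) and recurses
            # with minPrecedence 3, which consumes "B && C" before the "||" node
            # is built, yielding
            #   ("||", ("ident", "A"), ("&&", ("ident", "B"), ("ident", "C"))).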
    570 
    571     def parseExpression(self, minPrecedence):
    572         if self._index >= self._num_tokens:
    573             return None
    574 
    575         node = self.parsePrimary()
    576         while (self.token() and self.isBinary(self.token()) and
    577                self.precedence(self.token()) >= minPrecedence):
    578             op = self.token()
    579             self.nextToken()
    580             rhs = self.parseExpression(self.precedence(op) + 1)
    581             node = (op.id, node, rhs)
    582 
    583         return node
    584 
    585     def parsePrimary(self):
    586         op = self.token()
    587         if self.isUnary(op):
    588             self.nextToken()
    589             return (op.id, self.parseExpression(self.precedence(op)))
    590 
    591         primary = None
    592         if op.id == tokLPAREN:
    593             self.nextToken()
    594             primary = self.parseExpression(0)
    595             self.expectId(tokRPAREN)
    596         elif op.id == "?":
    597             self.nextToken()
    598             primary = self.parseExpression(0)
    599             self.expectId(":")
    600         elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
    601             primary = self.is_number()
    602         # Checking for 'defined' needs to come first now because 'defined' is
    603         # recognized as IDENTIFIER.
    604         elif op.id == tokDEFINED:
    605             primary = self.is_defined()
    606         elif op.kind == TokenKind.IDENTIFIER:
    607             primary = self.is_call_or_ident()
    608         else:
    609             self.throw(BadExpectedToken,
    610                        "didn't expect to see a %s in factor" % (
    611                            self.tokens[self._index].id))
    612         return primary
    613 
    614     def isBinary(self, token):
    615         return token.id in self.binaries
    616 
    617     def isUnary(self, token):
    618         return token.id in self.unaries
    619 
    620     def precedence(self, token):
    621         return self.precedences.get(token.id)
    622 
    623     def token(self):
    624         if self._index >= self._num_tokens:
    625             return None
    626         return self.tokens[self._index]
    627 
    628     def nextToken(self):
    629         self._index += 1
    630         if self._index >= self._num_tokens:
    631             return None
    632         return self.tokens[self._index]
    633 
    634     def dump_node(self, e):
    635         op = e[0]
    636         line = "(" + op
    637         if op == "int":
    638             line += " %d)" % e[1]
    639         elif op == "oct":
    640             line += " 0%o)" % e[1]
    641         elif op == "hex":
    642             line += " 0x%x)" % e[1]
    643         elif op == "ident":
    644             line += " %s)" % e[1]
    645         elif op == "defined":
    646             line += " %s)" % e[1]
    647         elif op == "call":
    648             arg = e[1]
    649             line += " %s [" % arg[0]
    650             prefix = ""
    651             for param in arg[1]:
    652                 par = ""
    653                 for tok in param:
    654                     par += str(tok)
    655                 line += "%s%s" % (prefix, par)
    656                 prefix = ","
    657             line += "])"
    658         elif op in CppExpr.unaries:
    659             line += " %s)" % self.dump_node(e[1])
    660         elif op in CppExpr.binaries:
    661             line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
    662         else:
    663             line += " ?%s)" % repr(e[1])
    664 
    665         return line
    666 
    667     def __repr__(self):
    668         return self.dump_node(self.expr)
    669 
    670     def source_node(self, e):
    671         op = e[0]
    672         if op == "int":
    673             return "%d" % e[1]
    674         if op == "hex":
    675             return "0x%x" % e[1]
    676         if op == "oct":
    677             return "0%o" % e[1]
    678         if op == "ident":
    679             # XXX: should try to expand
    680             return e[1]
    681         if op == "defined":
    682             return "defined(%s)" % e[1]
    683 
    684         prec = CppExpr.precedences.get(op, 1000)
    685         arg = e[1]
    686         if op in CppExpr.unaries:
    687             arg_src = self.source_node(arg)
    688             arg_op = arg[0]
    689             arg_prec = CppExpr.precedences.get(arg_op, 1000)
    690             if arg_prec < prec:
    691                 return op + "(" + arg_src + ")"
    692             else:
    693                 return op + arg_src
    694         if op in CppExpr.binaries:
    695             arg2 = e[2]
    696             arg1_op = arg[0]
    697             arg2_op = arg2[0]
    698             arg1_src = self.source_node(arg)
    699             arg2_src = self.source_node(arg2)
    700             if CppExpr.precedences.get(arg1_op, 1000) < prec:
    701                 arg1_src = "(%s)" % arg1_src
    702             if CppExpr.precedences.get(arg2_op, 1000) < prec:
    703                 arg2_src = "(%s)" % arg2_src
    704 
    705             return "%s %s %s" % (arg1_src, op, arg2_src)
    706         return "???"
    707 
    708     def __str__(self):
    709         return self.source_node(self.expr)
    710 
    711     @staticmethod
    712     def int_node(e):
    713         if e[0] in ["int", "oct", "hex"]:
    714             return e[1]
    715         else:
    716             return None
    717 
    718     def toInt(self):
    719         return self.int_node(self.expr)
    720 
    721     def optimize_node(self, e, macros=None):
    722         if macros is None:
    723             macros = {}
    724         op = e[0]
    725 
    726         if op == "defined":
    727             op, name = e
    728             if name in macros:
    729                 if macros[name] == kCppUndefinedMacro:
    730                     return ("int", 0)
    731                 else:
    732                     try:
    733                         value = int(macros[name])
    734                         return ("int", value)
    735                     except ValueError:
    736                         return ("defined", macros[name])
    737 
    738             if kernel_remove_config_macros and name.startswith("CONFIG_"):
    739                 return ("int", 0)
    740 
    741             return e
    742 
    743         elif op == "ident":
    744             op, name = e
    745             if name in macros:
    746                 try:
    747                     value = int(macros[name])
    748                     expanded = ("int", value)
    749                 except ValueError:
    750                     expanded = ("ident", macros[name])
    751                 return self.optimize_node(expanded, macros)
    752             return e
    753 
    754         elif op == "!":
    755             op, v = e
    756             v = self.optimize_node(v, macros)
    757             if v[0] == "int":
    758                 if v[1] == 0:
    759                     return ("int", 1)
    760                 else:
    761                     return ("int", 0)
    762             return ('!', v)
    763 
    764         elif op == "&&":
    765             op, l, r = e
    766             l = self.optimize_node(l, macros)
    767             r = self.optimize_node(r, macros)
    768             li = self.int_node(l)
    769             ri = self.int_node(r)
    770             if li is not None:
    771                 if li == 0:
    772                     return ("int", 0)
    773                 else:
    774                     return r
    775             elif ri is not None:
    776                 if ri == 0:
    777                     return ("int", 0)
    778                 else:
    779                     return l
    780             return (op, l, r)
    781 
    782         elif op == "||":
    783             op, l, r = e
    784             l = self.optimize_node(l, macros)
    785             r = self.optimize_node(r, macros)
    786             li = self.int_node(l)
    787             ri = self.int_node(r)
    788             if li is not None:
    789                 if li == 0:
    790                     return r
    791                 else:
    792                     return ("int", 1)
    793             elif ri is not None:
    794                 if ri == 0:
    795                     return l
    796                 else:
    797                     return ("int", 1)
    798             return (op, l, r)
    799 
    800         else:
    801             return e
    802 
    803     def optimize(self, macros=None):
    804         if macros is None:
    805             macros = {}
    806         self.expr = self.optimize_node(self.expr, macros)
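                # Illustrative use (mirroring the unit tests below):
                #   e = CppExpr(CppStringTokenizer("defined(A) && defined(B)").tokens)
                #   e.optimize({"A": "1"})
                #   repr(e)   # -> "(defined B)"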
    807 
    808 class CppExprTest(unittest.TestCase):
    809     """CppExpr unit tests."""
    810 
    811     def get_expr(self, expr):
    812         return repr(CppExpr(CppStringTokenizer(expr).tokens))
    813 
    814     def test_cpp_expr(self):
    815         self.assertEqual(self.get_expr("0"), "(int 0)")
    816         self.assertEqual(self.get_expr("1"), "(int 1)")
    817         self.assertEqual(self.get_expr("-5"), "(int -5)")
    818         self.assertEqual(self.get_expr("+1"), "(int 1)")
    819         self.assertEqual(self.get_expr("0U"), "(int 0)")
    820         self.assertEqual(self.get_expr("015"), "(oct 015)")
    821         self.assertEqual(self.get_expr("015l"), "(oct 015)")
    822         self.assertEqual(self.get_expr("0x3e"), "(hex 0x3e)")
    823         self.assertEqual(self.get_expr("(0)"), "(int 0)")
    824         self.assertEqual(self.get_expr("1 && 1"), "(&& (int 1) (int 1))")
    825         self.assertEqual(self.get_expr("1 && 0"), "(&& (int 1) (int 0))")
    826         self.assertEqual(self.get_expr("EXAMPLE"), "(ident EXAMPLE)")
    827         self.assertEqual(self.get_expr("EXAMPLE - 3"),
    828                          "(- (ident EXAMPLE) (int 3))")
    829         self.assertEqual(self.get_expr("defined(EXAMPLE)"),
    830                          "(defined EXAMPLE)")
    831         self.assertEqual(self.get_expr("defined ( EXAMPLE ) "),
    832                          "(defined EXAMPLE)")
    833         self.assertEqual(self.get_expr("!defined(EXAMPLE)"),
    834                          "(! (defined EXAMPLE))")
    835         self.assertEqual(self.get_expr("defined(ABC) || defined(BINGO)"),
    836                          "(|| (defined ABC) (defined BINGO))")
    837         self.assertEqual(self.get_expr("FOO(BAR,5)"), "(call FOO [BAR,5])")
    838         self.assertEqual(self.get_expr("A == 1 || defined(B)"),
    839                          "(|| (== (ident A) (int 1)) (defined B))")
    840 
    841     def get_expr_optimize(self, expr, macros=None):
    842         if macros is None:
    843             macros = {}
    844         e = CppExpr(CppStringTokenizer(expr).tokens)
    845         e.optimize(macros)
    846         return repr(e)
    847 
    848     def test_cpp_expr_optimize(self):
    849         self.assertEqual(self.get_expr_optimize("0"), "(int 0)")
    850         self.assertEqual(self.get_expr_optimize("1"), "(int 1)")
    851         self.assertEqual(self.get_expr_optimize("1 && 1"), "(int 1)")
    852         self.assertEqual(self.get_expr_optimize("1 && +1"), "(int 1)")
    853         self.assertEqual(self.get_expr_optimize("0x1 && 01"), "(oct 01)")
    854         self.assertEqual(self.get_expr_optimize("1 && 0"), "(int 0)")
    855         self.assertEqual(self.get_expr_optimize("0 && 1"), "(int 0)")
    856         self.assertEqual(self.get_expr_optimize("0 && 0"), "(int 0)")
    857         self.assertEqual(self.get_expr_optimize("1 || 1"), "(int 1)")
    858         self.assertEqual(self.get_expr_optimize("1 || 0"), "(int 1)")
    859         self.assertEqual(self.get_expr_optimize("0 || 1"), "(int 1)")
    860         self.assertEqual(self.get_expr_optimize("0 || 0"), "(int 0)")
    861         self.assertEqual(self.get_expr_optimize("A"), "(ident A)")
    862         self.assertEqual(self.get_expr_optimize("A", {"A": 1}), "(int 1)")
    863         self.assertEqual(self.get_expr_optimize("A || B", {"A": 1}), "(int 1)")
    864         self.assertEqual(self.get_expr_optimize("A || B", {"B": 1}), "(int 1)")
    865         self.assertEqual(self.get_expr_optimize("A && B", {"A": 1}), "(ident B)")
    866         self.assertEqual(self.get_expr_optimize("A && B", {"B": 1}), "(ident A)")
    867         self.assertEqual(self.get_expr_optimize("A && B"), "(&& (ident A) (ident B))")
    868         self.assertEqual(self.get_expr_optimize("EXAMPLE"), "(ident EXAMPLE)")
    869         self.assertEqual(self.get_expr_optimize("EXAMPLE - 3"), "(- (ident EXAMPLE) (int 3))")
    870         self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)"), "(defined EXAMPLE)")
    871         self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)",
    872                                                 {"EXAMPLE": "XOWOE"}),
    873                          "(defined XOWOE)")
    874         self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)",
    875                                                 {"EXAMPLE": kCppUndefinedMacro}),
    876                          "(int 0)")
    877         self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)"), "(! (defined EXAMPLE))")
    878         self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)",
    879                                                 {"EXAMPLE": "XOWOE"}),
    880                          "(! (defined XOWOE))")
    881         self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)",
    882                                                 {"EXAMPLE": kCppUndefinedMacro}),
    883                          "(int 1)")
    884         self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)"),
    885                         "(|| (defined A) (defined B))")
    886         self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
    887                                                 {"A": "1"}),
    888                          "(int 1)")
    889         self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
    890                                                 {"B": "1"}),
    891                          "(int 1)")
    892         self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
    893                                                 {"B": kCppUndefinedMacro}),
    894                          "(defined A)")
    895         self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)",
    896                                                 {"A": kCppUndefinedMacro,
    897                                                  "B": kCppUndefinedMacro}),
    898                          "(int 0)")
    899         self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)"),
    900                          "(&& (defined A) (defined B))")
    901         self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
    902                                                 {"A": "1"}),
    903                          "(defined B)")
    904         self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
    905                                                 {"B": "1"}),
    906                          "(defined A)")
    907         self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
    908                                                 {"B": kCppUndefinedMacro}),
    909                         "(int 0)")
    910         self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)",
    911                                                 {"A": kCppUndefinedMacro}),
    912                         "(int 0)")
    913         self.assertEqual(self.get_expr_optimize("A == 1 || defined(B)"),
    914                          "(|| (== (ident A) (int 1)) (defined B))")
    915         self.assertEqual(self.get_expr_optimize(
    916               "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
    917               {"__KERNEL__": kCppUndefinedMacro}),
    918               "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))")
    919 
    920     def get_expr_string(self, expr):
    921         return str(CppExpr(CppStringTokenizer(expr).tokens))
    922 
    923     def test_cpp_expr_string(self):
    924         self.assertEqual(self.get_expr_string("0"), "0")
    925         self.assertEqual(self.get_expr_string("1"), "1")
    926         self.assertEqual(self.get_expr_string("1 && 1"), "1 && 1")
    927         self.assertEqual(self.get_expr_string("1 && 0"), "1 && 0")
    928         self.assertEqual(self.get_expr_string("0 && 1"), "0 && 1")
    929         self.assertEqual(self.get_expr_string("0 && 0"), "0 && 0")
    930         self.assertEqual(self.get_expr_string("1 || 1"), "1 || 1")
    931         self.assertEqual(self.get_expr_string("1 || 0"), "1 || 0")
    932         self.assertEqual(self.get_expr_string("0 || 1"), "0 || 1")
    933         self.assertEqual(self.get_expr_string("0 || 0"), "0 || 0")
    934         self.assertEqual(self.get_expr_string("EXAMPLE"), "EXAMPLE")
    935         self.assertEqual(self.get_expr_string("EXAMPLE - 3"), "EXAMPLE - 3")
    936         self.assertEqual(self.get_expr_string("defined(EXAMPLE)"), "defined(EXAMPLE)")
    937         self.assertEqual(self.get_expr_string("defined EXAMPLE"), "defined(EXAMPLE)")
    938         self.assertEqual(self.get_expr_string("A == 1 || defined(B)"), "A == 1 || defined(B)")
    939 
    940 
    941 ################################################################################
    942 ################################################################################
    943 #####                                                                      #####
    944 #####          C P P   B L O C K                                           #####
    945 #####                                                                      #####
    946 ################################################################################
    947 ################################################################################
    948 
    949 
    950 class Block(object):
    951     """A class used to model a block of input source text.
    952 
    953     There are two block types:
    954       - directive blocks: contain the tokens of a single pre-processor
    955         directive (e.g. #if)
    956       - text blocks: contain the tokens of non-directive source text
    957 
    958     The cpp parser class below will transform an input source file into a list
    959     of Block objects (grouped in a BlockList object for convenience).
    960     """
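            # For example (hypothetical input): the line '#include <stdint.h>'
            # becomes a directive block, while the tokens of
            # 'struct foo { int x; };' form a text block.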
    961 
    962     def __init__(self, tokens, directive=None, lineno=0, identifier=None):
    963         """Initialize a new block. If 'directive' is None, it is a text block.
    964 
    965         NOTE: This automatically converts '#ifdef MACRO' into
    966         '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.
    967         """
    968 
    969         if directive == "ifdef":
    970             tok = Token()
    971             tok.id = tokDEFINED
    972             tokens = [tok] + tokens
    973             directive = "if"
    974 
    975         elif directive == "ifndef":
    976             tok1 = Token()
    977             tok2 = Token()
    978             tok1.id = tokNOT
    979             tok2.id = tokDEFINED
    980             tokens = [tok1, tok2] + tokens
    981             directive = "if"
    982 
    983         self.tokens = tokens
    984         self.directive = directive
    985         self.define_id = identifier
    986         if lineno > 0:
    987             self.lineno = lineno
    988         else:
    989             self.lineno = self.tokens[0].location.line
    990 
    991         if self.isIf():
    992             self.expr = CppExpr(self.tokens)
    993 
    994     def isDirective(self):
    995         """Return True iff this is a directive block."""
    996         return self.directive is not None
    997 
    998     def isConditional(self):
    999         """Return True iff this is a conditional directive block."""
   1000         return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
   1001                                   "endif"]
   1002 
   1003     def isDefine(self):
   1004         """Return the macro name in a #define directive, or None otherwise."""
   1005         if self.directive != "define":
   1006             return None
   1007         return self.define_id
   1008 
   1009     def isIf(self):
   1010         """Return True iff this is an #if-like directive block."""
   1011         return self.directive in ["if", "ifdef", "ifndef", "elif"]
   1012 
   1013     def isEndif(self):
   1014         """Return True iff this is an #endif directive block."""
   1015         return self.directive == "endif"
   1016 
   1017     def isInclude(self):
   1018         """Check whether this is a #include directive.
   1019 
   1020         If so, returns the corresponding file name (with brackets or
   1021         double-quotes). Returns None otherwise.
   1022         """
   1023 
   1024         if self.directive != "include":
   1025             return None
   1026         return ''.join([str(x) for x in self.tokens])
   1027 
   1028     @staticmethod
   1029     def format_blocks(tokens, indent=0):
   1030         """Return the formatted lines of strings with proper indentation."""
   1031         newline = True
   1032         result = []
   1033         buf = ''
   1034         i = 0
   1035         while i < len(tokens):
   1036             t = tokens[i]
   1037             if t.id == '{':
   1038                 buf += ' {'
   1039                 result.append(strip_space(buf))
   1040                 # Do not indent if this is extern "C" {
   1041                 if i < 2 or tokens[i-2].id != 'extern' or tokens[i-1].id != '"C"':
   1042                     indent += 2
   1043                 buf = ''
   1044                 newline = True
   1045             elif t.id == '}':
   1046                 if indent >= 2:
   1047                     indent -= 2
   1048                 if not newline:
   1049                     result.append(strip_space(buf))
   1050                 # Look ahead to determine if it's the end of line.
   1051                 if (i + 1 < len(tokens) and
   1052                     (tokens[i+1].id == ';' or
   1053                      tokens[i+1].id in ['else', '__attribute__',
   1054                                         '__attribute', '__packed'] or
   1055                      tokens[i+1].kind == TokenKind.IDENTIFIER)):
   1056                     buf = ' ' * indent + '}'
   1057                     newline = False
   1058                 else:
   1059                     result.append(' ' * indent + '}')
   1060                     buf = ''
   1061                     newline = True
   1062             elif t.id == ';':
   1063                 result.append(strip_space(buf) + ';')
   1064                 buf = ''
   1065                 newline = True
   1066             # We prefer a new line for each constant in an enum.
   1067             elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
   1068                 result.append(strip_space(buf) + ',')
   1069                 buf = ''
   1070                 newline = True
   1071             else:
   1072                 if newline:
   1073                     buf += ' ' * indent + str(t)
   1074                 else:
   1075                     buf += ' ' + str(t)
   1076                 newline = False
   1077             i += 1
   1078 
   1079         if buf:
   1080             result.append(strip_space(buf))
   1081 
   1082         return result, indent
   1083 
   1084     def write(self, out, indent):
   1085         """Dump the current block."""
   1086         # removeWhiteSpace() will sometimes create non-directive blocks
   1087         # without any tokens. These come from blocks that only contained
   1088         # empty lines and spaces. They should not be printed in the final
   1089         # output, and thus should not be counted for this operation.
   1090         #
   1091         if self.directive is None and not self.tokens:
   1092             return indent
   1093 
   1094         if self.directive:
   1095             out.write(str(self) + '\n')
   1096         else:
   1097             lines, indent = self.format_blocks(self.tokens, indent)
   1098             for line in lines:
   1099                 out.write(line + '\n')
   1100 
   1101         return indent
   1102 
   1103     def __repr__(self):
   1104         """Generate the representation of a given block."""
   1105         if self.directive:
   1106             result = "#%s " % self.directive
   1107             if self.isIf():
   1108                 result += repr(self.expr)
   1109             else:
   1110                 for tok in self.tokens:
   1111                     result += repr(tok)
   1112         else:
   1113             result = ""
   1114             for tok in self.tokens:
   1115                 result += repr(tok)
   1116 
   1117         return result
   1118 
   1119     def __str__(self):
   1120         """Generate the string representation of a given block."""
   1121         if self.directive:
   1122             # "#if"
   1123             if self.directive == "if":
   1124                 # small optimization to re-generate #ifdef and #ifndef
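                        # e.g. the expr tree ("defined", "X") prints as "#ifdef X",
                        # and ("!", ("defined", "X")) prints as "#ifndef X".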
   1125                 e = self.expr.expr
   1126                 op = e[0]
   1127                 if op == "defined":
   1128                     result = "#ifdef %s" % e[1]
   1129                 elif op == "!" and e[1][0] == "defined":
   1130                     result = "#ifndef %s" % e[1][1]
   1131                 else:
   1132                     result = "#if " + str(self.expr)
   1133 
   1134             # "#define"
   1135             elif self.isDefine():
   1136                 result = "#%s %s" % (self.directive, self.define_id)
   1137                 if self.tokens:
   1138                     result += " "
   1139                 expr = strip_space(' '.join([tok.id for tok in self.tokens]))
   1140                 # remove the space between name and '(' in function call
   1141                 result += re.sub(r'(\w+) \(', r'\1(', expr)
   1142 
   1143             # "#error"
   1144             # Concatenate the tokens with a space separator, because they may
   1145             # not be quoted and may have been broken into several tokens.
   1146             elif self.directive == "error":
   1147                 result = "#error %s" % ' '.join([tok.id for tok in self.tokens])
   1148 
   1149             else:
   1150                 result = "#%s" % self.directive
   1151                 if self.tokens:
   1152                     result += " "
   1153                 result += ''.join([tok.id for tok in self.tokens])
   1154         else:
   1155             lines, _ = self.format_blocks(self.tokens)
   1156             result = '\n'.join(lines)
   1157 
   1158         return result
   1159 
   1160 
   1161 class BlockList(object):
   1162     """A convenience class used to hold and process a list of blocks.
   1163 
   1164     It calls the cpp parser to get the blocks.
   1165     """
   1166 
   1167     def __init__(self, blocks):
   1168         self.blocks = blocks
   1169 
   1170     def __len__(self):
   1171         return len(self.blocks)
   1172 
   1173     def __getitem__(self, n):
   1174         return self.blocks[n]
   1175 
   1176     def __repr__(self):
   1177         return repr(self.blocks)
   1178 
   1179     def __str__(self):
   1180         result = '\n'.join([str(b) for b in self.blocks])
   1181         return result
   1182 
   1183     def dump(self):
   1184         """Dump all the blocks in current BlockList."""
   1185         print '##### BEGIN #####'
   1186         for i, b in enumerate(self.blocks):
   1187             print '### BLOCK %d ###' % i
   1188             print b
   1189         print '##### END #####'
   1190 
   1191     def optimizeIf01(self):
   1192         """Remove the code between #if 0 .. #endif in a BlockList."""
   1193         self.blocks = optimize_if01(self.blocks)
   1194 
   1195     def optimizeMacros(self, macros):
   1196         """Remove known defined and undefined macros from a BlockList."""
   1197         for b in self.blocks:
   1198             if b.isIf():
   1199                 b.expr.optimize(macros)
   1200 
   1201     def optimizeAll(self, macros):
   1202         self.optimizeMacros(macros)
   1203         self.optimizeIf01()
   1204         return
   1205 
   1206     def findIncludes(self):
   1207         """Return the list of included files in a BlockList."""
   1208         result = []
   1209         for b in self.blocks:
   1210             i = b.isInclude()
   1211             if i:
   1212                 result.append(i)
   1213         return result
   1214 
   1215     def write(self, out):
   1216         indent = 0
   1217         for b in self.blocks:
   1218             indent = b.write(out, indent)
   1219 
   1220     def removeVarsAndFuncs(self, keep):
   1221         """Remove variable and function declarations.
   1222 
   1223         All extern and static declarations corresponding to variable and
   1224         function declarations are removed. We only keep typedefs and
   1225         enum/struct/union declarations.
   1226 
   1227         In addition, remove any macros expanded in the headers. Usually,
   1228         these macros correspond to static inline functions, which is why
   1229         they are removed.
   1230 
   1231         However, we keep the definitions corresponding to the set of known
   1232         static inline functions in the set 'keep', which is useful
   1233         for optimized byteorder swap functions and the like.
   1234         """
   1235 
   1236         # state = NORMAL => normal (i.e. LN + spaces)
   1237         # state = OTHER_DECL => typedef/struct encountered, ends with ";"
   1238         # state = VAR_DECL => var declaration encountered, ends with ";"
   1239         # state = FUNC_DECL => func declaration encountered, ends with "}"
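                #
                # Illustrative examples (hypothetical declarations):
                #   "typedef unsigned int u32;"  -> OTHER_DECL, kept
                #   "extern int foo(int x);"     -> VAR_DECL, dropped unless 'foo'
                #                                   is listed in 'keep'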
   1240         NORMAL = 0
   1241         OTHER_DECL = 1
   1242         VAR_DECL = 2
   1243         FUNC_DECL = 3
   1244 
   1245         state = NORMAL
   1246         depth = 0
   1247         blocksToKeep = []
   1248         blocksInProgress = []
   1249         blocksOfDirectives = []
   1250         ident = ""
   1251         state_token = ""
   1252         macros = set()
   1253         for block in self.blocks:
   1254             if block.isDirective():
   1255                 # Record all macros.
   1256                 if block.directive == 'define':
   1257                     macro_name = block.define_id
   1258                     paren_index = macro_name.find('(')
   1259                     if paren_index == -1:
   1260                         macros.add(macro_name)
   1261                     else:
   1262                         macros.add(macro_name[0:paren_index])
   1263                 blocksInProgress.append(block)
   1264                 # If this is in a function/variable declaration, we might need
   1265                 # to emit the directives alone, so save them separately.
   1266                 blocksOfDirectives.append(block)
   1267                 continue
   1268 
   1269             numTokens = len(block.tokens)
   1270             lastTerminatorIndex = 0
   1271             i = 0
   1272             while i < numTokens:
   1273                 token_id = block.tokens[i].id
   1274                 terminator = False
   1275                 if token_id == '{':
   1276                     depth += 1
   1277                     if (i >= 2 and block.tokens[i-2].id == 'extern' and
   1278                         block.tokens[i-1].id == '"C"'):
   1279                         # For an extern "C" { pretend as though this is depth 0.
   1280                         depth -= 1
   1281                 elif token_id == '}':
   1282                     if depth > 0:
   1283                         depth -= 1
   1284                     if depth == 0:
   1285                         if state == OTHER_DECL:
   1286                             # Loop through until we hit the ';'
   1287                             i += 1
   1288                             while i < numTokens:
   1289                                 if block.tokens[i].id == ';':
   1290                                     token_id = ';'
   1291                                     break
   1292                                 i += 1
   1293                             # If we didn't hit the ';', just consider this the
   1294                             # terminator anyway.
   1295                         terminator = True
   1296                 elif depth == 0:
   1297                     if token_id == ';':
   1298                         if state == NORMAL:
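                                    # A ';' at depth 0 in NORMAL state ends a
                                    # plain declaration (e.g. a variable);
                                    # switching to FUNC_DECL lets the terminator
                                    # handling below drop it unless its
                                    # identifier is in 'keep'.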
   1299                             blocksToKeep.extend(blocksInProgress)
   1300                             blocksInProgress = []
   1301                             blocksOfDirectives = []
   1302                             state = FUNC_DECL
   1303                         terminator = True
   1304                     elif (state == NORMAL and token_id == '(' and i >= 1 and
   1305                           block.tokens[i-1].kind == TokenKind.IDENTIFIER and
   1306                           block.tokens[i-1].id in macros):
   1307                         # This is a plain macro expansion in the header,
   1308                         # which needs to be removed.
   1309                         blocksToKeep.extend(blocksInProgress)
   1310                         if lastTerminatorIndex < i - 1:
   1311                             blocksToKeep.append(Block(block.tokens[lastTerminatorIndex:i-1]))
   1312                         blocksInProgress = []
   1313                         blocksOfDirectives = []
   1314 
   1315                         # Skip until we see the terminating ')'
   1316                         i += 1
   1317                         paren_depth = 1
   1318                         while i < numTokens:
   1319                             if block.tokens[i].id == ')':
   1320                                 paren_depth -= 1
   1321                                 if paren_depth == 0:
   1322                                     break
   1323                             elif block.tokens[i].id == '(':
   1324                                 paren_depth += 1
   1325                             i += 1
   1326                         lastTerminatorIndex = i + 1
   1327                     elif (state != FUNC_DECL and token_id == '(' and
   1328                           state_token != 'typedef'):
   1329                         blocksToKeep.extend(blocksInProgress)
   1330                         blocksInProgress = []
   1331                         blocksOfDirectives = []
   1332                         state = VAR_DECL
   1333                     elif state == NORMAL and token_id in ['struct', 'typedef',
   1334                                                           'enum', 'union',
   1335                                                           '__extension__']:
   1336                         state = OTHER_DECL
   1337                         state_token = token_id
   1338                     elif block.tokens[i].kind == TokenKind.IDENTIFIER:
   1339                         if state != VAR_DECL or ident == "":
   1340                             ident = token_id
   1341 
   1342                 if terminator:
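                            # Keep everything up to the terminator unless this
                            # was a variable/function declaration whose
                            # identifier is not in 'keep' ('and' binds tighter
                            # than 'or' here).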
   1343                     if state != VAR_DECL and state != FUNC_DECL or ident in keep:
   1344                         blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:i+1]))
   1345                         blocksToKeep.extend(blocksInProgress)
   1346                     else:
   1347                         # Only keep the directives found.
   1348                         blocksToKeep.extend(blocksOfDirectives)
   1349                     lastTerminatorIndex = i + 1
   1350                     blocksInProgress = []
   1351                     blocksOfDirectives = []
   1352                     state = NORMAL
   1353                     ident = ""
   1354                     state_token = ""
   1355                 i += 1
   1356             if lastTerminatorIndex < numTokens:
   1357                 blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:numTokens]))
   1358         if len(blocksInProgress) > 0:
   1359             blocksToKeep.extend(blocksInProgress)
   1360         self.blocks = blocksToKeep
   1361 
   1362     def replaceTokens(self, replacements):
   1363         """Replace tokens according to the given dict."""
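                # For example, kernel_token_replacements maps 'SIGRTMAX' to
                # '__SIGRTMAX' (see test_endif_comment_removed below), and any
                # struct listed in kernel_struct_replacements is replaced by an
                # '#include <bits/NAME.h>' directive.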
   1364         extra_includes = []
   1365         for b in self.blocks:
   1366             made_change = False
   1367             if b.isInclude() is None:
   1368                 i = 0
   1369                 while i < len(b.tokens):
   1370                     tok = b.tokens[i]
   1371                     if (tok.kind == TokenKind.KEYWORD and tok.id == 'struct'
   1372                         and (i + 2) < len(b.tokens) and b.tokens[i + 2].id == '{'):
   1373                         struct_name = b.tokens[i + 1].id
   1374                         if struct_name in kernel_struct_replacements:
   1375                             extra_includes.append("<bits/%s.h>" % struct_name)
   1376                             end = i + 2
   1377                             while end < len(b.tokens) and b.tokens[end].id != '}':
   1378                                 end += 1
   1379                             end += 1 # Swallow '}'
   1380                             while end < len(b.tokens) and b.tokens[end].id != ';':
   1381                                 end += 1
   1382                             end += 1 # Swallow ';'
   1383                             # Remove these tokens. We'll replace them later with a #include block.
   1384                             b.tokens[i:end] = []
   1385                             made_change = True
   1386                             # We've just modified b.tokens, so revisit the current offset.
   1387                             continue
   1388                     if tok.kind == TokenKind.IDENTIFIER:
   1389                         if tok.id in replacements:
   1390                             tok.id = replacements[tok.id]
   1391                             made_change = True
   1392                     i += 1
   1393 
   1394                 if b.isDefine() and b.define_id in replacements:
   1395                     b.define_id = replacements[b.define_id]
   1396                     made_change = True
   1397 
   1398             if made_change and b.isIf():
   1399                 # Keep 'expr' in sync with 'tokens'.
   1400                 b.expr = CppExpr(b.tokens)
   1401 
   1402         for extra_include in extra_includes:
   1403             replacement = CppStringTokenizer(extra_include)
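                    # Index 2 presumably lands just after the leading header
                    # guard blocks, so the new #include follows the guard.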
   1404             self.blocks.insert(2, Block(replacement.tokens, directive='include'))
   1405 
   1406 
   1407 
   1408 def strip_space(s):
   1409     """Strip out redundant space in a given string."""
   1410 
   1411     # NOTE: This ought to be smarter and avoid destroying spaces inside string tokens.
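            # For example, strip_space('foo ( a , b ) ;') returns 'foo(a, b);'.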
   1412     replacements = {' . ': '.',
   1413                     ' [': '[',
   1414                     '[ ': '[',
   1415                     ' ]': ']',
   1416                     '( ': '(',
   1417                     ' )': ')',
   1418                     ' ,': ',',
   1419                     '# ': '#',
   1420                     ' ;': ';',
   1421                     '~ ': '~',
   1422                     ' -> ': '->'}
   1423     result = s
   1424     for r in replacements:
   1425         result = result.replace(r, replacements[r])
   1426 
   1427     # Remove the space between the function name and the opening parenthesis.
   1428     result = re.sub(r'(\w+) \(', r'\1(', result)
   1429     return result
   1430 
   1431 
   1432 class BlockParser(object):
   1433     """A class that converts an input source file into a BlockList object."""
   1434 
   1435     def __init__(self, tokzer=None):
   1436         """Initialize a block parser.
   1437 
   1438         The input source is provided through a Tokenizer object.
   1439         """
   1440         self._tokzer = tokzer
   1441         self._parsed = False
   1442 
   1443     @property
   1444     def parsed(self):
   1445         return self._parsed
   1446 
   1447     @staticmethod
   1448     def _short_extent(extent):
   1449         return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
   1450                                   extent.end.line, extent.end.column)
   1451 
   1452     def getBlocks(self, tokzer=None):
   1453         """Return all the blocks parsed."""
   1454 
   1455         def consume_extent(i, tokens, extent=None, detect_change=False):
   1456             """Return tokens that belong to the given extent.
   1457 
   1458             It consumes the tokens starting at tokens[i] until it leaves
   1459             the extent. When detect_change is True, it may terminate early
   1460             upon detecting a preprocessing directive inside the extent.
   1461             """
   1462 
   1463             result = []
   1464             if extent is None:
   1465                 extent = tokens[i].cursor.extent
   1466 
   1467             while i < len(tokens) and tokens[i].location in extent:
   1468                 t = tokens[i]
   1469                 if debugBlockParser:
   1470                     print ' ' * 2, t.id, t.kind, t.cursor.kind
   1471                 if (detect_change and t.cursor.extent != extent and
   1472                     t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
   1473                     break
   1474                 result.append(t)
   1475                 i += 1
   1476             return (i, result)
   1477 
   1478         def consume_line(i, tokens):
   1479             """Return the tokens that follow tokens[i] on the same line."""
   1480             result = []
   1481             line = tokens[i].location.line
   1482             while i < len(tokens) and tokens[i].location.line == line:
   1483                 if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
   1484                     break
   1485                 result.append(tokens[i])
   1486                 i += 1
   1487             return (i, result)
   1488 
   1489         if tokzer is None:
   1490             tokzer = self._tokzer
   1491         tokens = tokzer.tokens
   1492 
   1493         blocks = []
   1494         buf = []
   1495         i = 0
   1496 
   1497         while i < len(tokens):
   1498             t = tokens[i]
   1499             cursor = t.cursor
   1500 
   1501             if debugBlockParser:
   1502                 print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
   1503                        "extent=[%s]" % (t.location.line, t.spelling, t.kind,
   1504                                         cursor.kind,
   1505                                         self._short_extent(cursor.extent)))
   1506 
   1507             if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
   1508                 if buf:
   1509                     blocks.append(Block(buf))
   1510                     buf = []
   1511 
   1512                 j = i
   1513                 if j + 1 >= len(tokens):
   1514                     raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
   1515                 directive = tokens[j+1].id
   1516 
   1517                 if directive == 'define':
   1518                     if i+2 >= len(tokens):
   1519                         raise BadExpectedToken("### BAD TOKEN at %s" %
   1520                                                (tokens[i].location))
   1521 
   1522                     # Skip '#' and 'define'.
   1523                     extent = tokens[i].cursor.extent
   1524                     i += 2
   1525                     id = ''
   1526                     # We need to separate the macro name from the rest of
   1527                     # the line, especially for a function-like macro.
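                            # (A '(' immediately after the name, with no space
                            # in between, makes it a function-like macro;
                            # '#define F (x)' is an object-like macro whose
                            # body starts with '('.)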
   1528                     if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
   1529                         (tokens[i].location.column + len(tokens[i].spelling) ==
   1530                          tokens[i+1].location.column)):
   1531                         while i < len(tokens):
   1532                             id += tokens[i].id
   1533                             if tokens[i].spelling == ')':
   1534                                 i += 1
   1535                                 break
   1536                             i += 1
   1537                     else:
   1538                         id += tokens[i].id
   1539                         # Advance to the next token that follows the macro id
   1540                         i += 1
   1541 
   1542                     (i, ret) = consume_extent(i, tokens, extent=extent)
   1543                     blocks.append(Block(ret, directive=directive,
   1544                                         lineno=t.location.line, identifier=id))
   1545 
   1546                 else:
   1547                     (i, ret) = consume_extent(i, tokens)
   1548                     blocks.append(Block(ret[2:], directive=directive,
   1549                                         lineno=t.location.line))
   1550 
   1551             elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
   1552                 if buf:
   1553                     blocks.append(Block(buf))
   1554                     buf = []
   1555                 directive = tokens[i+1].id
   1556                 (i, ret) = consume_extent(i, tokens)
   1557 
   1558                 blocks.append(Block(ret[2:], directive=directive,
   1559                                     lineno=t.location.line))
   1560 
   1561             elif cursor.kind == CursorKind.VAR_DECL:
   1562                 if buf:
   1563                     blocks.append(Block(buf))
   1564                     buf = []
   1565 
   1566                 (i, ret) = consume_extent(i, tokens, detect_change=True)
   1567                 buf += ret
   1568 
   1569             elif cursor.kind == CursorKind.FUNCTION_DECL:
   1570                 if buf:
   1571                     blocks.append(Block(buf))
   1572                     buf = []
   1573 
   1574                 (i, ret) = consume_extent(i, tokens, detect_change=True)
   1575                 buf += ret
   1576 
   1577             else:
   1578                 (i, ret) = consume_line(i, tokens)
   1579                 buf += ret
   1580 
   1581         if buf:
   1582             blocks.append(Block(buf))
   1583 
   1584         # _parsed=True indicates a successful parse, although it may
   1585         # result in an empty BlockList.
   1586         self._parsed = True
   1587 
   1588         return BlockList(blocks)
   1589 
   1590     def parse(self, tokzer):
   1591         return self.getBlocks(tokzer)
   1592 
   1593     def parseFile(self, path):
   1594         return self.getBlocks(CppFileTokenizer(path))
   1595 
   1596 
   1597 class BlockParserTests(unittest.TestCase):
   1598     """BlockParser unit tests."""
   1599 
   1600     def get_blocks(self, lines):
   1601         blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines)))
   1602         return map(lambda a: str(a), blocks)
   1603 
   1604     def test_hash(self):
   1605         self.assertEqual(self.get_blocks(["#error hello"]), ["#error hello"])
   1606 
   1607     def test_empty_line(self):
   1608         self.assertEqual(self.get_blocks(["foo", "", "bar"]), ["foo bar"])
   1609 
   1610     def test_hash_with_space(self):
   1611         # We currently cannot handle the following case with libclang properly.
   1612         # Fortunately it doesn't appear in current headers.
   1613         #self.assertEqual(self.get_blocks(["foo", "  #  ", "bar"]), ["foo", "bar"])
   1614         pass
   1615 
   1616     def test_with_comment(self):
   1617         self.assertEqual(self.get_blocks(["foo",
   1618                                           "  #  /* ahah */ if defined(__KERNEL__) /* more */",
   1619                                           "bar", "#endif"]),
   1620                          ["foo", "#ifdef __KERNEL__", "bar", "#endif"])
   1621 
   1622 
   1623 ################################################################################
   1624 ################################################################################
   1625 #####                                                                      #####
   1626 #####        B L O C K   L I S T   O P T I M I Z A T I O N                 #####
   1627 #####                                                                      #####
   1628 ################################################################################
   1629 ################################################################################
   1630 
   1631 
   1632 def find_matching_endif(blocks, i):
   1633     """Find the matching #endif, or an #else/#elif at the same depth."""
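            # For example, with blocks for '#ifdef A / foo / #else / bar /
            # #endif', find_matching_endif(blocks, 1) returns the index of the
            # '#else' block.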
   1634     n = len(blocks)
   1635     depth = 1
   1636     while i < n:
   1637         if blocks[i].isDirective():
   1638             dir_ = blocks[i].directive
   1639             if dir_ in ["if", "ifndef", "ifdef"]:
   1640                 depth += 1
   1641             elif depth == 1 and dir_ in ["else", "elif"]:
   1642                 return i
   1643             elif dir_ == "endif":
   1644                 depth -= 1
   1645                 if depth == 0:
   1646                     return i
   1647         i += 1
   1648     return i
   1649 
   1650 
   1651 def optimize_if01(blocks):
   1652     """Remove code between #if 0 .. #endif and simplify #if 1 in a list of CppBlocks."""
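            # For example, an '#if 0 ... #endif' region disappears entirely,
            # while '#if 1 / #define GOOD / #endif' collapses to just
            # '#define GOOD' (see OptimizerTests below).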
   1653     i = 0
   1654     n = len(blocks)
   1655     result = []
   1656     while i < n:
   1657         j = i
   1658         while j < n and not blocks[j].isIf():
   1659             j += 1
   1660         if j > i:
   1661             logging.debug("appending lines %d to %d", blocks[i].lineno,
   1662                           blocks[j-1].lineno)
   1663             result += blocks[i:j]
   1664         if j >= n:
   1665             break
   1666         expr = blocks[j].expr
   1667         r = expr.toInt()
   1668         if r is None:
   1669             result.append(blocks[j])
   1670             i = j + 1
   1671             continue
   1672 
   1673         if r == 0:
   1674             # if 0 => skip everything until the corresponding #endif
   1675             start_dir = blocks[j].directive
   1676             j = find_matching_endif(blocks, j + 1)
   1677             if j >= n:
   1678                 # unterminated #if 0, finish here
   1679                 break
   1680             dir_ = blocks[j].directive
   1681             if dir_ == "endif":
   1682                 logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
   1683                               blocks[i].lineno, blocks[j].lineno)
   1684                 if start_dir == "elif":
   1685                     # Put an endif since we started with an elif.
   1686                     result += blocks[j:j+1]
   1687                 i = j + 1
   1688             elif dir_ == "else":
   1689                 # convert 'else' into 'if 1'
   1690                 logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
   1691                               "to %d)", blocks[i].lineno, blocks[j-1].lineno)
   1692                 if start_dir == "elif":
   1693                     blocks[j].directive = "elif"
   1694                 else:
   1695                     blocks[j].directive = "if"
   1696                 blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
   1697                 i = j
   1698             elif dir_ == "elif":
   1699                 # convert 'elif' into 'if'
   1700                 logging.debug("convert 'if 0' .. 'elif' into 'if'")
   1701                 if start_dir == "elif":
   1702                     blocks[j].directive = "elif"
   1703                 else:
   1704                     blocks[j].directive = "if"
   1705                 i = j
   1706             continue
   1707 
   1708         # if 1 => find corresponding endif and remove/transform them
   1709         k = find_matching_endif(blocks, j + 1)
   1710         if k >= n:
   1711             # unterminated #if 1, finish here
   1712             logging.debug("unterminated 'if 1'")
   1713             result += blocks[j+1:k]
   1714             break
   1715 
   1716         start_dir = blocks[j].directive
   1717         dir_ = blocks[k].directive
   1718         if dir_ == "endif":
   1719             logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
   1720                           blocks[j].lineno, blocks[k].lineno)
   1721             if start_dir == "elif":
   1722                 # Add the elif into the results and convert it to an 'elif 1'.
   1723                 blocks[j].tokens = CppStringTokenizer("1").tokens
   1724                 result += blocks[j:j+1]
   1725             result += optimize_if01(blocks[j+1:k])
   1726             if start_dir == "elif":
   1727                 # Add the endif into the results.
   1728                 result += blocks[k:k+1]
   1729             i = k + 1
   1730         elif dir_ == "else":
   1731             # convert 'else' into 'if 0'
   1732             logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
   1733                           blocks[j].lineno, blocks[k].lineno)
   1734             if start_dir == "elif":
   1735                 # Add the elif into the results and convert it to an 'elif 1'.
   1736                 blocks[j].tokens = CppStringTokenizer("1").tokens
   1737                 result += blocks[j:j+1]
   1738             result += optimize_if01(blocks[j+1:k])
   1739             if start_dir == "elif":
   1740                 blocks[k].directive = "elif"
   1741             else:
   1742                 blocks[k].directive = "if"
   1743             blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
   1744             i = k
   1745         elif dir_ == "elif":
   1746             # convert 'elif' into 'if 0'
   1747             logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
   1748                           blocks[j].lineno, blocks[k].lineno)
   1749             result += optimize_if01(blocks[j+1:k])
   1750             blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
   1751             i = k
   1752     return result
   1753 
   1754 class OptimizerTests(unittest.TestCase):
   1755     def parse(self, text, macros=None):
   1756         out = utils.StringOutput()
   1757         blocks = BlockParser().parse(CppStringTokenizer(text))
   1758         blocks.replaceTokens(kernel_token_replacements)
   1759         blocks.optimizeAll(macros)
   1760         blocks.write(out)
   1761         return out.get()
   1762 
   1763     def test_if1(self):
   1764         text = """\
   1765 #if 1
   1766 #define  GOOD
   1767 #endif
   1768 """
   1769         expected = """\
   1770 #define GOOD
   1771 """
   1772         self.assertEqual(self.parse(text), expected)
   1773 
   1774     def test_if0(self):
   1775         text = """\
   1776 #if 0
   1777 #define  SHOULD_SKIP1
   1778 #define  SHOULD_SKIP2
   1779 #endif
   1780 """
   1781         expected = ""
   1782         self.assertEqual(self.parse(text), expected)
   1783 
   1784     def test_if1_else(self):
   1785         text = """\
   1786 #if 1
   1787 #define  GOOD
   1788 #else
   1789 #define  BAD
   1790 #endif
   1791 """
   1792         expected = """\
   1793 #define GOOD
   1794 """
   1795         self.assertEqual(self.parse(text), expected)
   1796 
   1797     def test_if0_else(self):
   1798         text = """\
   1799 #if 0
   1800 #define  BAD
   1801 #else
   1802 #define  GOOD
   1803 #endif
   1804 """
   1805         expected = """\
   1806 #define GOOD
   1807 """
   1808         self.assertEqual(self.parse(text), expected)
   1809 
   1810     def test_if_elif1(self):
   1811         text = """\
   1812 #if defined(something)
   1813 #define EXISTS
   1814 #elif 1
   1815 #define GOOD
   1816 #endif
   1817 """
   1818         expected = """\
   1819 #ifdef something
   1820 #define EXISTS
   1821 #elif 1
   1822 #define GOOD
   1823 #endif
   1824 """
   1825         self.assertEqual(self.parse(text), expected)
   1826 
   1827     def test_if_elif1_macro(self):
   1828         text = """\
   1829 #if defined(something)
   1830 #define EXISTS
   1831 #elif defined(WILL_BE_ONE)
   1832 #define GOOD
   1833 #endif
   1834 """
   1835         expected = """\
   1836 #ifdef something
   1837 #define EXISTS
   1838 #elif 1
   1839 #define GOOD
   1840 #endif
   1841 """
   1842         self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)
   1843 
   1844 
   1845     def test_if_elif1_else(self):
   1846         text = """\
   1847 #if defined(something)
   1848 #define EXISTS
   1849 #elif 1
   1850 #define GOOD
   1851 #else
   1852 #define BAD
   1853 #endif
   1854 """
   1855         expected = """\
   1856 #ifdef something
   1857 #define EXISTS
   1858 #elif 1
   1859 #define GOOD
   1860 #endif
   1861 """
   1862         self.assertEqual(self.parse(text), expected)
   1863 
   1864     def test_if_elif1_else_macro(self):
   1865         text = """\
   1866 #if defined(something)
   1867 #define EXISTS
   1868 #elif defined(WILL_BE_ONE)
   1869 #define GOOD
   1870 #else
   1871 #define BAD
   1872 #endif
   1873 """
   1874         expected = """\
   1875 #ifdef something
   1876 #define EXISTS
   1877 #elif 1
   1878 #define GOOD
   1879 #endif
   1880 """
   1881         self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected)
   1902 
   1903     def test_macro_set_to_undefined_single(self):
   1904         text = """\
   1905 #if defined(__KERNEL__)
   1906 #define BAD_KERNEL
   1907 #endif
   1908 """
   1909         expected = ""
   1910         macros = {"__KERNEL__": kCppUndefinedMacro}
   1911         self.assertEqual(self.parse(text, macros), expected)
   1912 
   1913     def test_macro_set_to_undefined_if(self):
   1914         text = """\
   1915 #if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
   1916 #define CHECK
   1917 #endif
   1918 """
   1919         expected = """\
   1920 #if !defined(__GLIBC__) || __GLIBC__ < 2
   1921 #define CHECK
   1922 #endif
   1923 """
   1924         macros = {"__KERNEL__": kCppUndefinedMacro}
   1925         self.assertEqual(self.parse(text, macros), expected)
   1926 
   1927     def test_endif_comment_removed(self):
   1928         text = """\
   1929 #ifndef SIGRTMAX
   1930 #define SIGRTMAX 123
   1931 #endif /* SIGRTMAX */
   1932 """
   1933         expected = """\
   1934 #ifndef __SIGRTMAX
   1935 #define __SIGRTMAX 123
   1936 #endif
   1937 """
   1938         self.assertEqual(self.parse(text), expected)
   1939 
   1940     def test_multilevel_if0(self):
   1941         text = """\
   1942 #if 0
   1943 #if 1
   1944 #define  BAD_6
   1945 #endif
   1946 #endif
   1947 """
   1948         expected = ""
   1949         self.assertEqual(self.parse(text), expected)
   1950 
   1951 class FullPathTest(unittest.TestCase):
   1952     """Test of the full path parsing."""
   1953 
   1954     def parse(self, text, keep=None):
   1955         if not keep:
   1956             keep = set()
   1957         out = utils.StringOutput()
   1958         blocks = BlockParser().parse(CppStringTokenizer(text))
   1959         blocks.removeVarsAndFuncs(keep)
   1960         blocks.replaceTokens(kernel_token_replacements)
   1961         blocks.optimizeAll(None)
   1962         blocks.write(out)
   1963         return out.get()
   1964 
   1965     def test_function_removed(self):
   1966         text = """\
   1967 static inline __u64 function()
   1968 {
   1969 }
   1970 """
   1971         expected = ""
   1972         self.assertEqual(self.parse(text), expected)
   1973 
   1974     def test_function_removed_with_struct(self):
   1975         text = """\
   1976 static inline struct something* function()
   1977 {
   1978 }
   1979 """
   1980         expected = ""
   1981         self.assertEqual(self.parse(text), expected)
   1982 
   1983     def test_function_kept(self):
   1984         text = """\
   1985 static inline __u64 function()
   1986 {
   1987 }
   1988 """
   1989         expected = """\
   1990 static inline __u64 function() {
   1991 }
   1992 """
   1993         self.assertEqual(self.parse(text, set(["function"])), expected)
   1994 
   1995     def test_var_removed(self):
   1996         text = "__u64 variable;"
   1997         expected = ""
   1998         self.assertEqual(self.parse(text), expected)
   1999 
   2000     def test_var_kept(self):
   2001         text = "__u64 variable;"
   2002         expected = "__u64 variable;\n"
   2003         self.assertEqual(self.parse(text, set(["variable"])), expected)
   2004 
   2005     def test_keep_function_typedef(self):
   2006         text = "typedef void somefunction_t(void);"
   2007         expected = "typedef void somefunction_t(void);\n"
   2008         self.assertEqual(self.parse(text), expected)
   2009 
   2010     def test_struct_keep_attribute(self):
   2011         text = """\
   2012 struct something_s {
   2013   __u32 s1;
   2014   __u32 s2;
   2015 } __attribute__((packed));
   2016 """
   2017         expected = """\
   2018 struct something_s {
   2019   __u32 s1;
   2020   __u32 s2;
   2021 } __attribute__((packed));
   2022 """
   2023         self.assertEqual(self.parse(text), expected)
   2024 
   2025     def test_function_keep_attribute_structs(self):
   2026         text = """\
   2027 static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
   2028 }
   2029 """
   2030         expected = """\
   2031 static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
   2032 }
   2033 """
   2034         self.assertEqual(self.parse(text, set(["function"])), expected)
   2035 
   2036     def test_struct_after_struct(self):
   2037         text = """\
   2038 struct first {
   2039 };
   2040 
   2041 struct second {
   2042   unsigned short s1;
   2043 #define SOMETHING 8
   2044   unsigned short s2;
   2045 };
   2046 """
   2047         expected = """\
   2048 struct first {
   2049 };
   2050 struct second {
   2051   unsigned short s1;
   2052 #define SOMETHING 8
   2053   unsigned short s2;
   2054 };
   2055 """
   2056         self.assertEqual(self.parse(text), expected)
   2057 
   2058     def test_other_not_removed(self):
   2059         text = """\
   2060 typedef union {
   2061   __u64 tu1;
   2062   __u64 tu2;
   2063 } typedef_name;
   2064 
   2065 union {
   2066   __u64 u1;
   2067   __u64 u2;
   2068 };
   2069 
   2070 struct {
   2071   __u64 s1;
   2072   __u64 s2;
   2073 };
   2074 
   2075 enum {
   2076   ENUM1 = 0,
   2077   ENUM2,
   2078 };
   2079 
   2080 __extension__ typedef __signed__ long long __s64;
   2081 """
   2082         expected = """\
   2083 typedef union {
   2084   __u64 tu1;
   2085   __u64 tu2;
   2086 } typedef_name;
   2087 union {
   2088   __u64 u1;
   2089   __u64 u2;
   2090 };
   2091 struct {
   2092   __u64 s1;
   2093   __u64 s2;
   2094 };
   2095 enum {
   2096   ENUM1 = 0,
   2097   ENUM2,
   2098 };
   2099 __extension__ typedef __signed__ long long __s64;
   2100 """
   2101 
   2102         self.assertEqual(self.parse(text), expected)
   2103 
   2104     def test_semicolon_after_function(self):
   2105         text = """\
   2106 static inline __u64 function()
   2107 {
   2108 };
   2109 
   2110 struct should_see {
   2111         __u32                           field;
   2112 };
   2113 """
   2114         expected = """\
   2115 struct should_see {
   2116   __u32 field;
   2117 };
   2118 """
   2119         self.assertEqual(self.parse(text), expected)
   2120 
   2121     def test_define_in_middle_keep(self):
   2122         text = """\
   2123 enum {
   2124   ENUM0 = 0x10,
   2125   ENUM1 = 0x20,
   2126 #define SOMETHING SOMETHING_ELSE
   2127   ENUM2 = 0x40,
   2128 };
   2129 """
   2130         expected = """\
   2131 enum {
   2132   ENUM0 = 0x10,
   2133   ENUM1 = 0x20,
   2134 #define SOMETHING SOMETHING_ELSE
   2135   ENUM2 = 0x40,
   2136 };
   2137 """
   2138         self.assertEqual(self.parse(text), expected)
   2139 
   2140     def test_define_in_middle_remove(self):
   2141         text = """\
   2142 static inline function() {
   2143 #define SOMETHING1 SOMETHING_ELSE1
   2144   i = 0;
   2145   {
   2146     i = 1;
   2147   }
   2148 #define SOMETHING2 SOMETHING_ELSE2
   2149 }
   2150 """
   2151         expected = """\
   2152 #define SOMETHING1 SOMETHING_ELSE1
   2153 #define SOMETHING2 SOMETHING_ELSE2
   2154 """
   2155         self.assertEqual(self.parse(text), expected)
   2156 
   2157     def test_define_in_middle_force_keep(self):
   2158         text = """\
   2159 static inline function() {
   2160 #define SOMETHING1 SOMETHING_ELSE1
   2161   i = 0;
   2162   {
   2163     i = 1;
   2164   }
   2165 #define SOMETHING2 SOMETHING_ELSE2
   2166 }
   2167 """
   2168         expected = """\
   2169 static inline function() {
   2170 #define SOMETHING1 SOMETHING_ELSE1
   2171   i = 0;
   2172  {
   2173     i = 1;
   2174   }
   2175 #define SOMETHING2 SOMETHING_ELSE2
   2176 }
   2177 """
   2178         self.assertEqual(self.parse(text, set(["function"])), expected)
   2179 
   2180     def test_define_before_remove(self):
   2181         text = """\
   2182 #define SHOULD_BE_KEPT NOTHING1
   2183 #define ANOTHER_TO_KEEP NOTHING2
   2184 static inline function() {
   2185 #define SOMETHING1 SOMETHING_ELSE1
   2186   i = 0;
   2187   {
   2188     i = 1;
   2189   }
   2190 #define SOMETHING2 SOMETHING_ELSE2
   2191 }
   2192 """
   2193         expected = """\
   2194 #define SHOULD_BE_KEPT NOTHING1
   2195 #define ANOTHER_TO_KEEP NOTHING2
   2196 #define SOMETHING1 SOMETHING_ELSE1
   2197 #define SOMETHING2 SOMETHING_ELSE2
   2198 """
   2199         self.assertEqual(self.parse(text), expected)
   2200 
   2201     def test_extern_C(self):
   2202         text = """\
   2203 #if defined(__cplusplus)
   2204 extern "C" {
   2205 #endif
   2206 
   2207 struct something {
   2208 };
   2209 
   2210 #if defined(__cplusplus)
   2211 }
   2212 #endif
   2213 """
   2214         expected = """\
   2215 #ifdef __cplusplus
   2216 extern "C" {
   2217 #endif
   2218 struct something {
   2219 };
   2220 #ifdef __cplusplus
   2221 }
   2222 #endif
   2223 """
   2224         self.assertEqual(self.parse(text), expected)
   2225 
   2226     def test_macro_definition_removed(self):
   2227         text = """\
   2228 #define MACRO_FUNCTION_NO_PARAMS static inline some_func() {}
   2229 MACRO_FUNCTION_NO_PARAMS()
   2230 
   2231 #define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
   2232 MACRO_FUNCTION_PARAMS(a = 1)
   2233 
   2234 something that should still be kept
   2235 MACRO_FUNCTION_PARAMS(b)
   2236 """
   2237         expected = """\
   2238 #define MACRO_FUNCTION_NO_PARAMS static inline some_func() { }
   2239 #define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
   2240 something that should still be kept
   2241 """
   2242         self.assertEqual(self.parse(text), expected)
   2243 
   2244 
   2245 if __name__ == '__main__':
   2246     unittest.main()
   2247