# NOTE: the line "Home | History | Annotate | Download | only in tools" was
# code-viewer navigation chrome, not part of this source file.
      1 #!/usr/bin/python
      2 """A glorified C pre-processor parser."""
      3 
      4 import ctypes
      5 import logging
      6 import os
      7 import re
      8 import site
      9 import utils
     10 
     11 top = os.getenv('ANDROID_BUILD_TOP')
     12 if top is None:
     13     utils.panic('ANDROID_BUILD_TOP not set.\n')
     14 
     15 # Set up the env vars for libclang.
     16 site.addsitedir(os.path.join(top, 'external/clang/bindings/python'))
     17 
     18 import clang.cindex
     19 from clang.cindex import conf
     20 from clang.cindex import Cursor
     21 from clang.cindex import CursorKind
     22 from clang.cindex import SourceLocation
     23 from clang.cindex import SourceRange
     24 from clang.cindex import TokenGroup
     25 from clang.cindex import TokenKind
     26 from clang.cindex import TranslationUnit
     27 
     28 # Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc.
     29 # Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
     30 clang.cindex.Config.set_library_path(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64'))
     31 
     32 from defaults import kCppUndefinedMacro
     33 from defaults import kernel_remove_config_macros
     34 from defaults import kernel_token_replacements
     35 
     36 
# Module-level debug switches; set to True to trace the matching stage.
debugBlockParser = False  # block-parser tracing (consumer not visible in this chunk)
debugCppExpr = False  # traces CppExpr parsing (see CppExpr.__init__)
debugOptimIf01 = False  # presumably traces #if 0/1 optimization — consumer defined later
     40 
     41 ###############################################################################
     42 ###############################################################################
     43 #####                                                                     #####
     44 #####           C P P   T O K E N S                                       #####
     45 #####                                                                     #####
     46 ###############################################################################
     47 ###############################################################################
     48 
# The list of supported C-preprocessor tokens, plus a couple of plain C
# tokens as well. These spellings are what the parser compares Token.id
# against throughout this file.
tokEOF = "\0"
tokLN = "\n"
# Preprocessor-only operators.
tokSTRINGIFY = "#"
tokCONCAT = "##"
# Logical, shift and comparison operators.
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokELLIPSIS = "..."
tokSPACE = " "
tokDEFINED = "defined"
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
# Arithmetic and bitwise operators.
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
# Pseudo-spellings used as placeholders for whole token classes.
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"
     88 
     89 
     90 class Token(clang.cindex.Token):
     91     """A class that represents one token after parsing.
     92 
     93     It inherits the class in libclang, with an extra id property to hold the
     94     new spelling of the token. The spelling property in the base class is
     95     defined as read-only. New names after macro instantiation are saved in
     96     their ids now. It also facilitates the renaming of directive optimizations
     97     like replacing 'ifndef X' with 'if !defined(X)'.
     98 
     99     It also overrides the cursor property of the base class. Because the one
    100     in libclang always queries based on a single token, which usually doesn't
    101     hold useful information. The cursor in this class can be set by calling
    102     CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
    103     base class.
    104     """
    105 
    106     def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
    107                  cursor=None):
    108         clang.cindex.Token.__init__(self)
    109         self._id = None
    110         self._tu = tu
    111         self._group = group
    112         self._cursor = cursor
    113         # self.int_data and self.ptr_data are from the base class. But
    114         # self.int_data doesn't accept a None value.
    115         if int_data is not None:
    116             self.int_data = int_data
    117         self.ptr_data = ptr_data
    118 
    119     @property
    120     def id(self):
    121         """Name of the token."""
    122         if self._id is None:
    123             return self.spelling
    124         else:
    125             return self._id
    126 
    127     @id.setter
    128     def id(self, new_id):
    129         """Setting name of the token."""
    130         self._id = new_id
    131 
    132     @property
    133     def cursor(self):
    134         if self._cursor is None:
    135             self._cursor = clang.cindex.Token.cursor
    136         return self._cursor
    137 
    138     @cursor.setter
    139     def cursor(self, new_cursor):
    140         self._cursor = new_cursor
    141 
    142     def __repr__(self):
    143         if self.id == 'defined':
    144             return self.id
    145         elif self.kind == TokenKind.IDENTIFIER:
    146             return "(ident %s)" % self.id
    147 
    148         return self.id
    149 
    150     def __str__(self):
    151         return self.id
    152 
    153 
    154 class BadExpectedToken(Exception):
    155     """An exception that will be raised for unexpected tokens."""
    156     pass
    157 
    158 
# The __contains__ function in libclang's SourceRange class contains a bug:
# it gives a wrong result when dealing with a single-line range.
# Bug filed with upstream:
# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
def SourceRange__contains__(self, other):
    """Determine if a given SourceLocation is inside the range."""
    if not isinstance(other, SourceLocation):
        return False
    if other.file is None and self.start.file is None:
        # Both locations are file-less; fall through to the line checks.
        pass
    elif (self.start.file.name != other.file.name or
          other.file.name != self.end.file.name):
        # Not all in the same file: cannot be contained.
        return False
    # Same file, strictly between the first and last line.
    if self.start.line < other.line < self.end.line:
        return True
    # Same file, single-line range: both column bounds must hold.
    elif self.start.line == other.line == self.end.line:
        if self.start.column <= other.column <= self.end.column:
            return True
    elif self.start.line == other.line:
        # Same file, on the range's first line.
        if self.start.column <= other.column:
            return True
    elif other.line == self.end.line:
        # Same file, on the range's last line.
        if other.column <= self.end.column:
            return True
    return False


# Monkey-patch the corrected implementation into the libclang class.
SourceRange.__contains__ = SourceRange__contains__
    192 
    193 
    194 ################################################################################
    195 ################################################################################
    196 #####                                                                      #####
    197 #####           C P P   T O K E N I Z E R                                  #####
    198 #####                                                                      #####
    199 ################################################################################
    200 ################################################################################
    201 
    202 
    203 class CppTokenizer(object):
    204     """A tokenizer that converts some input text into a list of tokens.
    205 
    206     It calls libclang's tokenizer to get the parsed tokens. In addition, it
    207     updates the cursor property in each token after parsing, by calling
    208     getTokensWithCursors().
    209     """
    210 
    211     clang_flags = ['-E', '-x', 'c']
    212     options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
    213 
    214     def __init__(self):
    215         """Initialize a new CppTokenizer object."""
    216         self._indexer = clang.cindex.Index.create()
    217         self._tu = None
    218         self._index = 0
    219         self.tokens = None
    220 
    221     def _getTokensWithCursors(self):
    222         """Helper method to return all tokens with their cursors.
    223 
    224         The cursor property in a clang Token doesn't provide enough
    225         information. Because it is queried based on single token each time
    226         without any context, i.e. via calling conf.lib.clang_annotateTokens()
    227         with only one token given. So we often see 'INVALID_FILE' in one
    228         token's cursor. In this function it passes all the available tokens
    229         to get more informative cursors.
    230         """
    231 
    232         tokens_memory = ctypes.POINTER(clang.cindex.Token)()
    233         tokens_count = ctypes.c_uint()
    234 
    235         conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
    236                                 ctypes.byref(tokens_memory),
    237                                 ctypes.byref(tokens_count))
    238 
    239         count = int(tokens_count.value)
    240 
    241         # If we get no tokens, no memory was allocated. Be sure not to return
    242         # anything and potentially call a destructor on nothing.
    243         if count < 1:
    244             return
    245 
    246         cursors = (Cursor * count)()
    247         cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))
    248 
    249         conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
    250                                       cursors_memory)
    251 
    252         tokens_array = ctypes.cast(
    253             tokens_memory,
    254             ctypes.POINTER(clang.cindex.Token * count)).contents
    255         token_group = TokenGroup(self._tu, tokens_memory, tokens_count)
    256 
    257         tokens = []
    258         for i in xrange(0, count):
    259             token = Token(self._tu, token_group,
    260                           int_data=tokens_array[i].int_data,
    261                           ptr_data=tokens_array[i].ptr_data,
    262                           cursor=cursors[i])
    263             # We only want non-comment tokens.
    264             if token.kind != TokenKind.COMMENT:
    265                 tokens.append(token)
    266 
    267         return tokens
    268 
    269     def parseString(self, lines):
    270         """Parse a list of text lines into a BlockList object."""
    271         file_ = 'dummy.c'
    272         self._tu = self._indexer.parse(file_, self.clang_flags,
    273                                        unsaved_files=[(file_, lines)],
    274                                        options=self.options)
    275         self.tokens = self._getTokensWithCursors()
    276 
    277     def parseFile(self, file_):
    278         """Parse a file into a BlockList object."""
    279         self._tu = self._indexer.parse(file_, self.clang_flags,
    280                                        options=self.options)
    281         self.tokens = self._getTokensWithCursors()
    282 
    283     def nextToken(self):
    284         """Return next token from the list."""
    285         if self._index < len(self.tokens):
    286             t = self.tokens[self._index]
    287             self._index += 1
    288             return t
    289         else:
    290             return None
    291 
    292 
    293 class CppStringTokenizer(CppTokenizer):
    294     """A CppTokenizer derived class that accepts a string of text as input."""
    295 
    296     def __init__(self, line):
    297         CppTokenizer.__init__(self)
    298         self.parseString(line)
    299 
    300 
    301 class CppFileTokenizer(CppTokenizer):
    302     """A CppTokenizer derived class that accepts a file as input."""
    303 
    304     def __init__(self, file_):
    305         CppTokenizer.__init__(self)
    306         self.parseFile(file_)
    307 
    308 
    309 # Unit testing
    310 #
    311 class CppTokenizerTester(object):
    312     """A class used to test CppTokenizer classes."""
    313 
    314     def __init__(self, tokenizer=None):
    315         self._tokenizer = tokenizer
    316         self._token = None
    317 
    318     def setTokenizer(self, tokenizer):
    319         self._tokenizer = tokenizer
    320 
    321     def expect(self, id):
    322         self._token = self._tokenizer.nextToken()
    323         if self._token is None:
    324             tokid = ''
    325         else:
    326             tokid = self._token.id
    327         if tokid == id:
    328             return
    329         raise BadExpectedToken("###  BAD TOKEN: '%s' expecting '%s'" % (
    330             tokid, id))
    331 
    332     def expectToken(self, id, line, col):
    333         self.expect(id)
    334         if self._token.location.line != line:
    335             raise BadExpectedToken(
    336                 "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (
    337                     id, self._token.lineno, line))
    338         if self._token.location.column != col:
    339             raise BadExpectedToken("###  BAD COLNO: '%d' expecting '%d'" % (
    340                 self._token.colno, col))
    341 
    342     def expectTokens(self, tokens):
    343         for id, line, col in tokens:
    344             self.expectToken(id, line, col)
    345 
    346     def expectList(self, list_):
    347         for item in list_:
    348             self.expect(item)
    349 
    350 
    351 def test_CppTokenizer():
    352     tester = CppTokenizerTester()
    353 
    354     tester.setTokenizer(CppStringTokenizer("#an/example  && (01923_xy)"))
    355     tester.expectList(["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
    356                        "01923_xy", tokRPAREN])
    357 
    358     tester.setTokenizer(CppStringTokenizer("FOO(BAR) && defined(BAZ)"))
    359     tester.expectList(["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
    360                        "defined", tokLPAREN, "BAZ", tokRPAREN])
    361 
    362     tester.setTokenizer(CppStringTokenizer("/*\n#\n*/"))
    363     tester.expectList([])
    364 
    365     tester.setTokenizer(CppStringTokenizer("first\nsecond"))
    366     tester.expectList(["first", "second"])
    367 
    368     tester.setTokenizer(CppStringTokenizer("first second\n  third"))
    369     tester.expectTokens([("first", 1, 1),
    370                          ("second", 1, 7),
    371                          ("third", 2, 3)])
    372 
    373     tester.setTokenizer(CppStringTokenizer("boo /* what the\nhell */"))
    374     tester.expectTokens([("boo", 1, 1)])
    375 
    376     tester.setTokenizer(CppStringTokenizer("an \\\n example"))
    377     tester.expectTokens([("an", 1, 1),
    378                          ("example", 2, 2)])
    379     return True
    380 
    381 
    382 ################################################################################
    383 ################################################################################
    384 #####                                                                      #####
    385 #####           C P P   E X P R E S S I O N S                              #####
    386 #####                                                                      #####
    387 ################################################################################
    388 ################################################################################
    389 
    390 
    391 class CppExpr(object):
    392     """A class that models the condition of #if directives into an expr tree.
    393 
    394     Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
    395     "op" is a string describing the operation
    396     """
    397 
    398     unaries = ["!", "~"]
    399     binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
    400                 "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
    401     precedences = {
    402         "?": 1, ":": 1,
    403         "||": 2,
    404         "&&": 3,
    405         "|": 4,
    406         "^": 5,
    407         "&": 6,
    408         "==": 7, "!=": 7,
    409         "<": 8, "<=": 8, ">": 8, ">=": 8,
    410         "<<": 9, ">>": 9,
    411         "+": 10, "-": 10,
    412         "*": 11, "/": 11, "%": 11,
    413         "!": 12, "~": 12
    414     }
    415 
    416     def __init__(self, tokens):
    417         """Initialize a CppExpr. 'tokens' must be a CppToken list."""
    418         self.tokens = tokens
    419         self._num_tokens = len(tokens)
    420         self._index = 0
    421 
    422         if debugCppExpr:
    423             print "CppExpr: trying to parse %s" % repr(tokens)
    424         self.expr = self.parseExpression(0)
    425         if debugCppExpr:
    426             print "CppExpr: got " + repr(self.expr)
    427         if self._index != self._num_tokens:
    428             self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
    429                        % (self._index, self._num_tokens, repr(tokens)))
    430 
    431     def throw(self, exception, msg):
    432         if self._index < self._num_tokens:
    433             tok = self.tokens[self._index]
    434             print "%d:%d: %s" % (tok.location.line, tok.location.column, msg)
    435         else:
    436             print "EOF: %s" % msg
    437         raise exception(msg)
    438 
    439     def expectId(self, id):
    440         """Check that a given token id is at the current position."""
    441         token = self.tokens[self._index]
    442         if self._index >= self._num_tokens or token.id != id:
    443             self.throw(BadExpectedToken,
    444                        "### expecting '%s' in expression, got '%s'" % (
    445                            id, token.id))
    446         self._index += 1
    447 
    448     def is_decimal(self):
    449         token = self.tokens[self._index].id
    450         if token[-1] in "ULul":
    451             token = token[:-1]
    452         try:
    453             val = int(token, 10)
    454             self._index += 1
    455             return ('int', val)
    456         except ValueError:
    457             return None
    458 
    459     def is_octal(self):
    460         token = self.tokens[self._index].id
    461         if token[-1] in "ULul":
    462             token = token[:-1]
    463         if len(token) < 2 or token[0] != '0':
    464             return None
    465         try:
    466             val = int(token, 8)
    467             self._index += 1
    468             return ('oct', val)
    469         except ValueError:
    470             return None
    471 
    472     def is_hexadecimal(self):
    473         token = self.tokens[self._index].id
    474         if token[-1] in "ULul":
    475             token = token[:-1]
    476         if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
    477             return None
    478         try:
    479             val = int(token, 16)
    480             self._index += 1
    481             return ('hex', val)
    482         except ValueError:
    483             return None
    484 
    485     def is_integer(self):
    486         if self.tokens[self._index].kind != TokenKind.LITERAL:
    487             return None
    488 
    489         c = self.is_hexadecimal()
    490         if c:
    491             return c
    492 
    493         c = self.is_octal()
    494         if c:
    495             return c
    496 
    497         c = self.is_decimal()
    498         if c:
    499             return c
    500 
    501         return None
    502 
    503     def is_number(self):
    504         t = self.tokens[self._index]
    505         if t.id == tokMINUS and self._index + 1 < self._num_tokens:
    506             self._index += 1
    507             c = self.is_integer()
    508             if c:
    509                 op, val = c
    510                 return (op, -val)
    511         if t.id == tokPLUS and self._index + 1 < self._num_tokens:
    512             self._index += 1
    513             c = self.is_integer()
    514             if c:
    515                 return c
    516 
    517         return self.is_integer()
    518 
    519     def is_defined(self):
    520         t = self.tokens[self._index]
    521         if t.id != tokDEFINED:
    522             return None
    523 
    524         # We have the defined keyword, check the rest.
    525         self._index += 1
    526         used_parens = False
    527         if (self._index < self._num_tokens and
    528             self.tokens[self._index].id == tokLPAREN):
    529             used_parens = True
    530             self._index += 1
    531 
    532         if self._index >= self._num_tokens:
    533             self.throw(BadExpectedToken,
    534                        "### 'defined' must be followed by macro name or left "
    535                        "paren")
    536 
    537         t = self.tokens[self._index]
    538         if t.kind != TokenKind.IDENTIFIER:
    539             self.throw(BadExpectedToken,
    540                        "### 'defined' must be followed by macro name")
    541 
    542         self._index += 1
    543         if used_parens:
    544             self.expectId(tokRPAREN)
    545 
    546         return ("defined", t.id)
    547 
    548     def is_call_or_ident(self):
    549         if self._index >= self._num_tokens:
    550             return None
    551 
    552         t = self.tokens[self._index]
    553         if t.kind != TokenKind.IDENTIFIER:
    554             return None
    555 
    556         name = t.id
    557 
    558         self._index += 1
    559         if (self._index >= self._num_tokens or
    560             self.tokens[self._index].id != tokLPAREN):
    561             return ("ident", name)
    562 
    563         params = []
    564         depth = 1
    565         self._index += 1
    566         j = self._index
    567         while self._index < self._num_tokens:
    568             id = self.tokens[self._index].id
    569             if id == tokLPAREN:
    570                 depth += 1
    571             elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
    572                 k = self._index
    573                 param = self.tokens[j:k]
    574                 params.append(param)
    575                 if id == tokRPAREN:
    576                     break
    577                 j = self._index + 1
    578             elif id == tokRPAREN:
    579                 depth -= 1
    580             self._index += 1
    581 
    582         if self._index >= self._num_tokens:
    583             return None
    584 
    585         self._index += 1
    586         return ("call", (name, params))
    587 
    588     # Implements the "precedence climbing" algorithm from
    589     # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
    590     # The "classic" algorithm would be fine if we were using a tool to
    591     # generate the parser, but we're not. Dijkstra's "shunting yard"
    592     # algorithm hasn't been necessary yet.
    593 
    594     def parseExpression(self, minPrecedence):
    595         if self._index >= self._num_tokens:
    596             return None
    597 
    598         node = self.parsePrimary()
    599         while (self.token() and self.isBinary(self.token()) and
    600                self.precedence(self.token()) >= minPrecedence):
    601             op = self.token()
    602             self.nextToken()
    603             rhs = self.parseExpression(self.precedence(op) + 1)
    604             node = (op.id, node, rhs)
    605 
    606         return node
    607 
    608     def parsePrimary(self):
    609         op = self.token()
    610         if self.isUnary(op):
    611             self.nextToken()
    612             return (op.id, self.parseExpression(self.precedence(op)))
    613 
    614         primary = None
    615         if op.id == tokLPAREN:
    616             self.nextToken()
    617             primary = self.parseExpression(0)
    618             self.expectId(tokRPAREN)
    619         elif op.id == "?":
    620             self.nextToken()
    621             primary = self.parseExpression(0)
    622             self.expectId(":")
    623         elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
    624             primary = self.is_number()
    625         # Checking for 'defined' needs to come first now because 'defined' is
    626         # recognized as IDENTIFIER.
    627         elif op.id == tokDEFINED:
    628             primary = self.is_defined()
    629         elif op.kind == TokenKind.IDENTIFIER:
    630             primary = self.is_call_or_ident()
    631         else:
    632             self.throw(BadExpectedToken,
    633                        "didn't expect to see a %s in factor" % (
    634                            self.tokens[self._index].id))
    635         return primary
    636 
    637     def isBinary(self, token):
    638         return token.id in self.binaries
    639 
    640     def isUnary(self, token):
    641         return token.id in self.unaries
    642 
    643     def precedence(self, token):
    644         return self.precedences.get(token.id)
    645 
    646     def token(self):
    647         if self._index >= self._num_tokens:
    648             return None
    649         return self.tokens[self._index]
    650 
    651     def nextToken(self):
    652         self._index += 1
    653         if self._index >= self._num_tokens:
    654             return None
    655         return self.tokens[self._index]
    656 
    657     def dump_node(self, e):
    658         op = e[0]
    659         line = "(" + op
    660         if op == "int":
    661             line += " %d)" % e[1]
    662         elif op == "oct":
    663             line += " 0%o)" % e[1]
    664         elif op == "hex":
    665             line += " 0x%x)" % e[1]
    666         elif op == "ident":
    667             line += " %s)" % e[1]
    668         elif op == "defined":
    669             line += " %s)" % e[1]
    670         elif op == "call":
    671             arg = e[1]
    672             line += " %s [" % arg[0]
    673             prefix = ""
    674             for param in arg[1]:
    675                 par = ""
    676                 for tok in param:
    677                     par += str(tok)
    678                 line += "%s%s" % (prefix, par)
    679                 prefix = ","
    680             line += "])"
    681         elif op in CppExpr.unaries:
    682             line += " %s)" % self.dump_node(e[1])
    683         elif op in CppExpr.binaries:
    684             line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
    685         else:
    686             line += " ?%s)" % repr(e[1])
    687 
    688         return line
    689 
    690     def __repr__(self):
    691         return self.dump_node(self.expr)
    692 
    693     def source_node(self, e):
    694         op = e[0]
    695         if op == "int":
    696             return "%d" % e[1]
    697         if op == "hex":
    698             return "0x%x" % e[1]
    699         if op == "oct":
    700             return "0%o" % e[1]
    701         if op == "ident":
    702             # XXX: should try to expand
    703             return e[1]
    704         if op == "defined":
    705             return "defined(%s)" % e[1]
    706 
    707         prec = CppExpr.precedences.get(op, 1000)
    708         arg = e[1]
    709         if op in CppExpr.unaries:
    710             arg_src = self.source_node(arg)
    711             arg_op = arg[0]
    712             arg_prec = CppExpr.precedences.get(arg_op, 1000)
    713             if arg_prec < prec:
    714                 return "!(" + arg_src + ")"
    715             else:
    716                 return "!" + arg_src
    717         if op in CppExpr.binaries:
    718             arg2 = e[2]
    719             arg1_op = arg[0]
    720             arg2_op = arg2[0]
    721             arg1_src = self.source_node(arg)
    722             arg2_src = self.source_node(arg2)
    723             if CppExpr.precedences.get(arg1_op, 1000) < prec:
    724                 arg1_src = "(%s)" % arg1_src
    725             if CppExpr.precedences.get(arg2_op, 1000) < prec:
    726                 arg2_src = "(%s)" % arg2_src
    727 
    728             return "%s %s %s" % (arg1_src, op, arg2_src)
    729         return "???"
    730 
    731     def __str__(self):
    732         return self.source_node(self.expr)
    733 
    734     @staticmethod
    735     def int_node(e):
    736         if e[0] in ["int", "oct", "hex"]:
    737             return e[1]
    738         else:
    739             return None
    740 
    741     def toInt(self):
    742         return self.int_node(self.expr)
    743 
    744     def optimize_node(self, e, macros=None):
    745         if macros is None:
    746             macros = {}
    747         op = e[0]
    748 
    749         if op == "defined":
    750             op, name = e
    751             if macros.has_key(name):
    752                 if macros[name] == kCppUndefinedMacro:
    753                     return ("int", 0)
    754                 else:
    755                     try:
    756                         value = int(macros[name])
    757                         return ("int", value)
    758                     except ValueError:
    759                         return ("defined", macros[name])
    760 
    761             if kernel_remove_config_macros and name.startswith("CONFIG_"):
    762                 return ("int", 0)
    763 
    764             return e
    765 
    766         elif op == "ident":
    767             op, name = e
    768             if macros.has_key(name):
    769                 try:
    770                     value = int(macros[name])
    771                     expanded = ("int", value)
    772                 except ValueError:
    773                     expanded = ("ident", macros[name])
    774                 return self.optimize_node(expanded, macros)
    775             return e
    776 
    777         elif op == "!":
    778             op, v = e
    779             v = self.optimize_node(v, macros)
    780             if v[0] == "int":
    781                 if v[1] == 0:
    782                     return ("int", 1)
    783                 else:
    784                     return ("int", 0)
    785             return ('!', v)
    786 
    787         elif op == "&&":
    788             op, l, r = e
    789             l = self.optimize_node(l, macros)
    790             r = self.optimize_node(r, macros)
    791             li = self.int_node(l)
    792             ri = self.int_node(r)
    793             if li is not None:
    794                 if li == 0:
    795                     return ("int", 0)
    796                 else:
    797                     return r
    798             elif ri is not None:
    799                 if ri == 0:
    800                     return ("int", 0)
    801                 else:
    802                     return l
    803             return (op, l, r)
    804 
    805         elif op == "||":
    806             op, l, r = e
    807             l = self.optimize_node(l, macros)
    808             r = self.optimize_node(r, macros)
    809             li = self.int_node(l)
    810             ri = self.int_node(r)
    811             if li is not None:
    812                 if li == 0:
    813                     return r
    814                 else:
    815                     return ("int", 1)
    816             elif ri is not None:
    817                 if ri == 0:
    818                     return l
    819                 else:
    820                     return ("int", 1)
    821             return (op, l, r)
    822 
    823         else:
    824             return e
    825 
    def optimize(self, macros=None):
        """Simplify the expression tree in place, given known macro values.

        'macros' maps macro names to their values (or kCppUndefinedMacro);
        an omitted argument means no macros are known.
        """
        known = {} if macros is None else macros
        self.expr = self.optimize_node(self.expr, known)
    830 
    831 
    832 def test_cpp_expr(expr, expected):
    833     e = CppExpr(CppStringTokenizer(expr).tokens)
    834     s1 = repr(e)
    835     if s1 != expected:
    836         print ("[FAIL]: expression '%s' generates '%s', should be "
    837                "'%s'" % (expr, s1, expected))
    838         global failure_count
    839         failure_count += 1
    840 
    841 
    842 def test_cpp_expr_optim(expr, expected, macros=None):
    843     if macros is None:
    844         macros = {}
    845     e = CppExpr(CppStringTokenizer(expr).tokens)
    846     e.optimize(macros)
    847     s1 = repr(e)
    848     if s1 != expected:
    849         print ("[FAIL]: optimized expression '%s' generates '%s' with "
    850                "macros %s, should be '%s'" % (expr, s1, macros, expected))
    851         global failure_count
    852         failure_count += 1
    853 
    854 
    855 def test_cpp_expr_source(expr, expected):
    856     e = CppExpr(CppStringTokenizer(expr).tokens)
    857     s1 = str(e)
    858     if s1 != expected:
    859         print ("[FAIL]: source expression '%s' generates '%s', should "
    860                "be '%s'" % (expr, s1, expected))
    861         global failure_count
    862         failure_count += 1
    863 
    864 
def test_CppExpr():
    """Self-test for CppExpr parsing, optimization and source generation.

    Each test_cpp_expr* helper prints a [FAIL] line and increments the
    global failure_count when the actual output differs from the expected
    string, so this function reports failures without raising.
    """
    # Parsing: expression text -> repr() of the expression tree.
    test_cpp_expr("0", "(int 0)")
    test_cpp_expr("1", "(int 1)")
    test_cpp_expr("-5", "(int -5)")
    test_cpp_expr("+1", "(int 1)")
    test_cpp_expr("0U", "(int 0)")
    test_cpp_expr("015", "(oct 015)")
    test_cpp_expr("015l", "(oct 015)")
    test_cpp_expr("0x3e", "(hex 0x3e)")
    test_cpp_expr("(0)", "(int 0)")
    test_cpp_expr("1 && 1", "(&& (int 1) (int 1))")
    test_cpp_expr("1 && 0", "(&& (int 1) (int 0))")
    test_cpp_expr("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)")
    test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr("defined(ABC) || defined(BINGO)",
                  "(|| (defined ABC) (defined BINGO))")
    test_cpp_expr("FOO(BAR,5)", "(call FOO [BAR,5])")
    test_cpp_expr("A == 1 || defined(B)",
                  "(|| (== (ident A) (int 1)) (defined B))")

    # Optimization: constant folding of &&/||/!, macro substitution, and
    # resolution of defined(X) against the supplied macro dictionary.
    test_cpp_expr_optim("0", "(int 0)")
    test_cpp_expr_optim("1", "(int 1)")
    test_cpp_expr_optim("1 && 1", "(int 1)")
    test_cpp_expr_optim("1 && +1", "(int 1)")
    test_cpp_expr_optim("0x1 && 01", "(oct 01)")
    test_cpp_expr_optim("1 && 0", "(int 0)")
    test_cpp_expr_optim("0 && 1", "(int 0)")
    test_cpp_expr_optim("0 && 0", "(int 0)")
    test_cpp_expr_optim("1 || 1", "(int 1)")
    test_cpp_expr_optim("1 || 0", "(int 1)")
    test_cpp_expr_optim("0 || 1", "(int 1)")
    test_cpp_expr_optim("0 || 0", "(int 0)")
    test_cpp_expr_optim("A", "(ident A)")
    test_cpp_expr_optim("A", "(int 1)", {"A": 1})
    test_cpp_expr_optim("A || B", "(int 1)", {"A": 1})
    test_cpp_expr_optim("A || B", "(int 1)", {"B": 1})
    test_cpp_expr_optim("A && B", "(ident B)", {"A": 1})
    test_cpp_expr_optim("A && B", "(ident A)", {"B": 1})
    test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))")
    test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)",
                        {"EXAMPLE": "XOWOE"})
    # A macro mapped to kCppUndefinedMacro is known to be undefined.
    test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)",
                        {"EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))",
                        {"EXAMPLE": "XOWOE"})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)",
                        {"EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) || defined(B)",
                        "(|| (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"A": "1"})
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"B": "1"})
    test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)",
                        {"B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)",
                        {"A": kCppUndefinedMacro, "B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(&& (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(defined B)", {"A": "1"})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(defined A)", {"B": "1"})
    test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)",
                        {"B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(int 0)", {"A": kCppUndefinedMacro})
    test_cpp_expr_optim("A == 1 || defined(B)",
                        "(|| (== (ident A) (int 1)) (defined B))")
    test_cpp_expr_optim(
        "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
        "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))",
        {"__KERNEL__": kCppUndefinedMacro})

    # Source generation: expression tree -> C pre-processor text via str().
    test_cpp_expr_source("0", "0")
    test_cpp_expr_source("1", "1")
    test_cpp_expr_source("1 && 1", "1 && 1")
    test_cpp_expr_source("1 && 0", "1 && 0")
    test_cpp_expr_source("0 && 1", "0 && 1")
    test_cpp_expr_source("0 && 0", "0 && 0")
    test_cpp_expr_source("1 || 1", "1 || 1")
    test_cpp_expr_source("1 || 0", "1 || 0")
    test_cpp_expr_source("0 || 1", "0 || 1")
    test_cpp_expr_source("0 || 0", "0 || 0")
    test_cpp_expr_source("EXAMPLE", "EXAMPLE")
    test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3")
    test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)")
    test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)")
    test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)")
    958     test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)")
    959 
    960 
    961 ################################################################################
    962 ################################################################################
    963 #####                                                                      #####
    964 #####          C P P   B L O C K                                           #####
    965 #####                                                                      #####
    966 ################################################################################
    967 ################################################################################
    968 
    969 
class Block(object):
    """A class used to model a block of input source text.

    There are two block types:
      - directive blocks: contain the tokens of a single pre-processor
        directive (e.g. #if)
      - text blocks, contain the tokens of non-directive blocks

    The cpp parser class below will transform an input source file into a list
    of Block objects (grouped in a BlockList object for convenience)
    """

    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
        """Initialize a new block, if 'directive' is None, it is a text block.

        'tokens' is the token list for the block, 'lineno' the source line
        it starts on (0 means: take the line of the first token), and
        'identifier' is the macro name for a #define directive.

        NOTE: This automatically converts '#ifdef MACRO' into
        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.
        """

        # Normalize #ifdef: prepend a 'defined' token and treat it as #if.
        if directive == "ifdef":
            tok = Token()
            tok.id = tokDEFINED
            tokens = [tok] + tokens
            directive = "if"

        # Normalize #ifndef: prepend '!' and 'defined' tokens likewise.
        elif directive == "ifndef":
            tok1 = Token()
            tok2 = Token()
            tok1.id = tokNOT
            tok2.id = tokDEFINED
            tokens = [tok1, tok2] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        self.define_id = identifier
        if lineno > 0:
            self.lineno = lineno
        else:
            # Fall back to the source line of the first token.
            self.lineno = self.tokens[0].location.line

        # Conditional directives get their expression parsed eagerly so it
        # can later be optimized/re-emitted (see BlockList.optimizeMacros).
        if self.isIf():
            self.expr = CppExpr(self.tokens)

    def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive is not None

    def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
                                  "endif"]

    def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None
        return self.define_id

    def isIf(self):
        """Return True iff this is an #if-like directive block."""
        # NOTE: #ifdef/#ifndef were rewritten to "if" in __init__.
        return self.directive in ["if", "ifdef", "ifndef", "elif"]

    def isEndif(self):
        """Return True iff this is an #endif directive block."""
        return self.directive == "endif"

    def isInclude(self):
        """Check whether this is a #include directive.

        If true, returns the corresponding file name (with brackets or
        double-quotes). None otherwise.
        """

        if self.directive != "include":
            return None
        return ''.join([str(x) for x in self.tokens])

    @staticmethod
    def format_blocks(tokens, indent=0):
        """Return the formatted lines of strings with proper indentation.

        Returns a (lines, indent) tuple, where 'indent' is the resulting
        indentation level after the last token, so consecutive calls can
        carry the level across blocks.
        """
        # 'newline' is True when 'buf' is empty/at the start of a fresh line.
        newline = True
        result = []
        buf = ''
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.id == '{':
                # Open a brace on the current line, then indent what follows.
                buf += ' {'
                result.append(strip_space(buf))
                indent += 2
                buf = ''
                newline = True
            elif t.id == '}':
                indent -= 2
                if not newline:
                    result.append(strip_space(buf))
                # Look ahead to determine if it's the end of line.
                if (i + 1 < len(tokens) and
                    (tokens[i+1].id == ';' or
                     tokens[i+1].id in ['else', '__attribute__',
                                        '__attribute', '__packed'] or
                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
                    # Keep '}' on the same line as the trailing token(s),
                    # e.g. "} foo;" or "} __attribute__((...))".
                    buf = ' ' * indent + '}'
                    newline = False
                else:
                    result.append(' ' * indent + '}')
                    buf = ''
                    newline = True
            elif t.id == ';':
                result.append(strip_space(buf) + ';')
                buf = ''
                newline = True
            # We prefer a new line for each constant in enum.
            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
                result.append(strip_space(buf) + ',')
                buf = ''
                newline = True
            else:
                # Ordinary token: append to the current line buffer.
                if newline:
                    buf += ' ' * indent + str(t)
                else:
                    buf += ' ' + str(t)
                newline = False
            i += 1

        # Flush any unterminated trailing line.
        if buf:
            result.append(strip_space(buf))

        return result, indent

    def write(self, out, indent):
        """Dump the current block to 'out'; return the updated indent level."""
        # removeWhiteSpace() will sometimes creates non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and then should not be counted for this operation.
        #
        if self.directive is None and not self.tokens:
            return indent

        if self.directive:
            out.write(str(self) + '\n')
        else:
            lines, indent = self.format_blocks(self.tokens, indent)
            for line in lines:
                out.write(line + '\n')

        return indent

    def __repr__(self):
        """Generate the representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

        return result

    def __str__(self):
        """Generate the string representation of a given block."""
        if self.directive:
            # "#if"
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)

            # "#define"
            elif self.isDefine():
                result = "#%s %s" % (self.directive, self.define_id)
                if self.tokens:
                    result += " "
                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
                # remove the space between name and '(' in function call
                result += re.sub(r'(\w+) \(', r'\1(', expr)

            # "#error"
            # Concatenating tokens with a space separator, because they may
            # not be quoted and broken into several tokens
            elif self.directive == "error":
                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])

            else:
                result = "#%s" % self.directive
                if self.tokens:
                    result += " "
                result += ''.join([tok.id for tok in self.tokens])
        else:
            lines, _ = self.format_blocks(self.tokens)
            result = '\n'.join(lines)

        return result
   1176 
   1177 
class BlockList(object):
    """A convenience class used to hold and process a list of blocks.

    It calls the cpp parser to get the blocks.
    """

    def __init__(self, blocks):
        """Initialize with a list of Block objects."""
        self.blocks = blocks

    def __len__(self):
        """Return the number of blocks held."""
        return len(self.blocks)

    def __getitem__(self, n):
        """Return the n-th block."""
        return self.blocks[n]

    def __repr__(self):
        """Return the representation of the underlying block list."""
        return repr(self.blocks)

    def __str__(self):
        """Return all blocks as newline-separated source text."""
        result = '\n'.join([str(b) for b in self.blocks])
        return result

    def dump(self):
        """Dump all the blocks in current BlockList."""
        print '##### BEGIN #####'
        for i, b in enumerate(self.blocks):
            print '### BLOCK %d ###' % i
            print b
        print '##### END #####'

    def optimizeIf01(self):
        """Remove the code between #if 0 .. #endif in a BlockList."""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """Remove known defined and undefined macros from a BlockList."""
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def removeMacroDefines(self, macros):
        """Remove known macro definitions from a BlockList."""
        self.blocks = remove_macro_defines(self.blocks, macros)

    def optimizeAll(self, macros):
        """Apply macro optimization followed by #if 0 removal."""
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

    def findIncludes(self):
        """Return the list of included files in a BlockList."""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)
        return result

    def write(self, out):
        """Write all blocks to 'out', tracking indentation across blocks."""
        indent = 0
        for b in self.blocks:
            indent = b.write(out, indent)

    def removeVarsAndFuncs(self, knownStatics=None):
        """Remove variable and function declarations.

        All extern and static declarations corresponding to variable and
        function declarations are removed. We only accept typedefs and
        enum/structs/union declarations.

        However, we keep the definitions corresponding to the set of known
        static inline functions in the set 'knownStatics', which is useful
        for optimized byteorder swap functions and stuff like that.
        """

        # NOTE: It's also removing function-like macros, such as __SYSCALL(...)
        # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h.
        # It could be problematic when we have function-like macros but without
        # '}' following them. It will skip all the tokens/blocks until seeing a
        # '}' as the function end. Fortunately we don't have such cases in the
        # current kernel headers.

        # state = 0 => normal (i.e. LN + spaces)
        # state = 1 => typedef/struct encountered, ends with ";"
        # state = 2 => var declaration encountered, ends with ";"
        # state = 3 => func declaration encountered, ends with "}"

        if knownStatics is None:
            knownStatics = set()
        state = 0
        depth = 0       # current brace-nesting depth
        blocks2 = []    # surviving blocks
        # When skipTokens is True, tokens up to the current declaration's
        # terminator are being dropped; 'first' marks where kept tokens start.
        skipTokens = False
        for b in self.blocks:
            if b.isDirective():
                # Directives are always kept untouched.
                blocks2.append(b)
            else:
                n = len(b.tokens)
                i = 0
                if skipTokens:
                    first = n
                else:
                    first = 0
                while i < n:
                    tok = b.tokens[i]
                    tokid = tok.id
                    # If we are not looking for the start of a new
                    # type/var/func, then skip over tokens until
                    # we find our terminator, managing the depth of
                    # accolades as we go.
                    if state > 0:
                        terminator = False
                        if tokid == '{':
                            depth += 1
                        elif tokid == '}':
                            if depth > 0:
                                depth -= 1
                            if (depth == 0) and (state == 3):
                                terminator = True
                        elif tokid == ';' and depth == 0:
                            terminator = True

                        if terminator:
                            # we found the terminator
                            state = 0
                            if skipTokens:
                                skipTokens = False
                                first = i + 1

                        i += 1
                        continue

                    # Is it a new type definition, then start recording it
                    if tok.id in ['struct', 'typedef', 'enum', 'union',
                                  '__extension__']:
                        state = 1
                        i += 1
                        continue

                    # Is it a variable or function definition. If so, first
                    # try to determine which type it is, and also extract
                    # its name.
                    #
                    # We're going to parse the next tokens of the same block
                    # until we find a semi-column or a left parenthesis.
                    #
                    # The semi-column corresponds to a variable definition,
                    # the left-parenthesis to a function definition.
                    #
                    # We also assume that the var/func name is the last
                    # identifier before the terminator.
                    #
                    j = i + 1
                    ident = ""
                    while j < n:
                        tokid = b.tokens[j].id
                        if tokid == '(':  # a function declaration
                            state = 3
                            break
                        elif tokid == ';':  # a variable declaration
                            state = 2
                            break
                        if b.tokens[j].kind == TokenKind.IDENTIFIER:
                            ident = b.tokens[j].id
                        j += 1

                    if j >= n:
                        # This can only happen when the declaration
                        # does not end on the current block (e.g. with
                        # a directive mixed inside it.
                        #
                        # We will treat it as malformed because
                        # it's very hard to recover from this case
                        # without making our parser much more
                        # complex.
                        #
                        logging.debug("### skip unterminated static '%s'",
                                      ident)
                        break

                    if ident in knownStatics:
                        logging.debug("### keep var/func '%s': %s", ident,
                                      repr(b.tokens[i:j]))
                    else:
                        # We're going to skip the tokens for this declaration
                        logging.debug("### skip var/func '%s': %s", ident,
                                      repr(b.tokens[i:j]))
                        if i > first:
                            blocks2.append(Block(b.tokens[first:i]))
                        skipTokens = True
                        first = n

                    i += 1

                # Flush any kept tokens at the end of this block.
                if i > first:
                    # print "### final '%s'" % repr(b.tokens[first:i])
                    blocks2.append(Block(b.tokens[first:i]))

        self.blocks = blocks2

    def replaceTokens(self, replacements):
        """Replace tokens according to the given dict."""
        for b in self.blocks:
            made_change = False
            # Never rewrite the tokens of a #include directive.
            if b.isInclude() is None:
                for tok in b.tokens:
                    if tok.kind == TokenKind.IDENTIFIER:
                        if tok.id in replacements:
                            tok.id = replacements[tok.id]
                            made_change = True

                # Also rename the macro in a #define directive's identifier.
                if b.isDefine() and b.define_id in replacements:
                    b.define_id = replacements[b.define_id]
                    made_change = True

            if made_change and b.isIf():
                # Keep 'expr' in sync with 'tokens'.
                b.expr = CppExpr(b.tokens)
   1396 
   1397 
   1398 def strip_space(s):
   1399     """Strip out redundant space in a given string."""
   1400 
   1401     # NOTE: It ought to be more clever to not destroy spaces in string tokens.
   1402     replacements = {' . ': '.',
   1403                     ' [': '[',
   1404                     '[ ': '[',
   1405                     ' ]': ']',
   1406                     '( ': '(',
   1407                     ' )': ')',
   1408                     ' ,': ',',
   1409                     '# ': '#',
   1410                     ' ;': ';',
   1411                     '~ ': '~',
   1412                     ' -> ': '->'}
   1413     result = s
   1414     for r in replacements:
   1415         result = result.replace(r, replacements[r])
   1416 
   1417     # Remove the space between function name and the parenthesis.
   1418     result = re.sub(r'(\w+) \(', r'\1(', result)
   1419     return result
   1420 
   1421 
   1422 class BlockParser(object):
   1423     """A class that converts an input source file into a BlockList object."""
   1424 
   1425     def __init__(self, tokzer=None):
   1426         """Initialize a block parser.
   1427 
   1428         The input source is provided through a Tokenizer object.
   1429         """
   1430         self._tokzer = tokzer
   1431         self._parsed = False
   1432 
   1433     @property
   1434     def parsed(self):
   1435         return self._parsed
   1436 
   1437     @staticmethod
   1438     def _short_extent(extent):
   1439         return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
   1440                                   extent.end.line, extent.end.column)
   1441 
   1442     def getBlocks(self, tokzer=None):
   1443         """Return all the blocks parsed."""
   1444 
   1445         def consume_extent(i, tokens, extent=None, detect_change=False):
   1446             """Return tokens that belong to the given extent.
   1447 
   1448             It parses all the tokens that follow tokens[i], until getting out
   1449             of the extent. When detect_change is True, it may terminate early
   1450             when detecting preprocessing directives inside the extent.
   1451             """
   1452 
   1453             result = []
   1454             if extent is None:
   1455                 extent = tokens[i].cursor.extent
   1456 
   1457             while i < len(tokens) and tokens[i].location in extent:
   1458                 t = tokens[i]
   1459                 if debugBlockParser:
   1460                     print ' ' * 2, t.id, t.kind, t.cursor.kind
   1461                 if (detect_change and t.cursor.extent != extent and
   1462                     t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
   1463                     break
   1464                 result.append(t)
   1465                 i += 1
   1466             return (i, result)
   1467 
   1468         def consume_line(i, tokens):
   1469             """Return tokens that follow tokens[i] in the same line."""
   1470             result = []
   1471             line = tokens[i].location.line
   1472             while i < len(tokens) and tokens[i].location.line == line:
   1473                 if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
   1474                     break
   1475                 result.append(tokens[i])
   1476                 i += 1
   1477             return (i, result)
   1478 
   1479         if tokzer is None:
   1480             tokzer = self._tokzer
   1481         tokens = tokzer.tokens
   1482 
   1483         blocks = []
   1484         buf = []
   1485         i = 0
   1486 
   1487         while i < len(tokens):
   1488             t = tokens[i]
   1489             cursor = t.cursor
   1490 
   1491             if debugBlockParser:
   1492                 print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
   1493                        "extent=[%s]" % (t.location.line, t.spelling, t.kind,
   1494                                         cursor.kind,
   1495                                         self._short_extent(cursor.extent)))
   1496 
   1497             if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
   1498                 if buf:
   1499                     blocks.append(Block(buf))
   1500                     buf = []
   1501 
   1502                 j = i
   1503                 if j + 1 >= len(tokens):
   1504                     raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
   1505                 directive = tokens[j+1].id
   1506 
   1507                 if directive == 'define':
   1508                     if i+2 >= len(tokens):
   1509                         raise BadExpectedToken("### BAD TOKEN at %s" %
   1510                                                (tokens[i].location))
   1511 
   1512                     # Skip '#' and 'define'.
   1513                     extent = tokens[i].cursor.extent
   1514                     i += 2
   1515                     id = ''
   1516                     # We need to separate the id from the remaining of
   1517                     # the line, especially for the function-like macro.
   1518                     if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
   1519                         (tokens[i].location.column + len(tokens[i].spelling) ==
   1520                          tokens[i+1].location.column)):
   1521                         while i < len(tokens):
   1522                             id += tokens[i].id
   1523                             if tokens[i].spelling == ')':
   1524                                 i += 1
   1525                                 break
   1526                             i += 1
   1527                     else:
   1528                         id += tokens[i].id
   1529                         # Advance to the next token that follows the macro id
   1530                         i += 1
   1531 
   1532                     (i, ret) = consume_extent(i, tokens, extent=extent)
   1533                     blocks.append(Block(ret, directive=directive,
   1534                                         lineno=t.location.line, identifier=id))
   1535 
   1536                 else:
   1537                     (i, ret) = consume_extent(i, tokens)
   1538                     blocks.append(Block(ret[2:], directive=directive,
   1539                                         lineno=t.location.line))
   1540 
   1541             elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
   1542                 if buf:
   1543                     blocks.append(Block(buf))
   1544                     buf = []
   1545                 directive = tokens[i+1].id
   1546                 (i, ret) = consume_extent(i, tokens)
   1547 
   1548                 blocks.append(Block(ret[2:], directive=directive,
   1549                                     lineno=t.location.line))
   1550 
   1551             elif cursor.kind == CursorKind.VAR_DECL:
   1552                 if buf:
   1553                     blocks.append(Block(buf))
   1554                     buf = []
   1555 
   1556                 (i, ret) = consume_extent(i, tokens, detect_change=True)
   1557                 buf += ret
   1558 
   1559             elif cursor.kind == CursorKind.FUNCTION_DECL:
   1560                 if buf:
   1561                     blocks.append(Block(buf))
   1562                     buf = []
   1563 
   1564                 (i, ret) = consume_extent(i, tokens, detect_change=True)
   1565                 buf += ret
   1566 
   1567             else:
   1568                 (i, ret) = consume_line(i, tokens)
   1569                 buf += ret
   1570 
   1571         if buf:
   1572             blocks.append(Block(buf))
   1573 
        # _parsed=True indicates a successful parsing, although it may result
        # in an empty BlockList.
   1576         self._parsed = True
   1577 
   1578         return BlockList(blocks)
   1579 
   1580     def parse(self, tokzer):
   1581         return self.getBlocks(tokzer)
   1582 
   1583     def parseFile(self, path):
   1584         return self.getBlocks(CppFileTokenizer(path))
   1585 
   1586 
   1587 def test_block_parsing(lines, expected):
   1588     """Helper method to test the correctness of BlockParser.parse."""
   1589     blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines)))
   1590     if len(blocks) != len(expected):
   1591         raise BadExpectedToken("BlockParser.parse() returned '%s' expecting "
   1592                                "'%s'" % (str(blocks), repr(expected)))
   1593     for n in range(len(blocks)):
   1594         if str(blocks[n]) != expected[n]:
   1595             raise BadExpectedToken("BlockParser.parse()[%d] is '%s', "
   1596                                    "expecting '%s'" % (n, str(blocks[n]),
   1597                                                        expected[n]))
   1598 
   1599 
   1600 def test_BlockParser():
   1601     test_block_parsing(["#error hello"], ["#error hello"])
   1602     test_block_parsing(["foo", "", "bar"], ["foo bar"])
   1603 
   1604     # We currently cannot handle the following case with libclang properly.
   1605     # Fortunately it doesn't appear in current headers.
   1606     # test_block_parsing(["foo", "  #  ", "bar"], ["foo", "bar"])
   1607 
   1608     test_block_parsing(["foo",
   1609                         "  #  /* ahah */ if defined(__KERNEL__) /* more */",
   1610                         "bar", "#endif"],
   1611                        ["foo", "#ifdef __KERNEL__", "bar", "#endif"])
   1612 
   1613 
   1614 ################################################################################
   1615 ################################################################################
   1616 #####                                                                      #####
   1617 #####        B L O C K   L I S T   O P T I M I Z A T I O N                 #####
   1618 #####                                                                      #####
   1619 ################################################################################
   1620 ################################################################################
   1621 
   1622 
   1623 def remove_macro_defines(blocks, excludedMacros=None):
   1624     """Remove macro definitions like #define <macroName>  ...."""
   1625     if excludedMacros is None:
   1626         excludedMacros = set()
   1627     result = []
   1628     for b in blocks:
   1629         macroName = b.isDefine()
   1630         if macroName is None or macroName not in excludedMacros:
   1631             result.append(b)
   1632 
   1633     return result
   1634 
   1635 
   1636 def find_matching_endif(blocks, i):
   1637     """Traverse the blocks to find out the matching #endif."""
   1638     n = len(blocks)
   1639     depth = 1
   1640     while i < n:
   1641         if blocks[i].isDirective():
   1642             dir_ = blocks[i].directive
   1643             if dir_ in ["if", "ifndef", "ifdef"]:
   1644                 depth += 1
   1645             elif depth == 1 and dir_ in ["else", "elif"]:
   1646                 return i
   1647             elif dir_ == "endif":
   1648                 depth -= 1
   1649                 if depth == 0:
   1650                     return i
   1651         i += 1
   1652     return i
   1653 
   1654 
def optimize_if01(blocks):
    """Remove the code between #if 0 .. #endif in a list of CppBlocks.

    Returns a new list of blocks where:
      - '#if 0' sections are dropped; a following '#else' is rewritten to
        '#if 1' and a following '#elif' to '#if', then re-examined,
      - '#if 1' sections keep their (recursively optimized) body while the
        '#if 1'/'#endif' directives themselves are dropped; a following
        '#else' is rewritten to '#if 0' and re-examined,
      - '#if' directives whose expression does not fold to a constant are
        kept unchanged.
    """
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        # Copy the run of non-'#if' blocks [i, j) to the output verbatim.
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            logging.debug("appending lines %d to %d", blocks[i].lineno,
                          blocks[j-1].lineno)
            result += blocks[i:j]
        if j >= n:
            break
        # blocks[j] is an '#if'-like directive.  toInt() returns None when
        # the controlling expression is not a compile-time constant.
        expr = blocks[j].expr
        r = expr.toInt()
        if r is None:
            # Non-constant condition: keep the directive as-is.
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            j = find_matching_endif(blocks, j + 1)
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir_ = blocks[j].directive
            if dir_ == "endif":
                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
                              blocks[i].lineno, blocks[j].lineno)
                i = j + 1
            elif dir_ == "else":
                # convert 'else' into 'if 1'
                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
                              "to %d)", blocks[i].lineno, blocks[j-1].lineno)
                blocks[j].directive = "if"
                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
                # Leave i at j so the rewritten '#if 1' is re-processed.
                i = j
            elif dir_ == "elif":
                # convert 'elif' into 'if'
                logging.debug("convert 'if 0' .. 'elif' into 'if'")
                blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif(blocks, j + 1)
        if k >= n:
            # unterminated #if 1, finish here
            logging.debug("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        dir_ = blocks[k].directive
        if dir_ == "endif":
            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            # Keep the optimized body; drop the '#if 1' and '#endif'.
            result += optimize_if01(blocks[j+1:k])
            i = k + 1
        elif dir_ == "else":
            # convert 'else' into 'if 0'
            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            blocks[k].directive = "if"
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            # Re-process the rewritten '#if 0' on the next iteration.
            i = k
        elif dir_ == "elif":
            # convert 'elif' into 'if 0'
            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
    return result
   1732 
   1733 
def test_optimizeAll():
    """Test BlockList.optimizeAll() end to end.

    Parses |text|, applies the kernel token replacements, optimizes with
    __KERNEL__ forced undefined, and compares the written output against
    |expected|.  On mismatch, prints a diagnostic and bumps the global
    failure_count.
    """
    # Input: constant #if 0/1 sections, a __KERNEL__-dependent section,
    # a mixed expression, a token-replacement candidate, and nesting.
    text = """\
#if 1
#define  GOOD_1
#endif
#if 0
#define  BAD_2
#define  BAD_3
#endif

#if 1
#define  GOOD_2
#else
#define  BAD_4
#endif

#if 0
#define  BAD_5
#else
#define  GOOD_3
#endif

#if defined(__KERNEL__)
#define BAD_KERNEL
#endif

#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
#define X
#endif

#ifndef SIGRTMAX
#define SIGRTMAX 123
#endif /* SIGRTMAX */

#if 0
#if 1
#define  BAD_6
#endif
#endif\
"""

    # Only the GOOD_* defines survive; __KERNEL__ terms are folded away
    # and SIGRTMAX is renamed by kernel_token_replacements.
    expected = """\
#define GOOD_1
#define GOOD_2
#define GOOD_3
#if !defined(__GLIBC__) || __GLIBC__ < 2
#define X
#endif
#ifndef __SIGRTMAX
#define __SIGRTMAX 123
#endif
"""

    out = utils.StringOutput()
    blocks = BlockParser().parse(CppStringTokenizer(text))
    blocks.replaceTokens(kernel_token_replacements)
    blocks.optimizeAll({"__KERNEL__": kCppUndefinedMacro})
    blocks.write(out)
    if out.get() != expected:
        print "[FAIL]: macro optimization failed\n"
        print "<<<< expecting '",
        print expected,
        print "'\n>>>> result '",
        print out.get(),
        print "'\n----"
        global failure_count
        failure_count += 1
   1801 
   1802 
   1803 def runUnitTests():
   1804     """Always run all unit tests for this program."""
   1805     test_CppTokenizer()
   1806     test_CppExpr()
   1807     test_optimizeAll()
   1808     test_BlockParser()
   1809 
   1810 
# Module entry point: the unit tests run on import/execution and increment
# failure_count (see test_optimizeAll) on any mismatch.
failure_count = 0
runUnitTests()
if failure_count != 0:
    utils.panic("Unit tests failed in cpp.py.\n")
   1815