# (code-viewer navigation header removed: Home | History | Annotate | Download | only in tools)
      1 #!/usr/bin/python
      2 """A glorified C pre-processor parser."""
      3 
      4 import ctypes
      5 import logging
      6 import os
      7 import re
      8 import site
      9 import utils
     10 
# This tool only works from inside an Android build environment.
top = os.getenv('ANDROID_BUILD_TOP')
if top is None:
    utils.panic('ANDROID_BUILD_TOP not set.\n')

# Set up the env vars for libclang.
# NOTE: the clang imports below must stay *after* this addsitedir() call so
# that the in-tree Python bindings are found on sys.path.
site.addsitedir(os.path.join(top, 'external/clang/bindings/python'))

import clang.cindex
from clang.cindex import conf
from clang.cindex import Cursor
from clang.cindex import CursorKind
from clang.cindex import SourceLocation
from clang.cindex import SourceRange
from clang.cindex import TokenGroup
from clang.cindex import TokenKind
from clang.cindex import TranslationUnit

# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc.
# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
clang.cindex.Config.set_library_path(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64'))

from defaults import kCppUndefinedMacro
from defaults import kernel_remove_config_macros
from defaults import kernel_token_replacements
     35 
     36 
# Module-level debug switches; flip to True to trace the corresponding stage.
debugBlockParser = False
debugCppExpr = False
debugOptimIf01 = False
     40 
     41 ###############################################################################
     42 ###############################################################################
     43 #####                                                                     #####
     44 #####           C P P   T O K E N S                                       #####
     45 #####                                                                     #####
     46 ###############################################################################
     47 ###############################################################################
     48 
# the list of supported C-preprocessor tokens
# plus a couple of C tokens as well
tokEOF = "\0"
tokLN = "\n"
# Preprocessor-only operators.
tokSTRINGIFY = "#"
tokCONCAT = "##"
# Binary / logical / comparison operators shared with C.
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokELLIPSIS = "..."
tokSPACE = " "
tokDEFINED = "defined"
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
# Pseudo-token ids used as placeholders for classes of tokens.
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"
     88 
     89 
     90 class Token(clang.cindex.Token):
     91     """A class that represents one token after parsing.
     92 
     93     It inherits the class in libclang, with an extra id property to hold the
     94     new spelling of the token. The spelling property in the base class is
     95     defined as read-only. New names after macro instantiation are saved in
     96     their ids now. It also facilitates the renaming of directive optimizations
     97     like replacing 'ifndef X' with 'if !defined(X)'.
     98 
     99     It also overrides the cursor property of the base class. Because the one
    100     in libclang always queries based on a single token, which usually doesn't
    101     hold useful information. The cursor in this class can be set by calling
    102     CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
    103     base class.
    104     """
    105 
    106     def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
    107                  cursor=None):
    108         clang.cindex.Token.__init__(self)
    109         self._id = None
    110         self._tu = tu
    111         self._group = group
    112         self._cursor = cursor
    113         # self.int_data and self.ptr_data are from the base class. But
    114         # self.int_data doesn't accept a None value.
    115         if int_data is not None:
    116             self.int_data = int_data
    117         self.ptr_data = ptr_data
    118 
    119     @property
    120     def id(self):
    121         """Name of the token."""
    122         if self._id is None:
    123             return self.spelling
    124         else:
    125             return self._id
    126 
    127     @id.setter
    128     def id(self, new_id):
    129         """Setting name of the token."""
    130         self._id = new_id
    131 
    132     @property
    133     def cursor(self):
    134         if self._cursor is None:
    135             self._cursor = clang.cindex.Token.cursor
    136         return self._cursor
    137 
    138     @cursor.setter
    139     def cursor(self, new_cursor):
    140         self._cursor = new_cursor
    141 
    142     def __repr__(self):
    143         if self.id == 'defined':
    144             return self.id
    145         elif self.kind == TokenKind.IDENTIFIER:
    146             return "(ident %s)" % self.id
    147 
    148         return self.id
    149 
    150     def __str__(self):
    151         return self.id
    152 
    153 
    154 class BadExpectedToken(Exception):
    155     """An exception that will be raised for unexpected tokens."""
    156     pass
    157 
    158 
    159 # The __contains__ function in libclang SourceRange class contains a bug. It
    160 # gives wrong result when dealing with single line range.
    161 # Bug filed with upstream:
    162 # http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
    163 def SourceRange__contains__(self, other):
    164     """Determine if a given location is inside the range."""
    165     if not isinstance(other, SourceLocation):
    166         return False
    167     if other.file is None and self.start.file is None:
    168         pass
    169     elif (self.start.file.name != other.file.name or
    170           other.file.name != self.end.file.name):
    171         # same file name
    172         return False
    173     # same file, in between lines
    174     if self.start.line < other.line < self.end.line:
    175         return True
    176     # same file, same line
    177     elif self.start.line == other.line == self.end.line:
    178         if self.start.column <= other.column <= self.end.column:
    179             return True
    180     elif self.start.line == other.line:
    181         # same file first line
    182         if self.start.column <= other.column:
    183             return True
    184     elif other.line == self.end.line:
    185         # same file last line
    186         if other.column <= self.end.column:
    187             return True
    188     return False
    189 
    190 
# Install the corrected implementation over libclang's SourceRange class.
SourceRange.__contains__ = SourceRange__contains__
    192 
    193 
    194 ################################################################################
    195 ################################################################################
    196 #####                                                                      #####
    197 #####           C P P   T O K E N I Z E R                                  #####
    198 #####                                                                      #####
    199 ################################################################################
    200 ################################################################################
    201 
    202 
class CppTokenizer(object):
    """A tokenizer that converts some input text into a list of tokens.

    It calls libclang's tokenizer to get the parsed tokens. In addition, it
    updates the cursor property in each token after parsing, by calling
    getTokensWithCursors().
    """

    # Preprocess only, treating the input as C.
    clang_flags = ['-E', '-x', 'c']
    # Detailed preprocessing records are needed so macro definitions and
    # instantiations show up as cursors.
    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

    def __init__(self):
        """Initialize a new CppTokenizer object."""
        self._indexer = clang.cindex.Index.create()
        self._tu = None      # TranslationUnit from the last parse call.
        self._index = 0      # Read position for nextToken().
        self.tokens = None   # List of Token, filled by parseString/parseFile.

    def _getTokensWithCursors(self):
        """Helper method to return all tokens with their cursors.

        The cursor property in a clang Token doesn't provide enough
        information. Because it is queried based on single token each time
        without any context, i.e. via calling conf.lib.clang_annotateTokens()
        with only one token given. So we often see 'INVALID_FILE' in one
        token's cursor. In this function it passes all the available tokens
        to get more informative cursors.
        """

        tokens_memory = ctypes.POINTER(clang.cindex.Token)()
        tokens_count = ctypes.c_uint()

        conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
                                ctypes.byref(tokens_memory),
                                ctypes.byref(tokens_count))

        count = int(tokens_count.value)

        # If we get no tokens, no memory was allocated. Be sure not to return
        # anything and potentially call a destructor on nothing.
        if count < 1:
            return

        cursors = (Cursor * count)()
        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))

        # Annotate all tokens in a single call so each token receives the
        # most specific cursor available for its context.
        conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
                                      cursors_memory)

        tokens_array = ctypes.cast(
            tokens_memory,
            ctypes.POINTER(clang.cindex.Token * count)).contents
        # NOTE(review): TokenGroup presumably takes ownership of
        # tokens_memory and disposes of it when collected; each Token keeps
        # a reference to the group so the memory stays alive — confirm
        # against clang.cindex.TokenGroup.
        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)

        tokens = []
        for i in xrange(0, count):
            token = Token(self._tu, token_group,
                          int_data=tokens_array[i].int_data,
                          ptr_data=tokens_array[i].ptr_data,
                          cursor=cursors[i])
            # We only want non-comment tokens.
            if token.kind != TokenKind.COMMENT:
                tokens.append(token)

        return tokens

    def parseString(self, lines):
        """Parse text into tokens, stored in self.tokens.

        NOTE(review): despite the name, 'lines' is handed straight to
        libclang as the unsaved-file contents; callers such as
        CppStringTokenizer pass a single string.
        """
        file_ = 'dummy.c'
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       unsaved_files=[(file_, lines)],
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def parseFile(self, file_):
        """Parse a file into tokens, stored in self.tokens."""
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def nextToken(self):
        """Return next token from the list, or None when exhausted."""
        if self._index < len(self.tokens):
            t = self.tokens[self._index]
            self._index += 1
            return t
        else:
            return None
    291 
    292 
    293 class CppStringTokenizer(CppTokenizer):
    294     """A CppTokenizer derived class that accepts a string of text as input."""
    295 
    296     def __init__(self, line):
    297         CppTokenizer.__init__(self)
    298         self.parseString(line)
    299 
    300 
    301 class CppFileTokenizer(CppTokenizer):
    302     """A CppTokenizer derived class that accepts a file as input."""
    303 
    304     def __init__(self, file_):
    305         CppTokenizer.__init__(self)
    306         self.parseFile(file_)
    307 
    308 
    309 # Unit testing
    310 #
    311 class CppTokenizerTester(object):
    312     """A class used to test CppTokenizer classes."""
    313 
    314     def __init__(self, tokenizer=None):
    315         self._tokenizer = tokenizer
    316         self._token = None
    317 
    318     def setTokenizer(self, tokenizer):
    319         self._tokenizer = tokenizer
    320 
    321     def expect(self, id):
    322         self._token = self._tokenizer.nextToken()
    323         if self._token is None:
    324             tokid = ''
    325         else:
    326             tokid = self._token.id
    327         if tokid == id:
    328             return
    329         raise BadExpectedToken("###  BAD TOKEN: '%s' expecting '%s'" % (
    330             tokid, id))
    331 
    332     def expectToken(self, id, line, col):
    333         self.expect(id)
    334         if self._token.location.line != line:
    335             raise BadExpectedToken(
    336                 "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (
    337                     id, self._token.lineno, line))
    338         if self._token.location.column != col:
    339             raise BadExpectedToken("###  BAD COLNO: '%d' expecting '%d'" % (
    340                 self._token.colno, col))
    341 
    342     def expectTokens(self, tokens):
    343         for id, line, col in tokens:
    344             self.expectToken(id, line, col)
    345 
    346     def expectList(self, list_):
    347         for item in list_:
    348             self.expect(item)
    349 
    350 
    351 def test_CppTokenizer():
    352     tester = CppTokenizerTester()
    353 
    354     tester.setTokenizer(CppStringTokenizer("#an/example  && (01923_xy)"))
    355     tester.expectList(["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
    356                        "01923_xy", tokRPAREN])
    357 
    358     tester.setTokenizer(CppStringTokenizer("FOO(BAR) && defined(BAZ)"))
    359     tester.expectList(["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
    360                        "defined", tokLPAREN, "BAZ", tokRPAREN])
    361 
    362     tester.setTokenizer(CppStringTokenizer("/*\n#\n*/"))
    363     tester.expectList([])
    364 
    365     tester.setTokenizer(CppStringTokenizer("first\nsecond"))
    366     tester.expectList(["first", "second"])
    367 
    368     tester.setTokenizer(CppStringTokenizer("first second\n  third"))
    369     tester.expectTokens([("first", 1, 1),
    370                          ("second", 1, 7),
    371                          ("third", 2, 3)])
    372 
    373     tester.setTokenizer(CppStringTokenizer("boo /* what the\nhell */"))
    374     tester.expectTokens([("boo", 1, 1)])
    375 
    376     tester.setTokenizer(CppStringTokenizer("an \\\n example"))
    377     tester.expectTokens([("an", 1, 1),
    378                          ("example", 2, 2)])
    379     return True
    380 
    381 
    382 ################################################################################
    383 ################################################################################
    384 #####                                                                      #####
    385 #####           C P P   E X P R E S S I O N S                              #####
    386 #####                                                                      #####
    387 ################################################################################
    388 ################################################################################
    389 
    390 
    391 class CppExpr(object):
    392     """A class that models the condition of #if directives into an expr tree.
    393 
    394     Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
    395     "op" is a string describing the operation
    396     """
    397 
    398     unaries = ["!", "~"]
    399     binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
    400                 "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
    401     precedences = {
    402         "?": 1, ":": 1,
    403         "||": 2,
    404         "&&": 3,
    405         "|": 4,
    406         "^": 5,
    407         "&": 6,
    408         "==": 7, "!=": 7,
    409         "<": 8, "<=": 8, ">": 8, ">=": 8,
    410         "<<": 9, ">>": 9,
    411         "+": 10, "-": 10,
    412         "*": 11, "/": 11, "%": 11,
    413         "!": 12, "~": 12
    414     }
    415 
    416     def __init__(self, tokens):
    417         """Initialize a CppExpr. 'tokens' must be a CppToken list."""
    418         self.tokens = tokens
    419         self._num_tokens = len(tokens)
    420         self._index = 0
    421 
    422         if debugCppExpr:
    423             print "CppExpr: trying to parse %s" % repr(tokens)
    424         self.expr = self.parseExpression(0)
    425         if debugCppExpr:
    426             print "CppExpr: got " + repr(self.expr)
    427         if self._index != self._num_tokens:
    428             self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
    429                        % (self._index, self._num_tokens, repr(tokens)))
    430 
    431     def throw(self, exception, msg):
    432         if self._index < self._num_tokens:
    433             tok = self.tokens[self._index]
    434             print "%d:%d: %s" % (tok.location.line, tok.location.column, msg)
    435         else:
    436             print "EOF: %s" % msg
    437         raise exception(msg)
    438 
    439     def expectId(self, id):
    440         """Check that a given token id is at the current position."""
    441         token = self.tokens[self._index]
    442         if self._index >= self._num_tokens or token.id != id:
    443             self.throw(BadExpectedToken,
    444                        "### expecting '%s' in expression, got '%s'" % (
    445                            id, token.id))
    446         self._index += 1
    447 
    448     def is_decimal(self):
    449         token = self.tokens[self._index].id
    450         if token[-1] in "ULul":
    451             token = token[:-1]
    452         try:
    453             val = int(token, 10)
    454             self._index += 1
    455             return ('int', val)
    456         except ValueError:
    457             return None
    458 
    459     def is_octal(self):
    460         token = self.tokens[self._index].id
    461         if token[-1] in "ULul":
    462             token = token[:-1]
    463         if len(token) < 2 or token[0] != '0':
    464             return None
    465         try:
    466             val = int(token, 8)
    467             self._index += 1
    468             return ('oct', val)
    469         except ValueError:
    470             return None
    471 
    472     def is_hexadecimal(self):
    473         token = self.tokens[self._index].id
    474         if token[-1] in "ULul":
    475             token = token[:-1]
    476         if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
    477             return None
    478         try:
    479             val = int(token, 16)
    480             self._index += 1
    481             return ('hex', val)
    482         except ValueError:
    483             return None
    484 
    485     def is_integer(self):
    486         if self.tokens[self._index].kind != TokenKind.LITERAL:
    487             return None
    488 
    489         c = self.is_hexadecimal()
    490         if c:
    491             return c
    492 
    493         c = self.is_octal()
    494         if c:
    495             return c
    496 
    497         c = self.is_decimal()
    498         if c:
    499             return c
    500 
    501         return None
    502 
    503     def is_number(self):
    504         t = self.tokens[self._index]
    505         if t.id == tokMINUS and self._index + 1 < self._num_tokens:
    506             self._index += 1
    507             c = self.is_integer()
    508             if c:
    509                 op, val = c
    510                 return (op, -val)
    511         if t.id == tokPLUS and self._index + 1 < self._num_tokens:
    512             self._index += 1
    513             c = self.is_integer()
    514             if c:
    515                 return c
    516 
    517         return self.is_integer()
    518 
    519     def is_defined(self):
    520         t = self.tokens[self._index]
    521         if t.id != tokDEFINED:
    522             return None
    523 
    524         # We have the defined keyword, check the rest.
    525         self._index += 1
    526         used_parens = False
    527         if (self._index < self._num_tokens and
    528             self.tokens[self._index].id == tokLPAREN):
    529             used_parens = True
    530             self._index += 1
    531 
    532         if self._index >= self._num_tokens:
    533             self.throw(BadExpectedToken,
    534                        "### 'defined' must be followed by macro name or left "
    535                        "paren")
    536 
    537         t = self.tokens[self._index]
    538         if t.kind != TokenKind.IDENTIFIER:
    539             self.throw(BadExpectedToken,
    540                        "### 'defined' must be followed by macro name")
    541 
    542         self._index += 1
    543         if used_parens:
    544             self.expectId(tokRPAREN)
    545 
    546         return ("defined", t.id)
    547 
    548     def is_call_or_ident(self):
    549         if self._index >= self._num_tokens:
    550             return None
    551 
    552         t = self.tokens[self._index]
    553         if t.kind != TokenKind.IDENTIFIER:
    554             return None
    555 
    556         name = t.id
    557 
    558         self._index += 1
    559         if (self._index >= self._num_tokens or
    560             self.tokens[self._index].id != tokLPAREN):
    561             return ("ident", name)
    562 
    563         params = []
    564         depth = 1
    565         self._index += 1
    566         j = self._index
    567         while self._index < self._num_tokens:
    568             id = self.tokens[self._index].id
    569             if id == tokLPAREN:
    570                 depth += 1
    571             elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
    572                 k = self._index
    573                 param = self.tokens[j:k]
    574                 params.append(param)
    575                 if id == tokRPAREN:
    576                     break
    577                 j = self._index + 1
    578             elif id == tokRPAREN:
    579                 depth -= 1
    580             self._index += 1
    581 
    582         if self._index >= self._num_tokens:
    583             return None
    584 
    585         self._index += 1
    586         return ("call", (name, params))
    587 
    588     # Implements the "precedence climbing" algorithm from
    589     # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
    590     # The "classic" algorithm would be fine if we were using a tool to
    591     # generate the parser, but we're not. Dijkstra's "shunting yard"
    592     # algorithm hasn't been necessary yet.
    593 
    594     def parseExpression(self, minPrecedence):
    595         if self._index >= self._num_tokens:
    596             return None
    597 
    598         node = self.parsePrimary()
    599         while (self.token() and self.isBinary(self.token()) and
    600                self.precedence(self.token()) >= minPrecedence):
    601             op = self.token()
    602             self.nextToken()
    603             rhs = self.parseExpression(self.precedence(op) + 1)
    604             node = (op.id, node, rhs)
    605 
    606         return node
    607 
    608     def parsePrimary(self):
    609         op = self.token()
    610         if self.isUnary(op):
    611             self.nextToken()
    612             return (op.id, self.parseExpression(self.precedence(op)))
    613 
    614         primary = None
    615         if op.id == tokLPAREN:
    616             self.nextToken()
    617             primary = self.parseExpression(0)
    618             self.expectId(tokRPAREN)
    619         elif op.id == "?":
    620             self.nextToken()
    621             primary = self.parseExpression(0)
    622             self.expectId(":")
    623         elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
    624             primary = self.is_number()
    625         # Checking for 'defined' needs to come first now because 'defined' is
    626         # recognized as IDENTIFIER.
    627         elif op.id == tokDEFINED:
    628             primary = self.is_defined()
    629         elif op.kind == TokenKind.IDENTIFIER:
    630             primary = self.is_call_or_ident()
    631         else:
    632             self.throw(BadExpectedToken,
    633                        "didn't expect to see a %s in factor" % (
    634                            self.tokens[self._index].id))
    635         return primary
    636 
    637     def isBinary(self, token):
    638         return token.id in self.binaries
    639 
    640     def isUnary(self, token):
    641         return token.id in self.unaries
    642 
    643     def precedence(self, token):
    644         return self.precedences.get(token.id)
    645 
    646     def token(self):
    647         if self._index >= self._num_tokens:
    648             return None
    649         return self.tokens[self._index]
    650 
    651     def nextToken(self):
    652         self._index += 1
    653         if self._index >= self._num_tokens:
    654             return None
    655         return self.tokens[self._index]
    656 
    657     def dump_node(self, e):
    658         op = e[0]
    659         line = "(" + op
    660         if op == "int":
    661             line += " %d)" % e[1]
    662         elif op == "oct":
    663             line += " 0%o)" % e[1]
    664         elif op == "hex":
    665             line += " 0x%x)" % e[1]
    666         elif op == "ident":
    667             line += " %s)" % e[1]
    668         elif op == "defined":
    669             line += " %s)" % e[1]
    670         elif op == "call":
    671             arg = e[1]
    672             line += " %s [" % arg[0]
    673             prefix = ""
    674             for param in arg[1]:
    675                 par = ""
    676                 for tok in param:
    677                     par += str(tok)
    678                 line += "%s%s" % (prefix, par)
    679                 prefix = ","
    680             line += "])"
    681         elif op in CppExpr.unaries:
    682             line += " %s)" % self.dump_node(e[1])
    683         elif op in CppExpr.binaries:
    684             line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
    685         else:
    686             line += " ?%s)" % repr(e[1])
    687 
    688         return line
    689 
    690     def __repr__(self):
    691         return self.dump_node(self.expr)
    692 
    693     def source_node(self, e):
    694         op = e[0]
    695         if op == "int":
    696             return "%d" % e[1]
    697         if op == "hex":
    698             return "0x%x" % e[1]
    699         if op == "oct":
    700             return "0%o" % e[1]
    701         if op == "ident":
    702             # XXX: should try to expand
    703             return e[1]
    704         if op == "defined":
    705             return "defined(%s)" % e[1]
    706 
    707         prec = CppExpr.precedences.get(op, 1000)
    708         arg = e[1]
    709         if op in CppExpr.unaries:
    710             arg_src = self.source_node(arg)
    711             arg_op = arg[0]
    712             arg_prec = CppExpr.precedences.get(arg_op, 1000)
    713             if arg_prec < prec:
    714                 return "!(" + arg_src + ")"
    715             else:
    716                 return "!" + arg_src
    717         if op in CppExpr.binaries:
    718             arg2 = e[2]
    719             arg1_op = arg[0]
    720             arg2_op = arg2[0]
    721             arg1_src = self.source_node(arg)
    722             arg2_src = self.source_node(arg2)
    723             if CppExpr.precedences.get(arg1_op, 1000) < prec:
    724                 arg1_src = "(%s)" % arg1_src
    725             if CppExpr.precedences.get(arg2_op, 1000) < prec:
    726                 arg2_src = "(%s)" % arg2_src
    727 
    728             return "%s %s %s" % (arg1_src, op, arg2_src)
    729         return "???"
    730 
    731     def __str__(self):
    732         return self.source_node(self.expr)
    733 
    734     @staticmethod
    735     def int_node(e):
    736         if e[0] in ["int", "oct", "hex"]:
    737             return e[1]
    738         else:
    739             return None
    740 
    741     def toInt(self):
    742         return self.int_node(self.expr)
    743 
    744     def optimize_node(self, e, macros=None):
    745         if macros is None:
    746             macros = {}
    747         op = e[0]
    748 
    749         if op == "defined":
    750             op, name = e
    751             if macros.has_key(name):
    752                 if macros[name] == kCppUndefinedMacro:
    753                     return ("int", 0)
    754                 else:
    755                     try:
    756                         value = int(macros[name])
    757                         return ("int", value)
    758                     except ValueError:
    759                         return ("defined", macros[name])
    760 
    761             if kernel_remove_config_macros and name.startswith("CONFIG_"):
    762                 return ("int", 0)
    763 
    764             return e
    765 
    766         elif op == "ident":
    767             op, name = e
    768             if macros.has_key(name):
    769                 try:
    770                     value = int(macros[name])
    771                     expanded = ("int", value)
    772                 except ValueError:
    773                     expanded = ("ident", macros[name])
    774                 return self.optimize_node(expanded, macros)
    775             return e
    776 
    777         elif op == "!":
    778             op, v = e
    779             v = self.optimize_node(v, macros)
    780             if v[0] == "int":
    781                 if v[1] == 0:
    782                     return ("int", 1)
    783                 else:
    784                     return ("int", 0)
    785             return ('!', v)
    786 
    787         elif op == "&&":
    788             op, l, r = e
    789             l = self.optimize_node(l, macros)
    790             r = self.optimize_node(r, macros)
    791             li = self.int_node(l)
    792             ri = self.int_node(r)
    793             if li is not None:
    794                 if li == 0:
    795                     return ("int", 0)
    796                 else:
    797                     return r
    798             elif ri is not None:
    799                 if ri == 0:
    800                     return ("int", 0)
    801                 else:
    802                     return l
    803             return (op, l, r)
    804 
    805         elif op == "||":
    806             op, l, r = e
    807             l = self.optimize_node(l, macros)
    808             r = self.optimize_node(r, macros)
    809             li = self.int_node(l)
    810             ri = self.int_node(r)
    811             if li is not None:
    812                 if li == 0:
    813                     return r
    814                 else:
    815                     return ("int", 1)
    816             elif ri is not None:
    817                 if ri == 0:
    818                     return l
    819                 else:
    820                     return ("int", 1)
    821             return (op, l, r)
    822 
    823         else:
    824             return e
    825 
    def optimize(self, macros=None):
        """Simplify the expression tree in place.

        'macros' maps known macro names to their values (or
        kCppUndefinedMacro); when omitted, no macros are assumed.
        """
        known = {} if macros is None else macros
        self.expr = self.optimize_node(self.expr, known)
    830 
    831 
    832 def test_cpp_expr(expr, expected):
    833     e = CppExpr(CppStringTokenizer(expr).tokens)
    834     s1 = repr(e)
    835     if s1 != expected:
    836         print ("[FAIL]: expression '%s' generates '%s', should be "
    837                "'%s'" % (expr, s1, expected))
    838         global failure_count
    839         failure_count += 1
    840 
    841 
    842 def test_cpp_expr_optim(expr, expected, macros=None):
    843     if macros is None:
    844         macros = {}
    845     e = CppExpr(CppStringTokenizer(expr).tokens)
    846     e.optimize(macros)
    847     s1 = repr(e)
    848     if s1 != expected:
    849         print ("[FAIL]: optimized expression '%s' generates '%s' with "
    850                "macros %s, should be '%s'" % (expr, s1, macros, expected))
    851         global failure_count
    852         failure_count += 1
    853 
    854 
    855 def test_cpp_expr_source(expr, expected):
    856     e = CppExpr(CppStringTokenizer(expr).tokens)
    857     s1 = str(e)
    858     if s1 != expected:
    859         print ("[FAIL]: source expression '%s' generates '%s', should "
    860                "be '%s'" % (expr, s1, expected))
    861         global failure_count
    862         failure_count += 1
    863 
    864 
def test_CppExpr():
    """Self-tests for CppExpr parsing, optimization and source generation."""
    # Parsing: the repr() of the expression tree built from a string.
    test_cpp_expr("0", "(int 0)")
    test_cpp_expr("1", "(int 1)")
    test_cpp_expr("-5", "(int -5)")
    test_cpp_expr("+1", "(int 1)")
    test_cpp_expr("0U", "(int 0)")
    test_cpp_expr("015", "(oct 015)")
    test_cpp_expr("015l", "(oct 015)")
    test_cpp_expr("0x3e", "(hex 0x3e)")
    test_cpp_expr("(0)", "(int 0)")
    test_cpp_expr("1 && 1", "(&& (int 1) (int 1))")
    test_cpp_expr("1 && 0", "(&& (int 1) (int 0))")
    test_cpp_expr("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)")
    test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr("defined(ABC) || defined(BINGO)",
                  "(|| (defined ABC) (defined BINGO))")
    test_cpp_expr("FOO(BAR,5)", "(call FOO [BAR,5])")
    test_cpp_expr("A == 1 || defined(B)",
                  "(|| (== (ident A) (int 1)) (defined B))")

    # Optimization: the tree after constant folding and macro substitution.
    test_cpp_expr_optim("0", "(int 0)")
    test_cpp_expr_optim("1", "(int 1)")
    test_cpp_expr_optim("1 && 1", "(int 1)")
    test_cpp_expr_optim("1 && +1", "(int 1)")
    test_cpp_expr_optim("0x1 && 01", "(oct 01)")
    test_cpp_expr_optim("1 && 0", "(int 0)")
    test_cpp_expr_optim("0 && 1", "(int 0)")
    test_cpp_expr_optim("0 && 0", "(int 0)")
    test_cpp_expr_optim("1 || 1", "(int 1)")
    test_cpp_expr_optim("1 || 0", "(int 1)")
    test_cpp_expr_optim("0 || 1", "(int 1)")
    test_cpp_expr_optim("0 || 0", "(int 0)")
    test_cpp_expr_optim("A", "(ident A)")
    test_cpp_expr_optim("A", "(int 1)", {"A": 1})
    test_cpp_expr_optim("A || B", "(int 1)", {"A": 1})
    test_cpp_expr_optim("A || B", "(int 1)", {"B": 1})
    test_cpp_expr_optim("A && B", "(ident B)", {"A": 1})
    test_cpp_expr_optim("A && B", "(ident A)", {"B": 1})
    test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))")
    test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)",
                        {"EXAMPLE": "XOWOE"})
    test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)",
                        {"EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))",
                        {"EXAMPLE": "XOWOE"})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)",
                        {"EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) || defined(B)",
                        "(|| (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"A": "1"})
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"B": "1"})
    test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)",
                        {"B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)",
                        {"A": kCppUndefinedMacro, "B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(&& (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(defined B)", {"A": "1"})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(defined A)", {"B": "1"})
    test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)",
                        {"B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(int 0)", {"A": kCppUndefinedMacro})
    test_cpp_expr_optim("A == 1 || defined(B)",
                        "(|| (== (ident A) (int 1)) (defined B))")
    test_cpp_expr_optim(
        "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
        "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))",
        {"__KERNEL__": kCppUndefinedMacro})

    # Source generation: str() should round-trip to C source form.
    test_cpp_expr_source("0", "0")
    test_cpp_expr_source("1", "1")
    test_cpp_expr_source("1 && 1", "1 && 1")
    test_cpp_expr_source("1 && 0", "1 && 0")
    test_cpp_expr_source("0 && 1", "0 && 1")
    test_cpp_expr_source("0 && 0", "0 && 0")
    test_cpp_expr_source("1 || 1", "1 || 1")
    test_cpp_expr_source("1 || 0", "1 || 0")
    test_cpp_expr_source("0 || 1", "0 || 1")
    test_cpp_expr_source("0 || 0", "0 || 0")
    test_cpp_expr_source("EXAMPLE", "EXAMPLE")
    test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3")
    test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)")
    test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)")
    test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)")
    958     test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)")
    959 
    960 
    961 ################################################################################
    962 ################################################################################
    963 #####                                                                      #####
    964 #####          C P P   B L O C K                                           #####
    965 #####                                                                      #####
    966 ################################################################################
    967 ################################################################################
    968 
    969 
class Block(object):
    """A class used to model a block of input source text.

    There are two block types:
      - directive blocks: contain the tokens of a single pre-processor
        directive (e.g. #if)
      - text blocks, contain the tokens of non-directive blocks

    The cpp parser class below will transform an input source file into a list
    of Block objects (grouped in a BlockList object for convenience)
    """

    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
        """Initialize a new block, if 'directive' is None, it is a text block.

        NOTE: This automatically converts '#ifdef MACRO' into
        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.

        Args:
          tokens: list of tokens making up the block body.
          directive: the directive name without '#' (e.g. 'if', 'define'),
            or None for a plain text block.
          lineno: the block's line number; when 0, it is taken from the
            first token's location.
          identifier: for a '#define' directive, the defined macro name.
        """

        # Normalize '#ifdef X' to '#if defined X' by prepending a
        # synthetic 'defined' token.
        if directive == "ifdef":
            tok = Token()
            tok.id = tokDEFINED
            tokens = [tok] + tokens
            directive = "if"

        # Normalize '#ifndef X' to '#if ! defined X'.
        elif directive == "ifndef":
            tok1 = Token()
            tok2 = Token()
            tok1.id = tokNOT
            tok2.id = tokDEFINED
            tokens = [tok1, tok2] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        self.define_id = identifier
        if lineno > 0:
            self.lineno = lineno
        else:
            self.lineno = self.tokens[0].location.line

        # Conditional directives also carry a parsed expression tree.
        if self.isIf():
            self.expr = CppExpr(self.tokens)

    def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive is not None

    def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
                                  "endif"]

    def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None
        return self.define_id

    def isIf(self):
        """Return True iff this is an #if-like directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "elif"]

    def isEndif(self):
        """Return True iff this is an #endif directive block."""
        return self.directive == "endif"

    def isInclude(self):
        """Check whether this is a #include directive.

        If true, returns the corresponding file name (with brackets or
        double-quotes). None otherwise.
        """

        if self.directive != "include":
            return None
        return ''.join([str(x) for x in self.tokens])

    @staticmethod
    def format_blocks(tokens, indent=0):
        """Return the formatted lines of strings with proper indentation.

        Args:
          tokens: the tokens to pretty-print.
          indent: the current indentation level, in spaces.

        Returns:
          A (lines, indent) tuple: the list of formatted source lines and
          the indentation level reached after the last token.
        """
        newline = True
        result = []
        buf = ''
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.id == '{':
                # '{' opens a scope: flush the current line and indent.
                buf += ' {'
                result.append(strip_space(buf))
                indent += 2
                buf = ''
                newline = True
            elif t.id == '}':
                indent -= 2
                if not newline:
                    result.append(strip_space(buf))
                # Look ahead to determine if it's the end of line.
                if (i + 1 < len(tokens) and
                    (tokens[i+1].id == ';' or
                     tokens[i+1].id in ['else', '__attribute__',
                                        '__attribute', '__packed'] or
                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
                    buf = ' ' * indent + '}'
                    newline = False
                else:
                    result.append(' ' * indent + '}')
                    buf = ''
                    newline = True
            elif t.id == ';':
                # ';' always terminates the current line.
                result.append(strip_space(buf) + ';')
                buf = ''
                newline = True
            # We prefer a new line for each constant in enum.
            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
                result.append(strip_space(buf) + ',')
                buf = ''
                newline = True
            else:
                if newline:
                    buf += ' ' * indent + str(t)
                else:
                    buf += ' ' + str(t)
                newline = False
            i += 1

        # Flush any trailing, unterminated line.
        if buf:
            result.append(strip_space(buf))

        return result, indent

    def writeWithWarning(self, out, warning, left_count, repeat_count, indent):
        """Dump the current block with warnings.

        The 'warning' text is re-emitted every 'repeat_count' output lines;
        'left_count' is the number of lines remaining before the next
        warning. Returns the updated (left_count, indent) pair.
        """
        # removeWhiteSpace() will sometimes creates non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and then should not be counted for this operation.
        #
        if self.directive is None and not self.tokens:
            return left_count, indent

        if self.directive:
            # A directive is always a single output line.
            out.write(str(self) + '\n')
            left_count -= 1
            if left_count == 0:
                out.write(warning)
                left_count = repeat_count

        else:
            # Text blocks may span several formatted lines.
            lines, indent = self.format_blocks(self.tokens, indent)
            for line in lines:
                out.write(line + '\n')
                left_count -= 1
                if left_count == 0:
                    out.write(warning)
                    left_count = repeat_count

        return left_count, indent

    def __repr__(self):
        """Generate the representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

        return result

    def __str__(self):
        """Generate the string representation of a given block."""
        if self.directive:
            # "#if"
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)

            # "#define"
            elif self.isDefine():
                result = "#%s %s" % (self.directive, self.define_id)
                if self.tokens:
                    result += " "
                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
                # remove the space between name and '(' in function call
                result += re.sub(r'(\w+) \(', r'\1(', expr)

            # "#error"
            # Concatenating tokens with a space separator, because they may
            # not be quoted and broken into several tokens
            elif self.directive == "error":
                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])

            else:
                result = "#%s" % self.directive
                if self.tokens:
                    result += " "
                result += ''.join([tok.id for tok in self.tokens])
        else:
            lines, _ = self.format_blocks(self.tokens)
            result = '\n'.join(lines)

        return result
   1185 
   1186 
class BlockList(object):
    """A convenience class used to hold and process a list of blocks.

    It calls the cpp parser to get the blocks.
    """

    def __init__(self, blocks):
        self.blocks = blocks

    def __len__(self):
        """Return the number of blocks held."""
        return len(self.blocks)

    def __getitem__(self, n):
        """Return the n-th block."""
        return self.blocks[n]

    def __repr__(self):
        return repr(self.blocks)

    def __str__(self):
        # Blocks are joined with newlines to reconstitute the source text.
        result = '\n'.join([str(b) for b in self.blocks])
        return result

    def dump(self):
        """Dump all the blocks in current BlockList."""
        print '##### BEGIN #####'
        for i, b in enumerate(self.blocks):
            print '### BLOCK %d ###' % i
            print b
        print '##### END #####'

    def optimizeIf01(self):
        """Remove the code between #if 0 .. #endif in a BlockList."""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """Remove known defined and undefined macros from a BlockList."""
        # Only conditional blocks carry an expression tree to optimize.
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def removeMacroDefines(self, macros):
        """Remove known macro definitions from a BlockList."""
        self.blocks = remove_macro_defines(self.blocks, macros)

    def optimizeAll(self, macros):
        """Optimize conditionals with known macros, then drop #if 0 blocks."""
        # Order matters: macro substitution may fold conditions to 0 that
        # optimizeIf01() can then eliminate.
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

    def findIncludes(self):
        """Return the list of included files in a BlockList."""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)
        return result

    def write(self, out):
        """Write the whole BlockList to the 'out' stream."""
        out.write(str(self))

    def writeWithWarning(self, out, warning, repeat_count):
        """Write the BlockList, re-emitting 'warning' every repeat_count lines."""
        left_count = repeat_count
        indent = 0
        for b in self.blocks:
            left_count, indent = b.writeWithWarning(out, warning, left_count,
                                                    repeat_count, indent)

    def removeVarsAndFuncs(self, knownStatics=None):
        """Remove variable and function declarations.

        All extern and static declarations corresponding to variable and
        function declarations are removed. We only accept typedefs and
        enum/structs/union declarations.

        However, we keep the definitions corresponding to the set of known
        static inline functions in the set 'knownStatics', which is useful
        for optimized byteorder swap functions and stuff like that.
        """

        # NOTE: It's also removing function-like macros, such as __SYSCALL(...)
        # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h.
        # It could be problematic when we have function-like macros but without
        # '}' following them. It will skip all the tokens/blocks until seeing a
        # '}' as the function end. Fortunately we don't have such cases in the
        # current kernel headers.

        # state = 0 => normal (i.e. LN + spaces)
        # state = 1 => typedef/struct encountered, ends with ";"
        # state = 2 => var declaration encountered, ends with ";"
        # state = 3 => func declaration encountered, ends with "}"

        if knownStatics is None:
            knownStatics = set()
        state = 0
        depth = 0
        blocks2 = []
        # skipTokens is True while we are discarding a declaration that
        # spans block boundaries.
        skipTokens = False
        for b in self.blocks:
            if b.isDirective():
                # Directives are always preserved verbatim.
                blocks2.append(b)
            else:
                n = len(b.tokens)
                i = 0
                # 'first' is the index of the first token to keep.
                if skipTokens:
                    first = n
                else:
                    first = 0
                while i < n:
                    tok = b.tokens[i]
                    tokid = tok.id
                    # If we are not looking for the start of a new
                    # type/var/func, then skip over tokens until
                    # we find our terminator, managing the depth of
                    # accolades as we go.
                    if state > 0:
                        terminator = False
                        if tokid == '{':
                            depth += 1
                        elif tokid == '}':
                            if depth > 0:
                                depth -= 1
                            if (depth == 0) and (state == 3):
                                terminator = True
                        elif tokid == ';' and depth == 0:
                            terminator = True

                        if terminator:
                            # we found the terminator
                            state = 0
                            if skipTokens:
                                skipTokens = False
                                first = i + 1

                        i += 1
                        continue

                    # Is it a new type definition, then start recording it
                    if tok.id in ['struct', 'typedef', 'enum', 'union',
                                  '__extension__']:
                        state = 1
                        i += 1
                        continue

                    # Is it a variable or function definition. If so, first
                    # try to determine which type it is, and also extract
                    # its name.
                    #
                    # We're going to parse the next tokens of the same block
                    # until we find a semi-column or a left parenthesis.
                    #
                    # The semi-column corresponds to a variable definition,
                    # the left-parenthesis to a function definition.
                    #
                    # We also assume that the var/func name is the last
                    # identifier before the terminator.
                    #
                    j = i + 1
                    ident = ""
                    while j < n:
                        tokid = b.tokens[j].id
                        if tokid == '(':  # a function declaration
                            state = 3
                            break
                        elif tokid == ';':  # a variable declaration
                            state = 2
                            break
                        if b.tokens[j].kind == TokenKind.IDENTIFIER:
                            ident = b.tokens[j].id
                        j += 1

                    if j >= n:
                        # This can only happen when the declaration
                        # does not end on the current block (e.g. with
                        # a directive mixed inside it.
                        #
                        # We will treat it as malformed because
                        # it's very hard to recover from this case
                        # without making our parser much more
                        # complex.
                        #
                        logging.debug("### skip unterminated static '%s'",
                                      ident)
                        break

                    if ident in knownStatics:
                        logging.debug("### keep var/func '%s': %s", ident,
                                      repr(b.tokens[i:j]))
                    else:
                        # We're going to skip the tokens for this declaration
                        logging.debug("### skip var/func '%s': %s", ident,
                                      repr(b.tokens[i:j]))
                        if i > first:
                            blocks2.append(Block(b.tokens[first:i]))
                        skipTokens = True
                        first = n

                    i += 1

                # Emit whatever tokens of this block survived.
                if i > first:
                    # print "### final '%s'" % repr(b.tokens[first:i])
                    blocks2.append(Block(b.tokens[first:i]))

        self.blocks = blocks2

    def replaceTokens(self, replacements):
        """Replace tokens according to the given dict."""
        for b in self.blocks:
            made_change = False
            # #include blocks are left untouched: their tokens spell a path.
            if b.isInclude() is None:
                for tok in b.tokens:
                    if tok.kind == TokenKind.IDENTIFIER:
                        if tok.id in replacements:
                            tok.id = replacements[tok.id]
                            made_change = True

                # The defined macro name is stored separately from 'tokens'.
                if b.isDefine() and b.define_id in replacements:
                    b.define_id = replacements[b.define_id]
                    made_change = True

            if made_change and b.isIf():
                # Keep 'expr' in sync with 'tokens'.
                b.expr = CppExpr(b.tokens)
   1410 
   1411 
   1412 def strip_space(s):
   1413     """Strip out redundant space in a given string."""
   1414 
   1415     # NOTE: It ought to be more clever to not destroy spaces in string tokens.
   1416     replacements = {' . ': '.',
   1417                     ' [': '[',
   1418                     '[ ': '[',
   1419                     ' ]': ']',
   1420                     '( ': '(',
   1421                     ' )': ')',
   1422                     ' ,': ',',
   1423                     '# ': '#',
   1424                     ' ;': ';',
   1425                     '~ ': '~',
   1426                     ' -> ': '->'}
   1427     result = s
   1428     for r in replacements:
   1429         result = result.replace(r, replacements[r])
   1430 
   1431     # Remove the space between function name and the parenthesis.
   1432     result = re.sub(r'(\w+) \(', r'\1(', result)
   1433     return result
   1434 
   1435 
   1436 class BlockParser(object):
   1437     """A class that converts an input source file into a BlockList object."""
   1438 
   1439     def __init__(self, tokzer=None):
   1440         """Initialize a block parser.
   1441 
   1442         The input source is provided through a Tokenizer object.
   1443         """
   1444         self._tokzer = tokzer
   1445         self._parsed = False
   1446 
    @property
    def parsed(self):
        """Return whether the input has already been parsed into blocks."""
        return self._parsed
   1450 
   1451     @staticmethod
   1452     def _short_extent(extent):
   1453         return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
   1454                                   extent.end.line, extent.end.column)
   1455 
   1456     def getBlocks(self, tokzer=None):
   1457         """Return all the blocks parsed."""
   1458 
   1459         def consume_extent(i, tokens, extent=None, detect_change=False):
   1460             """Return tokens that belong to the given extent.
   1461 
   1462             It parses all the tokens that follow tokens[i], until getting out
   1463             of the extent. When detect_change is True, it may terminate early
   1464             when detecting preprocessing directives inside the extent.
   1465             """
   1466 
   1467             result = []
   1468             if extent is None:
   1469                 extent = tokens[i].cursor.extent
   1470 
   1471             while i < len(tokens) and tokens[i].location in extent:
   1472                 t = tokens[i]
   1473                 if debugBlockParser:
   1474                     print ' ' * 2, t.id, t.kind, t.cursor.kind
   1475                 if (detect_change and t.cursor.extent != extent and
   1476                     t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
   1477                     break
   1478                 result.append(t)
   1479                 i += 1
   1480             return (i, result)
   1481 
   1482         def consume_line(i, tokens):
   1483             """Return tokens that follow tokens[i] in the same line."""
   1484             result = []
   1485             line = tokens[i].location.line
   1486             while i < len(tokens) and tokens[i].location.line == line:
   1487                 if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
   1488                     break
   1489                 result.append(tokens[i])
   1490                 i += 1
   1491             return (i, result)
   1492 
   1493         if tokzer is None:
   1494             tokzer = self._tokzer
   1495         tokens = tokzer.tokens
   1496 
   1497         blocks = []
   1498         buf = []
   1499         i = 0
   1500 
   1501         while i < len(tokens):
   1502             t = tokens[i]
   1503             cursor = t.cursor
   1504 
   1505             if debugBlockParser:
   1506                 print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
   1507                        "extent=[%s]" % (t.location.line, t.spelling, t.kind,
   1508                                         cursor.kind,
   1509                                         self._short_extent(cursor.extent)))
   1510 
   1511             if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
   1512                 if buf:
   1513                     blocks.append(Block(buf))
   1514                     buf = []
   1515 
   1516                 j = i
   1517                 if j + 1 >= len(tokens):
   1518                     raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
   1519                 directive = tokens[j+1].id
   1520 
   1521                 if directive == 'define':
   1522                     if i+2 >= len(tokens):
   1523                         raise BadExpectedToken("### BAD TOKEN at %s" %
   1524                                                (tokens[i].location))
   1525 
   1526                     # Skip '#' and 'define'.
   1527                     extent = tokens[i].cursor.extent
   1528                     i += 2
   1529                     id = ''
   1530                     # We need to separate the id from the remaining of
   1531                     # the line, especially for the function-like macro.
   1532                     if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
   1533                         (tokens[i].location.column + len(tokens[i].spelling) ==
   1534                          tokens[i+1].location.column)):
   1535                         while i < len(tokens):
   1536                             id += tokens[i].id
   1537                             if tokens[i].spelling == ')':
   1538                                 i += 1
   1539                                 break
   1540                             i += 1
   1541                     else:
   1542                         id += tokens[i].id
   1543                         # Advance to the next token that follows the macro id
   1544                         i += 1
   1545 
   1546                     (i, ret) = consume_extent(i, tokens, extent=extent)
   1547                     blocks.append(Block(ret, directive=directive,
   1548                                         lineno=t.location.line, identifier=id))
   1549 
   1550                 else:
   1551                     (i, ret) = consume_extent(i, tokens)
   1552                     blocks.append(Block(ret[2:], directive=directive,
   1553                                         lineno=t.location.line))
   1554 
   1555             elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
   1556                 if buf:
   1557                     blocks.append(Block(buf))
   1558                     buf = []
   1559                 directive = tokens[i+1].id
   1560                 (i, ret) = consume_extent(i, tokens)
   1561 
   1562                 blocks.append(Block(ret[2:], directive=directive,
   1563                                     lineno=t.location.line))
   1564 
   1565             elif cursor.kind == CursorKind.VAR_DECL:
   1566                 if buf:
   1567                     blocks.append(Block(buf))
   1568                     buf = []
   1569 
   1570                 (i, ret) = consume_extent(i, tokens, detect_change=True)
   1571                 buf += ret
   1572 
   1573             elif cursor.kind == CursorKind.FUNCTION_DECL:
   1574                 if buf:
   1575                     blocks.append(Block(buf))
   1576                     buf = []
   1577 
   1578                 (i, ret) = consume_extent(i, tokens, detect_change=True)
   1579                 buf += ret
   1580 
   1581             else:
   1582                 (i, ret) = consume_line(i, tokens)
   1583                 buf += ret
   1584 
   1585         if buf:
   1586             blocks.append(Block(buf))
   1587 
   1588         # _parsed=True indicates a successful parsing, although may result an
   1589         # empty BlockList.
   1590         self._parsed = True
   1591 
   1592         return BlockList(blocks)
   1593 
   1594     def parse(self, tokzer):
   1595         return self.getBlocks(tokzer)
   1596 
   1597     def parseFile(self, path):
   1598         return self.getBlocks(CppFileTokenizer(path))
   1599 
   1600 
   1601 def test_block_parsing(lines, expected):
   1602     """Helper method to test the correctness of BlockParser.parse."""
   1603     blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines)))
   1604     if len(blocks) != len(expected):
   1605         raise BadExpectedToken("BlockParser.parse() returned '%s' expecting "
   1606                                "'%s'" % (str(blocks), repr(expected)))
   1607     for n in range(len(blocks)):
   1608         if str(blocks[n]) != expected[n]:
   1609             raise BadExpectedToken("BlockParser.parse()[%d] is '%s', "
   1610                                    "expecting '%s'" % (n, str(blocks[n]),
   1611                                                        expected[n]))
   1612 
   1613 
   1614 def test_BlockParser():
   1615     test_block_parsing(["#error hello"], ["#error hello"])
   1616     test_block_parsing(["foo", "", "bar"], ["foo bar"])
   1617 
   1618     # We currently cannot handle the following case with libclang properly.
   1619     # Fortunately it doesn't appear in current headers.
   1620     # test_block_parsing(["foo", "  #  ", "bar"], ["foo", "bar"])
   1621 
   1622     test_block_parsing(["foo",
   1623                         "  #  /* ahah */ if defined(__KERNEL__) /* more */",
   1624                         "bar", "#endif"],
   1625                        ["foo", "#ifdef __KERNEL__", "bar", "#endif"])
   1626 
   1627 
   1628 ################################################################################
   1629 ################################################################################
   1630 #####                                                                      #####
   1631 #####        B L O C K   L I S T   O P T I M I Z A T I O N                 #####
   1632 #####                                                                      #####
   1633 ################################################################################
   1634 ################################################################################
   1635 
   1636 
   1637 def remove_macro_defines(blocks, excludedMacros=None):
   1638     """Remove macro definitions like #define <macroName>  ...."""
   1639     if excludedMacros is None:
   1640         excludedMacros = set()
   1641     result = []
   1642     for b in blocks:
   1643         macroName = b.isDefine()
   1644         if macroName is None or macroName not in excludedMacros:
   1645             result.append(b)
   1646 
   1647     return result
   1648 
   1649 
   1650 def find_matching_endif(blocks, i):
   1651     """Traverse the blocks to find out the matching #endif."""
   1652     n = len(blocks)
   1653     depth = 1
   1654     while i < n:
   1655         if blocks[i].isDirective():
   1656             dir_ = blocks[i].directive
   1657             if dir_ in ["if", "ifndef", "ifdef"]:
   1658                 depth += 1
   1659             elif depth == 1 and dir_ in ["else", "elif"]:
   1660                 return i
   1661             elif dir_ == "endif":
   1662                 depth -= 1
   1663                 if depth == 0:
   1664                     return i
   1665         i += 1
   1666     return i
   1667 
   1668 
def optimize_if01(blocks):
    """Remove the code between #if 0 .. #endif in a list of CppBlocks.

    Also unwraps '#if 1' regions (recursively optimizing their contents).
    Returns a new list of blocks; note that entries of 'blocks' may be
    mutated in place (the directive/expr of else/elif blocks is rewritten).
    """
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        # Copy through everything up to the next '#if'-like directive.
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            logging.debug("appending lines %d to %d", blocks[i].lineno,
                          blocks[j-1].lineno)
            result += blocks[i:j]
        if j >= n:
            break
        expr = blocks[j].expr
        r = expr.toInt()
        if r is None:
            # The condition does not fold to an integer constant; keep the
            # directive unchanged and continue after it.
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            j = find_matching_endif(blocks, j + 1)
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir_ = blocks[j].directive
            if dir_ == "endif":
                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
                              blocks[i].lineno, blocks[j].lineno)
                i = j + 1
            elif dir_ == "else":
                # convert 'else' into 'if 1'
                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
                              "to %d)", blocks[i].lineno, blocks[j-1].lineno)
                blocks[j].directive = "if"
                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
                # Re-process the rewritten block on the next iteration.
                i = j
            elif dir_ == "elif":
                # convert 'elif' into 'if'
                logging.debug("convert 'if 0' .. 'elif' into 'if'")
                blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif(blocks, j + 1)
        if k >= n:
            # unterminated #if 1, finish here
            logging.debug("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        dir_ = blocks[k].directive
        if dir_ == "endif":
            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            i = k + 1
        elif dir_ == "else":
            # convert 'else' into 'if 0'
            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            blocks[k].directive = "if"
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
        elif dir_ == "elif":
            # convert 'elif' into 'if 0'
            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            # NOTE(review): unlike the 'else' branch above, the directive is
            # left as 'elif' here and only the expression is rewritten to 0 —
            # presumably intentional since the '#if 1' opener was consumed,
            # but worth confirming against the emitted output.
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
    return result
   1746 
   1747 
def test_optimizeAll():
    """End-to-end test of BlockList.optimizeAll with __KERNEL__ undefined.

    Parses a header snippet, applies kernel token replacements, optimizes
    with __KERNEL__ forced undefined, and compares the rewritten output
    against the expected text.  On mismatch, prints a diff-style report and
    bumps the global failure_count instead of raising.
    """
    # Input: every GOOD_* define must survive; every BAD_* define (inside
    # '#if 0' regions or __KERNEL__-only regions) must be removed.
    text = """\
#if 1
#define  GOOD_1
#endif
#if 0
#define  BAD_2
#define  BAD_3
#endif

#if 1
#define  GOOD_2
#else
#define  BAD_4
#endif

#if 0
#define  BAD_5
#else
#define  GOOD_3
#endif

#if defined(__KERNEL__)
#define BAD_KERNEL
#endif

#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
#define X
#endif

#ifndef SIGRTMAX
#define SIGRTMAX 123
#endif /* SIGRTMAX */

#if 0
#if 1
#define  BAD_6
#endif
#endif\
"""

    # Expected output; note SIGRTMAX -> __SIGRTMAX, presumably coming from
    # kernel_token_replacements applied below — confirm against defaults.py.
    expected = """\
#define GOOD_1
#define GOOD_2
#define GOOD_3
#if !defined(__GLIBC__) || __GLIBC__ < 2
#define X
#endif
#ifndef __SIGRTMAX
#define __SIGRTMAX 123
#endif\
"""

    out = utils.StringOutput()
    blocks = BlockParser().parse(CppStringTokenizer(text))
    # Token replacement happens before optimization, matching the real
    # header-processing pipeline order.
    blocks.replaceTokens(kernel_token_replacements)
    blocks.optimizeAll({"__KERNEL__": kCppUndefinedMacro})
    blocks.write(out)
    if out.get() != expected:
        # Report the mismatch but keep running the remaining unit tests.
        print "[FAIL]: macro optimization failed\n"
        print "<<<< expecting '",
        print expected,
        print "'\n>>>> result '",
        print out.get(),
        print "'\n----"
        global failure_count
        failure_count += 1
   1815 
   1816 
   1817 def runUnitTests():
   1818     """Always run all unit tests for this program."""
   1819     test_CppTokenizer()
   1820     test_CppExpr()
   1821     test_optimizeAll()
   1822     test_BlockParser()
   1823 
   1824 
   1825 failure_count = 0
   1826 runUnitTests()
   1827 if failure_count != 0:
   1828     utils.panic("Unit tests failed in cpp.py.\n")
   1829