# (this file lives in the tools/ directory)
      1 # a glorified C pre-processor parser
      2 
      3 import sys, re, string
      4 from utils import *
      5 from defaults import *
      6 
      7 debugTokens             = False
      8 debugDirectiveTokenizer = False
      9 debugLineParsing        = False
     10 debugCppExpr            = False
     11 debugOptimIf01          = False
     12 
     13 #####################################################################################
     14 #####################################################################################
     15 #####                                                                           #####
     16 #####           C P P   T O K E N S                                             #####
     17 #####                                                                           #####
     18 #####################################################################################
     19 #####################################################################################
     20 
     21 # the list of supported C-preprocessor tokens
     22 # plus a couple of C tokens as well
     23 tokEOF       = "\0"
     24 tokLN        = "\n"
     25 tokSTRINGIFY = "#"
     26 tokCONCAT    = "##"
     27 tokLOGICAND  = "&&"
     28 tokLOGICOR   = "||"
     29 tokSHL       = "<<"
     30 tokSHR       = ">>"
     31 tokEQUAL     = "=="
     32 tokNEQUAL    = "!="
     33 tokLT        = "<"
     34 tokLTE       = "<="
     35 tokGT        = ">"
     36 tokGTE       = ">="
     37 tokELLIPSIS  = "..."
     38 tokSPACE     = " "
     39 tokDEFINED   = "defined"
     40 tokLPAREN    = "("
     41 tokRPAREN    = ")"
     42 tokNOT       = "!"
     43 tokPLUS      = "+"
     44 tokMINUS     = "-"
     45 tokMULTIPLY  = "*"
     46 tokDIVIDE    = "/"
     47 tokMODULUS   = "%"
     48 tokBINAND    = "&"
     49 tokBINOR     = "|"
     50 tokBINXOR    = "^"
     51 tokCOMMA     = ","
     52 tokLBRACE    = "{"
     53 tokRBRACE    = "}"
     54 tokARROW     = "->"
     55 tokINCREMENT = "++"
     56 tokDECREMENT = "--"
     57 tokNUMBER    = "<number>"
     58 tokIDENT     = "<ident>"
     59 tokSTRING    = "<string>"
     60 
     61 class Token:
     62     """a simple class to hold information about a given token.
     63        each token has a position in the source code, as well as
     64        an 'id' and a 'value'. the id is a string that identifies
     65        the token's class, while the value is the string of the
     66        original token itself.
     67 
     68        for example, the tokenizer concatenates a series of spaces
     69        and tabs as a single tokSPACE id, whose value if the original
     70        spaces+tabs sequence."""
     71 
     72     def __init__(self):
     73         self.id     = None
     74         self.value  = None
     75         self.lineno = 0
     76         self.colno  = 0
     77 
     78     def set(self,id,val=None):
     79         self.id = id
     80         if val:
     81             self.value = val
     82         else:
     83             self.value = id
     84         return None
     85 
     86     def copyFrom(self,src):
     87         self.id     = src.id
     88         self.value  = src.value
     89         self.lineno = src.lineno
     90         self.colno  = src.colno
     91 
     92     def __repr__(self):
     93         if self.id == tokIDENT:
     94             return "(ident %s)" % self.value
     95         if self.id == tokNUMBER:
     96             return "(number %s)" % self.value
     97         if self.id == tokSTRING:
     98             return "(string '%s')" % self.value
     99         if self.id == tokLN:
    100             return "<LN>"
    101         if self.id == tokEOF:
    102             return "<EOF>"
    103         if self.id == tokSPACE and self.value == "\\":
    104             # this corresponds to a trailing \ that was transformed into a tokSPACE
    105             return "<\\>"
    106 
    107         return self.id
    108 
    109     def __str__(self):
    110         if self.id == tokIDENT:
    111             return self.value
    112         if self.id == tokNUMBER:
    113             return self.value
    114         if self.id == tokSTRING:
    115             return self.value
    116         if self.id == tokEOF:
    117             return "<EOF>"
    118         if self.id == tokSPACE:
    119             if self.value == "\\":  # trailing \
    120                 return "\\\n"
    121             else:
    122                 return self.value
    123 
    124         return self.id
    125 
    126 class BadExpectedToken(Exception):
    127     def __init__(self,msg):
    128         print msg
    129 
    130 
    131 #####################################################################################
    132 #####################################################################################
    133 #####                                                                           #####
    134 #####           C P P   T O K E N I Z E R                                       #####
    135 #####                                                                           #####
    136 #####################################################################################
    137 #####################################################################################
    138 
    139 # list of long symbols, i.e. those that take more than one characters
    140 cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\
    141                    tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ]
    142 
    143 class CppTokenizer:
    144     """an abstract class used to convert some input text into a list
    145        of tokens. real implementations follow and differ in the format
    146        of the input text only"""
    147 
    148     def __init__(self):
    149         """initialize a new CppTokenizer object"""
    150         self.eof  = False  # end of file reached ?
    151         self.text = None   # content of current line, with final \n stripped
    152         self.line = 0      # number of current line
    153         self.pos  = 0      # current character position in current line
    154         self.len  = 0      # length of current line text
    155         self.held = Token()
    156 
    157     def setLineText(self,line):
    158         """set the content of the (next) current line. should be called
    159            by fillLineText() in derived classes"""
    160         self.text = line
    161         self.len  = len(line)
    162         self.pos  = 0
    163 
    164     def fillLineText(self):
    165         """refresh the content of 'line' with a new line of input"""
    166         # to be overriden
    167         self.eof = True
    168 
    169     def markPos(self,tok):
    170         """mark the position of the current token in the source file"""
    171         if self.eof or self.pos > self.len:
    172             tok.lineno = self.line + 1
    173             tok.colno  = 0
    174         else:
    175             tok.lineno = self.line
    176             tok.colno  = self.pos
    177 
    178     def peekChar(self):
    179         """return the current token under the cursor without moving it"""
    180         if self.eof:
    181             return tokEOF
    182 
    183         if self.pos > self.len:
    184             self.pos   = 0
    185             self.line += 1
    186             self.fillLineText()
    187             if self.eof:
    188                 return tokEOF
    189 
    190         if self.pos == self.len:
    191             return tokLN
    192         else:
    193             return self.text[self.pos]
    194 
    195     def peekNChar(self,n):
    196         """try to peek the next n chars on the same line"""
    197         if self.pos + n > self.len:
    198             return None
    199         return self.text[self.pos:self.pos+n]
    200 
    201     def skipChar(self):
    202         """increment the token cursor position"""
    203         if not self.eof:
    204             self.pos += 1
    205 
    206     def skipNChars(self,n):
    207         if self.pos + n <= self.len:
    208             self.pos += n
    209         else:
    210             while n > 0:
    211                 self.skipChar()
    212                 n -= 1
    213 
    214     def nextChar(self):
    215         """retrieve the token at the current cursor position, then skip it"""
    216         result = self.peekChar()
    217         self.skipChar()
    218         return  result
    219 
    220     def getEscape(self):
    221         # try to get all characters after a backslash (\)
    222         result = self.nextChar()
    223         if result == "0":
    224             # octal number ?
    225             num = self.peekNChar(3)
    226             if num != None:
    227                 isOctal = True
    228                 for d in num:
    229                     if not d in "01234567":
    230                         isOctal = False
    231                         break
    232                 if isOctal:
    233                     result += num
    234                     self.skipNChars(3)
    235         elif result == "x" or result == "X":
    236             # hex number ?
    237             num = self.peekNChar(2)
    238             if num != None:
    239                 isHex = True
    240                 for d in num:
    241                     if not d in "012345678abcdefABCDEF":
    242                         isHex = False
    243                         break
    244                 if isHex:
    245                     result += num
    246                     self.skipNChars(2)
    247         elif result == "u" or result == "U":
    248             # unicode char ?
    249             num = self.peekNChar(4)
    250             if num != None:
    251                 isHex = True
    252                 for d in num:
    253                     if not d in "012345678abcdefABCDEF":
    254                         isHex = False
    255                         break
    256                 if isHex:
    257                     result += num
    258                     self.skipNChars(4)
    259 
    260         return result
    261 
    262     def nextRealToken(self,tok):
    263         """return next CPP token, used internally by nextToken()"""
    264         c = self.nextChar()
    265         if c == tokEOF or c == tokLN:
    266             return tok.set(c)
    267 
    268         if c == '/':
    269             c = self.peekChar()
    270             if c == '/':   # C++ comment line
    271                 self.skipChar()
    272                 while 1:
    273                     c = self.nextChar()
    274                     if c == tokEOF or c == tokLN:
    275                         break
    276                 return tok.set(tokLN)
    277             if c == '*':   # C comment start
    278                 self.skipChar()
    279                 value = "/*"
    280                 prev_c = None
    281                 while 1:
    282                     c = self.nextChar()
    283                     if c == tokEOF:
    284                         return tok.set(tokEOF,value)
    285                     if c == '/' and prev_c == '*':
    286                         break
    287                     prev_c = c
    288                     value += c
    289 
    290                 value += "/"
    291                 return tok.set(tokSPACE,value)
    292             c = '/'
    293 
    294         if c.isspace():
    295             while 1:
    296                 c2 = self.peekChar()
    297                 if c2 == tokLN or not c2.isspace():
    298                     break
    299                 c += c2
    300                 self.skipChar()
    301             return tok.set(tokSPACE,c)
    302 
    303         if c == '\\':
    304             if debugTokens:
    305                 print "nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar())
    306             if self.peekChar() == tokLN:   # trailing \
    307                 # eat the tokLN
    308                 self.skipChar()
    309                 # we replace a trailing \ by a tokSPACE whose value is
    310                 # simply "\\". this allows us to detect them later when
    311                 # needed.
    312                 return tok.set(tokSPACE,"\\")
    313             else:
    314                 # treat as a single token here ?
    315                 c +=self.getEscape()
    316                 return tok.set(c)
    317 
    318         if c == "'":  # chars
    319             c2 = self.nextChar()
    320             c += c2
    321             if c2 == '\\':
    322                 c += self.getEscape()
    323 
    324             while 1:
    325                 c2 = self.nextChar()
    326                 if c2 == tokEOF:
    327                     break
    328                 c += c2
    329                 if c2 == "'":
    330                     break
    331 
    332             return tok.set(tokSTRING, c)
    333 
    334         if c == '"':  # strings
    335             quote = 0
    336             while 1:
    337                 c2  = self.nextChar()
    338                 if c2 == tokEOF:
    339                     return tok.set(tokSTRING,c)
    340 
    341                 c += c2
    342                 if not quote:
    343                     if c2 == '"':
    344                         return tok.set(tokSTRING,c)
    345                     if c2 == "\\":
    346                         quote = 1
    347                 else:
    348                     quote = 0
    349 
    350         if c >= "0" and c <= "9":  # integers ?
    351             while 1:
    352                 c2 = self.peekChar()
    353                 if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
    354                     break
    355                 c += c2
    356                 self.skipChar()
    357             return tok.set(tokNUMBER,c)
    358 
    359         if c.isalnum() or c == "_":  # identifiers ?
    360             while 1:
    361                 c2 = self.peekChar()
    362                 if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
    363                     break
    364                 c += c2
    365                 self.skipChar()
    366             if c == tokDEFINED:
    367                 return tok.set(tokDEFINED)
    368             else:
    369                 return tok.set(tokIDENT,c)
    370 
    371         # check special symbols
    372         for sk in cppLongSymbols:
    373             if c == sk[0]:
    374                 sklen = len(sk[1:])
    375                 if self.pos + sklen <= self.len and \
    376                    self.text[self.pos:self.pos+sklen] == sk[1:]:
    377                     self.pos += sklen
    378                     return tok.set(sk)
    379 
    380         return tok.set(c)
    381 
    382     def nextToken(self,tok):
    383         """return the next token from the input text. this function
    384            really updates 'tok', and does not return a new one"""
    385         self.markPos(tok)
    386         self.nextRealToken(tok)
    387 
    388     def getToken(self):
    389         tok = Token()
    390         self.nextToken(tok)
    391         if debugTokens:
    392             print "getTokens: %s" % repr(tok)
    393         return tok
    394 
    395     def toTokenList(self):
    396         """convert the input text of a CppTokenizer into a direct
    397            list of token objects. tokEOF is stripped from the result"""
    398         result = []
    399         while 1:
    400             tok = Token()
    401             self.nextToken(tok)
    402             if tok.id == tokEOF:
    403                 break
    404             result.append(tok)
    405         return result
    406 
    407 class CppLineTokenizer(CppTokenizer):
    408     """a CppTokenizer derived class that accepts a single line of text as input"""
    409     def __init__(self,line,lineno=1):
    410         CppTokenizer.__init__(self)
    411         self.line = lineno
    412         self.setLineText(line)
    413 
    414 
    415 class CppLinesTokenizer(CppTokenizer):
    416     """a CppTokenizer derived class that accepts a list of texdt lines as input.
    417        the lines must not have a trailing \n"""
    418     def __init__(self,lines=[],lineno=1):
    419         """initialize a CppLinesTokenizer. you can later add lines using addLines()"""
    420         CppTokenizer.__init__(self)
    421         self.line  = lineno
    422         self.lines = lines
    423         self.index = 0
    424         self.count = len(lines)
    425 
    426         if self.count > 0:
    427             self.fillLineText()
    428         else:
    429             self.eof = True
    430 
    431     def addLine(self,line):
    432         """add a line to a CppLinesTokenizer. this can be done after tokenization
    433            happens"""
    434         if self.count == 0:
    435             self.setLineText(line)
    436             self.index = 1
    437         self.lines.append(line)
    438         self.count += 1
    439         self.eof    = False
    440 
    441     def fillLineText(self):
    442         if self.index < self.count:
    443             self.setLineText(self.lines[self.index])
    444             self.index += 1
    445         else:
    446             self.eof = True
    447 
    448 
    449 class CppFileTokenizer(CppTokenizer):
    450     def __init__(self,file,lineno=1):
    451         CppTokenizer.__init__(self)
    452         self.file = file
    453         self.line = lineno
    454 
    455     def fillLineText(self):
    456         line = self.file.readline()
    457         if len(line) > 0:
    458             if line[-1] == '\n':
    459                 line = line[:-1]
    460             if len(line) > 0 and line[-1] == "\r":
    461                 line = line[:-1]
    462             self.setLineText(line)
    463         else:
    464             self.eof = True
    465 
    466 # Unit testing
    467 #
    468 class CppTokenizerTester:
    469     """a class used to test CppTokenizer classes"""
    470     def __init__(self,tokenizer=None):
    471         self.tokenizer = tokenizer
    472         self.token     = Token()
    473 
    474     def setTokenizer(self,tokenizer):
    475         self.tokenizer = tokenizer
    476 
    477     def expect(self,id):
    478         self.tokenizer.nextToken(self.token)
    479         tokid = self.token.id
    480         if tokid == id:
    481             return
    482         if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER):
    483             return
    484         raise BadExpectedToken, "###  BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id)
    485 
    486     def expectToken(self,id,line,col):
    487         self.expect(id)
    488         if self.token.lineno != line:
    489             raise BadExpectedToken, "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line)
    490         if self.token.colno != col:
    491             raise BadExpectedToken, "###  BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col)
    492 
    493     def expectTokenVal(self,id,value,line,col):
    494         self.expectToken(id,line,col)
    495         if self.token.value != value:
    496             raise BadExpectedToken, "###  BAD VALUE: '%s' expecting '%s'" % (self.token.value,value)
    497 
    498     def expectList(self,list):
    499         for item in list:
    500             self.expect(item)
    501 
    502 def test_CppTokenizer():
    503     tester = CppTokenizerTester()
    504 
    505     tester.setTokenizer( CppLineTokenizer("#an/example  && (01923_xy)") )
    506     tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \
    507                        tokRPAREN, tokLN, tokEOF] )
    508 
    509     tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") )
    510     tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE,
    511                         tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] )
    512 
    513     tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) )
    514     tester.expectList( [ tokSPACE, tokLN, tokEOF ] )
    515 
    516     tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) )
    517     tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] )
    518 
    519     tester.setTokenizer( CppLinesTokenizer( ["first second", "  third"] ) )
    520     tester.expectToken( "first", 1, 0 )
    521     tester.expectToken( tokSPACE, 1, 5 )
    522     tester.expectToken( "second", 1, 6 )
    523     tester.expectToken( tokLN, 1, 12 )
    524     tester.expectToken( tokSPACE, 2, 0 )
    525     tester.expectToken( "third", 2, 2 )
    526 
    527     tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) )
    528     tester.expectList( [ "boo", tokSPACE ] )
    529     tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 )
    530     tester.expectList( [ tokLN, tokEOF ] )
    531 
    532     tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) )
    533     tester.expectToken( "an", 1, 0 )
    534     tester.expectToken( tokSPACE, 1, 2 )
    535     tester.expectTokenVal( tokSPACE, "\\", 1, 3 )
    536     tester.expectToken( tokSPACE, 2, 0 )
    537     tester.expectToken( "example", 2, 1 )
    538     tester.expectToken( tokLN, 2, 8 )
    539 
    540     return True
    541 
    542 
    543 #####################################################################################
    544 #####################################################################################
    545 #####                                                                           #####
    546 #####           C P P   E X P R E S S I O N S                                   #####
    547 #####                                                                           #####
    548 #####################################################################################
    549 #####################################################################################
    550 
    551 class CppExpr:
    552     """a class that models the condition of #if directives into
    553         an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2)
    554         where "op" is a string describing the operation"""
    555 
    556     unaries  = [ "!", "~" ]
    557     binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=", "?", ":" ]
    558     precedences = {
    559         "?": 1, ":": 1,
    560         "||": 2,
    561         "&&": 3,
    562         "|": 4,
    563         "^": 5,
    564         "&": 6,
    565         "==": 7, "!=": 7,
    566         "<": 8, "<=": 8, ">": 8, ">=": 8,
    567         "<<": 9, ">>": 9,
    568         "+": 10, "-": 10,
    569         "*": 11, "/": 11, "%": 11,
    570         "!": 12, "~": 12
    571     }
    572 
    573     re_cpp_constant = re.compile(r"((\d|\w|_)+)")
    574 
    575     def __init__(self, tokens):
    576         """initialize a CppExpr. 'tokens' must be a CppToken list"""
    577         self.tok  = tokens
    578         self.n    = len(tokens)
    579         self.i    = 0
    580         if debugCppExpr:
    581             print "CppExpr: trying to parse %s" % repr(tokens)
    582         self.expr = self.parseExpression(0)
    583         if debugCppExpr:
    584             print "CppExpr: got " + repr(self.expr)
    585         if self.i != self.n:
    586             print 'crap at end of input (%d != %d): %s' % (self.i, self.n, repr(tokens))
    587             raise
    588 
    589 
    590     def throw(self, exception, msg):
    591         if self.i < self.n:
    592             tok = self.tok[self.i]
    593             print "%d:%d: %s" % (tok.lineno,tok.colno,msg)
    594         else:
    595             print "EOF: %s" % msg
    596         raise exception(msg)
    597 
    598 
    599     def skip_spaces(self):
    600         """skip spaces in input token list"""
    601         while self.i < self.n:
    602             t = self.tok[self.i]
    603             if t.id != tokSPACE and t.id != tokLN:
    604                 break
    605             self.i += 1
    606 
    607 
    608     def expectId(self, id):
    609         """check that a given token id is at the current position, then skip over it"""
    610         self.skip_spaces()
    611         if self.i >= self.n or self.tok[self.i].id != id:
    612             self.throw(BadExpectedToken,self.i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[self.i].id))
    613         self.i += 1
    614 
    615 
    616     def expectIdent(self):
    617         self.skip_spaces()
    618         if self.i >= self.n or self.tok[self.i].id != tokIDENT:
    619             self.throw(BadExpectedToken, self.i,"### expecting identifier in expression, got '%s'" % (id, self.tok[self.i].id))
    620         self.i += 1
    621 
    622 
    623     def is_decimal(self):
    624         v = self.tok[self.i].value[:]
    625         while len(v) > 0 and v[-1] in "ULul":
    626             v = v[:-1]
    627         for digit in v:
    628             if not digit.isdigit():
    629                 return None
    630 
    631         self.i += 1
    632         return ("int", string.atoi(v))
    633 
    634 
    635     def is_hexadecimal(self):
    636         v = self.tok[self.i].value[:]
    637         while len(v) > 0 and v[-1] in "ULul":
    638             v = v[:-1]
    639         if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"):
    640             for digit in v[2:]:
    641                 if not digit in "0123456789abcdefABCDEF":
    642                     return None
    643 
    644             # for a hex expression tuple, the argument
    645             # is the value as an integer
    646             self.i += 1
    647             return ("hex", int(v[2:], 16))
    648 
    649         return None
    650 
    651 
    652     def is_integer(self):
    653         if self.tok[self.i].id != tokNUMBER:
    654             return None
    655 
    656         c = self.is_decimal()
    657         if c: return c
    658 
    659         c = self.is_hexadecimal()
    660         if c: return c
    661 
    662         return None
    663 
    664 
    665     def is_number(self):
    666         t = self.tok[self.i]
    667         if t.id == tokMINUS and self.i+1 < self.n:
    668             self.i += 1
    669             c = self.is_integer()
    670             if c:
    671                 op, val  = c
    672                 return (op, -val)
    673         if t.id == tokPLUS and self.i+1 < self.n:
    674             c = self.is_integer()
    675             if c: return c
    676 
    677         return self.is_integer()
    678 
    679 
    680     def is_defined(self):
    681         t = self.tok[self.i]
    682         if t.id != tokDEFINED:
    683             return None
    684 
    685         # we have the defined keyword, check the rest
    686         self.i += 1
    687         self.skip_spaces()
    688         used_parens = 0
    689         if self.i < self.n and self.tok[self.i].id == tokLPAREN:
    690             used_parens = 1
    691             self.i += 1
    692             self.skip_spaces()
    693 
    694         if self.i >= self.n:
    695             self.throw(CppConstantExpected,i,"### 'defined' must be followed  by macro name or left paren")
    696 
    697         t = self.tok[self.i]
    698         if t.id != tokIDENT:
    699             self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name")
    700 
    701         self.i += 1
    702         if used_parens:
    703             self.expectId(tokRPAREN)
    704 
    705         return ("defined", t.value)
    706 
    707 
    708     def is_call_or_ident(self):
    709         self.skip_spaces()
    710         if self.i >= self.n:
    711             return None
    712 
    713         t = self.tok[self.i]
    714         if t.id != tokIDENT:
    715             return None
    716 
    717         name = t.value
    718 
    719         self.i += 1
    720         self.skip_spaces()
    721         if self.i >= self.n or self.tok[self.i].id != tokLPAREN:
    722             return ("ident", name)
    723 
    724         params    = []
    725         depth     = 1
    726         self.i += 1
    727         j  = self.i
    728         while self.i < self.n:
    729             id = self.tok[self.i].id
    730             if id == tokLPAREN:
    731                 depth += 1
    732             elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
    733                 while j < self.i and self.tok[j].id == tokSPACE:
    734                     j += 1
    735                 k = self.i
    736                 while k > j and self.tok[k-1].id == tokSPACE:
    737                     k -= 1
    738                 param = self.tok[j:k]
    739                 params.append(param)
    740                 if id == tokRPAREN:
    741                     break
    742                 j = self.i+1
    743             elif id == tokRPAREN:
    744                 depth -= 1
    745             self.i += 1
    746 
    747         if self.i >= self.n:
    748             return None
    749 
    750         self.i += 1
    751         return ("call", (name, params))
    752 
    753 
    754     # Implements the "precedence climbing" algorithm from http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
    755     # The "classic" algorithm would be fine if we were using a tool to generate the parser, but we're not.
    756     # Dijkstra's "shunting yard" algorithm hasn't been necessary yet.
    757     def parseExpression(self, minPrecedence):
    758         self.skip_spaces()
    759         if self.i >= self.n:
    760             return None
    761 
    762         node = self.parsePrimary()
    763         while self.token() != None and self.isBinary(self.token()) and self.precedence(self.token()) >= minPrecedence:
    764             op = self.token()
    765             self.nextToken()
    766             rhs = self.parseExpression(self.precedence(op) + 1)
    767             node = (op.id, node, rhs)
    768 
    769         return node
    770 
    771 
    def parsePrimary(self):
        """parse a primary expression: a unary operator applied to a primary,
           a parenthesized sub-expression, a number, an identifier or call,
           or the 'defined' operator."""
        tok = self.token()
        if self.isUnary(tok):
            self.nextToken()
            return (tok.id, self.parseExpression(self.precedence(tok)))

        result = None
        if tok.id == tokLPAREN:
            self.nextToken()
            result = self.parseExpression(0)
            self.expectId(tokRPAREN)
        elif tok.id == "?":
            self.nextToken()
            result = self.parseExpression(0)
            self.expectId(":")
        elif tok.id == tokNUMBER:
            result = self.is_number()
        elif tok.id == tokIDENT:
            result = self.is_call_or_ident()
        elif tok.id == tokDEFINED:
            result = self.is_defined()
        else:
            self.throw(BadExpectedToken, "didn't expect to see a %s in factor" % (self.tok[self.i].id))

        self.skip_spaces()
        return result
    799 
    800 
    def isBinary(self, token):
        """returns True iff 'token' is a supported binary operator"""
        return token.id in self.binaries
    803 
    804 
    def isUnary(self, token):
        """returns True iff 'token' is a supported unary operator"""
        return token.id in self.unaries
    807 
    808 
    def precedence(self, token):
        """returns the binding precedence of 'token', or None if it is
           not a known operator"""
        return self.precedences.get(token.id)
    811 
    812 
    813     def token(self):
    814         if self.i >= self.n:
    815             return None
    816         return self.tok[self.i]
    817 
    818 
    def nextToken(self):
        """advance past the current token (and any following spaces), then
           return the new current token, or None at the end of the list"""
        self.i += 1
        self.skip_spaces()
        return self.tok[self.i] if self.i < self.n else None
    825 
    826 
    def dump_node(self, e):
        """return a lisp-like string representation of expression node 'e',
           e.g. ("&&", ("int", 1), ("ident", "A")) -> "(&& (int 1) (ident A))" """
        op = e[0]
        text = "(" + op
        if op == "int":
            text += " %d)" % e[1]
        elif op == "hex":
            text += " 0x%x)" % e[1]
        elif op in ("ident", "defined"):
            text += " %s)" % e[1]
        elif op == "call":
            name, params = e[1]
            text += " %s [" % name
            sep = ""
            for param in params:
                text += sep + "".join(str(tok) for tok in param)
                sep = ","
            text += "])"
        elif op in CppExpr.unaries:
            text += " %s)" % self.dump_node(e[1])
        elif op in CppExpr.binaries:
            text += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
        else:
            text += " ?%s)" % repr(e[1])

        return text
    857 
    def __repr__(self):
        """return the lisp-like representation of this whole expression"""
        return self.dump_node(self.expr)
    860 
    def source_node(self, e):
        """generate the C source form of expression node 'e', inserting
           parentheses around operands that bind less tightly than their
           parent operator"""
        op = e[0]
        if op == "int":
            return "%d" % e[1]
        if op == "hex":
            return "0x%x" % e[1]
        if op == "ident":
            # XXX: should try to expand
            return e[1]
        if op == "defined":
            return "defined(%s)" % e[1]

        prec = CppExpr.precedences.get(op,1000)
        arg  = e[1]
        if op in CppExpr.unaries:
            arg_src  = self.source_node(arg)
            arg_prec = CppExpr.precedences.get(arg[0],1000)
            # use 'op' here rather than a hard-coded "!" so that any other
            # unary operator (e.g. "~") is printed correctly
            if arg_prec < prec:
                return op + "(" + arg_src + ")"
            else:
                return op + arg_src
        if op in CppExpr.binaries:
            arg2     = e[2]
            arg1_op  = arg[0]
            arg2_op  = arg2[0]
            arg1_src = self.source_node(arg)
            arg2_src = self.source_node(arg2)
            if CppExpr.precedences.get(arg1_op,1000) < prec:
                arg1_src = "(%s)" % arg1_src
            if CppExpr.precedences.get(arg2_op,1000) < prec:
                arg2_src = "(%s)" % arg2_src

            return "%s %s %s" % (arg1_src, op, arg2_src)
        return "???"
    896 
    def __str__(self):
        """return the C source form of this whole expression"""
        return self.source_node(self.expr)
    899 
    def int_node(self, e):
        """return the integer value of a constant node 'e', or None if it
           is not an "int" or "hex" node.
           NOTE: the old code tested e[1] == "hex" instead of e[0], so hex
           constants were never recognized."""
        if e[0] == "int":
            return e[1]
        elif e[0] == "hex":
            # hex nodes store their value as an int (see dump_node's
            # "0x%x" % e[1]); accept a string form defensively
            return int(e[1], 16) if isinstance(e[1], str) else e[1]
        else:
            return None
    907 
    def toInt(self):
        """return this expression's constant integer value, or None if it
           is not a constant"""
        return self.int_node(self.expr)
    910 
    911     def optimize_node(self, e, macros={}):
    912         op = e[0]
    913         if op == "defined":
    914             op, name = e
    915             if macros.has_key(name):
    916                 if macros[name] == kCppUndefinedMacro:
    917                     return ("int", 0)
    918                 else:
    919                     try:
    920                         value = int(macros[name])
    921                         return ("int", value)
    922                     except:
    923                         return ("defined", macros[name])
    924 
    925             if kernel_remove_config_macros and name.startswith("CONFIG_"):
    926                 return ("int", 0)
    927 
    928             return e
    929 
    930         elif op == "ident":
    931             op, name = e
    932             if macros.has_key(name):
    933                 try:
    934                     value = int(macros[name])
    935                     expanded = ("int", value)
    936                 except:
    937                     expanded = ("ident", macros[name])
    938                 return self.optimize_node(expanded, macros)
    939             return e
    940 
    941         elif op == "!":
    942             op, v = e
    943             v = self.optimize_node(v, macros)
    944             if v[0] == "int":
    945                 if v[1] == 0:
    946                     return ("int", 1)
    947                 else:
    948                     return ("int", 0)
    949             return ('!', v)
    950 
    951         elif op == "&&":
    952             op, l, r = e
    953             l  = self.optimize_node(l, macros)
    954             r  = self.optimize_node(r, macros)
    955             li = self.int_node(l)
    956             ri = self.int_node(r)
    957             if li != None:
    958                 if li == 0:
    959                     return ("int", 0)
    960                 else:
    961                     return r
    962             elif ri != None:
    963                 if ri == 0:
    964                     return ("int", 0)
    965                 else:
    966                     return l
    967             return (op, l, r)
    968 
    969         elif op == "||":
    970             op, l, r = e
    971             l  = self.optimize_node(l, macros)
    972             r  = self.optimize_node(r, macros)
    973             li = self.int_node(l)
    974             ri = self.int_node(r)
    975             if li != None:
    976                 if li == 0:
    977                     return r
    978                 else:
    979                     return ("int", 1)
    980             elif ri != None:
    981                 if ri == 0:
    982                     return l
    983                 else:
    984                     return ("int", 1)
    985             return (op, l, r)
    986 
    987         else:
    988             return e
    989 
    990     def optimize(self,macros={}):
    991         self.expr = self.optimize_node(self.expr, macros)
    992 
    993     def is_equal_node(self,e1,e2):
    994         if e1[0] != e2[0] or len(e1) != len(e2):
    995             return False
    996 
    997         op = e1[0]
    998         if op == "int" or op == "hex" or op == "!" or op == "defined":
    999             return e1[0] == e2[0]
   1000 
   1001         return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2])
   1002 
    def is_equal(self,other):
        """return True iff 'other' holds a structurally equal expression"""
        return self.is_equal_node(self.expr,other.expr)
   1005 
   1006 def test_cpp_expr(expr, expected):
   1007     e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
   1008     s1 = repr(e)
   1009     if s1 != expected:
   1010         print "[FAIL]: expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
   1011         global failure_count
   1012         failure_count += 1
   1013 
   1014 def test_cpp_expr_optim(expr, expected, macros={}):
   1015     e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
   1016     e.optimize(macros)
   1017     s1 = repr(e)
   1018     if s1 != expected:
   1019         print "[FAIL]: optimized expression '%s' generates '%s' with macros %s, should be '%s'" % (expr, s1, macros, expected)
   1020         global failure_count
   1021         failure_count += 1
   1022 
   1023 def test_cpp_expr_source(expr, expected):
   1024     e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
   1025     s1 = str(e)
   1026     if s1 != expected:
   1027         print "[FAIL]: source expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
   1028         global failure_count
   1029         failure_count += 1
   1030 
def test_CppExpr():
    """self-test for CppExpr: exercises parsing (repr), optimization
       against macro dictionaries, and C source regeneration (str)"""
    test_cpp_expr("0", "(int 0)")
    test_cpp_expr("1", "(int 1)")
    test_cpp_expr("(0)", "(int 0)")
    test_cpp_expr("1 && 1", "(&& (int 1) (int 1))")
    test_cpp_expr("1 && 0", "(&& (int 1) (int 0))")
    test_cpp_expr("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)")
    test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr("defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))")
    test_cpp_expr("FOO(BAR)", "(call FOO [BAR])")
    test_cpp_expr("A == 1 || defined(B)", "(|| (== (ident A) (int 1)) (defined B))")

    test_cpp_expr_optim("0", "(int 0)")
    test_cpp_expr_optim("1", "(int 1)")
    test_cpp_expr_optim("1 && 1", "(int 1)")
    test_cpp_expr_optim("1 && 0", "(int 0)")
    test_cpp_expr_optim("0 && 1", "(int 0)")
    test_cpp_expr_optim("0 && 0", "(int 0)")
    test_cpp_expr_optim("1 || 1", "(int 1)")
    test_cpp_expr_optim("1 || 0", "(int 1)")
    test_cpp_expr_optim("0 || 1", "(int 1)")
    test_cpp_expr_optim("0 || 0", "(int 0)")
    test_cpp_expr_optim("A", "(ident A)")
    test_cpp_expr_optim("A", "(int 1)", { "A": 1 })
    test_cpp_expr_optim("A || B", "(int 1)", { "A": 1 })
    test_cpp_expr_optim("A || B", "(int 1)", { "B": 1 })
    test_cpp_expr_optim("A && B", "(ident B)", { "A": 1 })
    test_cpp_expr_optim("A && B", "(ident A)", { "B": 1 })
    test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))")
    test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)", { "EXAMPLE": "XOWOE" })
    test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))", { "EXAMPLE" : "XOWOE" })
    test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro })
    test_cpp_expr_optim("defined(A) || defined(B)", "(|| (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", { "A" : "1" })
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", { "B" : "1" })
    test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)", { "B" : kCppUndefinedMacro })
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)", { "A" : kCppUndefinedMacro, "B" : kCppUndefinedMacro })
    test_cpp_expr_optim("defined(A) && defined(B)", "(&& (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) && defined(B)", "(defined B)", { "A" : "1" })
    test_cpp_expr_optim("defined(A) && defined(B)", "(defined A)", { "B" : "1" })
    test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)", { "B" : kCppUndefinedMacro })
    test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)", { "A" : kCppUndefinedMacro })
    test_cpp_expr_optim("A == 1 || defined(B)", "(|| (== (ident A) (int 1)) (defined B))" )
    test_cpp_expr_optim("defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)", "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))", { "__KERNEL__": kCppUndefinedMacro })

    test_cpp_expr_source("0", "0")
    test_cpp_expr_source("1", "1")
    test_cpp_expr_source("1 && 1", "1 && 1")
    test_cpp_expr_source("1 && 0", "1 && 0")
    test_cpp_expr_source("0 && 1", "0 && 1")
    test_cpp_expr_source("0 && 0", "0 && 0")
    test_cpp_expr_source("1 || 1", "1 || 1")
    test_cpp_expr_source("1 || 0", "1 || 0")
    test_cpp_expr_source("0 || 1", "0 || 1")
    test_cpp_expr_source("0 || 0", "0 || 0")
    test_cpp_expr_source("EXAMPLE", "EXAMPLE")
    test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3")
    test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)")
    test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)")
    test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)")
   1099 
   1100 
   1101 #####################################################################################
   1102 #####################################################################################
   1103 #####                                                                           #####
   1104 #####          C P P   B L O C K                                                #####
   1105 #####                                                                           #####
   1106 #####################################################################################
   1107 #####################################################################################
   1108 
   1109 class Block:
   1110     """a class used to model a block of input source text. there are two block types:
   1111         - directive blocks: contain the tokens of a single pre-processor directive (e.g. #if)
   1112         - text blocks, contain the tokens of non-directive blocks
   1113 
   1114        the cpp parser class below will transform an input source file into a list of Block
   1115        objects (grouped in a BlockList object for convenience)"""
   1116 
   1117     def __init__(self,tokens,directive=None,lineno=0):
   1118         """initialize a new block, if 'directive' is None, this is a text block
   1119            NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)'
   1120                  and '#ifndef MACRO' into '#if !defined(MACRO)'"""
   1121         if directive == "ifdef":
   1122             tok = Token()
   1123             tok.set(tokDEFINED)
   1124             tokens = [ tok ] + tokens
   1125             directive = "if"
   1126 
   1127         elif directive == "ifndef":
   1128             tok1 = Token()
   1129             tok2 = Token()
   1130             tok1.set(tokNOT)
   1131             tok2.set(tokDEFINED)
   1132             tokens = [ tok1, tok2 ] + tokens
   1133             directive = "if"
   1134 
   1135         self.tokens    = tokens
   1136         self.directive = directive
   1137         if lineno > 0:
   1138             self.lineno = lineno
   1139         else:
   1140             self.lineno = self.tokens[0].lineno
   1141 
   1142         if self.isIf():
   1143             self.expr = CppExpr( self.tokens )
   1144 
   1145     def isDirective(self):
   1146         """returns True iff this is a directive block"""
   1147         return self.directive != None
   1148 
   1149     def isConditional(self):
   1150         """returns True iff this is a conditional directive block"""
   1151         return self.directive in ["if","ifdef","ifndef","else","elif","endif"]
   1152 
   1153     def isDefine(self):
   1154         """returns the macro name in a #define directive, or None otherwise"""
   1155         if self.directive != "define":
   1156             return None
   1157 
   1158         return self.tokens[0].value
   1159 
   1160     def isIf(self):
   1161         """returns True iff this is an #if-like directive block"""
   1162         return self.directive in ["if","ifdef","ifndef","elif"]
   1163 
   1164     def isInclude(self):
   1165         """checks whether this is a #include directive. if true, then returns the
   1166            corresponding file name (with brackets or double-qoutes). None otherwise"""
   1167         if self.directive != "include":
   1168             return None
   1169 
   1170         if self.tokens[0].id == tokSTRING:
   1171             # a double-quote include, that's easy
   1172             return self.tokens[0].value
   1173 
   1174         # we only want the bracket part, not any comments or junk after it
   1175         if self.tokens[0].id == "<":
   1176             i   = 0
   1177             tok = self.tokens
   1178             n   = len(tok)
   1179             while i < n and tok[i].id != ">":
   1180                 i += 1
   1181 
   1182             if i >= n:
   1183                 return None
   1184 
   1185             return string.join([ str(x) for x in tok[:i+1] ],"")
   1186 
   1187         else:
   1188             return None
   1189 
   1190     def removeWhiteSpace(self):
   1191         # Remove trailing whitespace and empty lines
   1192         # All whitespace is also contracted to a single space
   1193         if self.directive != None:
   1194             return
   1195 
   1196         tokens = []
   1197         line   = 0     # index of line start
   1198         space  = -1    # index of first space, or -1
   1199         ii = 0
   1200         nn = len(self.tokens)
   1201         while ii < nn:
   1202             tok = self.tokens[ii]
   1203 
   1204             # If we find a space, record its position if this is the first
   1205             # one the line start or the previous character. Don't append
   1206             # anything to tokens array yet though.
   1207             if tok.id == tokSPACE:
   1208                 if space < 0:
   1209                     space = ii
   1210                 ii += 1
   1211                 continue
   1212 
   1213             # If this is a line space, ignore the spaces we found previously
   1214             # on the line, and remove empty lines.
   1215             if tok.id == tokLN:
   1216                 old_line  = line
   1217                 old_space = space
   1218                 ii   += 1
   1219                 line  = ii
   1220                 space = -1
   1221                 if old_space == old_line:  # line only contains spaces
   1222                     continue
   1223                 if ii-1 == old_line:  # line is empty
   1224                     continue
   1225                 tokens.append(tok)
   1226                 continue
   1227 
   1228             # Other token, append any space range if any, converting each
   1229             # one to a single space character, then append the token.
   1230             if space >= 0:
   1231                 jj = space
   1232                 space = -1
   1233                 while jj < ii:
   1234                     tok2 = self.tokens[jj]
   1235                     tok2.value = " "
   1236                     tokens.append(tok2)
   1237                     jj += 1
   1238 
   1239             tokens.append(tok)
   1240             ii += 1
   1241 
   1242         self.tokens = tokens
   1243 
   1244     def writeWithWarning(self,out,warning,left_count,repeat_count):
   1245         # removeWhiteSpace() will sometimes creates non-directive blocks
   1246         # without any tokens. These come from blocks that only contained
   1247         # empty lines and spaces. They should not be printed in the final
   1248         # output, and then should not be counted for this operation.
   1249         #
   1250         if not self.directive and self.tokens == []:
   1251             return left_count
   1252 
   1253         if self.directive:
   1254             out.write(str(self).rstrip() + "\n")
   1255             left_count -= 1
   1256             if left_count == 0:
   1257                 out.write(warning)
   1258                 left_count = repeat_count
   1259 
   1260         else:
   1261             for tok in self.tokens:
   1262                 out.write(str(tok))
   1263                 if tok.id == tokLN:
   1264                     left_count -= 1
   1265                     if left_count == 0:
   1266                         out.write(warning)
   1267                         left_count = repeat_count
   1268 
   1269         return left_count
   1270 
   1271 
   1272     def __repr__(self):
   1273         """generate the representation of a given block"""
   1274         if self.directive:
   1275             result = "#%s " % self.directive
   1276             if self.isIf():
   1277                 result += repr(self.expr)
   1278             else:
   1279                 for tok in self.tokens:
   1280                     result += repr(tok)
   1281         else:
   1282             result = ""
   1283             for tok in self.tokens:
   1284                 result += repr(tok)
   1285 
   1286         return result
   1287 
   1288     def __str__(self):
   1289         """generate the string representation of a given block"""
   1290         if self.directive:
   1291             if self.directive == "if":
   1292                 # small optimization to re-generate #ifdef and #ifndef
   1293                 e = self.expr.expr
   1294                 op = e[0]
   1295                 if op == "defined":
   1296                     result = "#ifdef %s" % e[1]
   1297                 elif op == "!" and e[1][0] == "defined":
   1298                     result = "#ifndef %s" % e[1][1]
   1299                 else:
   1300                     result = "#if " + str(self.expr)
   1301             else:
   1302                 result = "#%s" % self.directive
   1303                 if len(self.tokens):
   1304                     result += " "
   1305                 for tok in self.tokens:
   1306                     result += str(tok)
   1307         else:
   1308             result = ""
   1309             for tok in self.tokens:
   1310                 result += str(tok)
   1311 
   1312         return result
   1313 
   1314 class BlockList:
   1315     """a convenience class used to hold and process a list of blocks returned by
   1316        the cpp parser"""
   1317     def __init__(self,blocks):
   1318         self.blocks = blocks
   1319 
   1320     def __len__(self):
   1321         return len(self.blocks)
   1322 
   1323     def __getitem__(self,n):
   1324         return self.blocks[n]
   1325 
   1326     def __repr__(self):
   1327         return repr(self.blocks)
   1328 
   1329     def __str__(self):
   1330         result = ""
   1331         for b in self.blocks:
   1332             result += str(b)
   1333             if b.isDirective():
   1334                 result = result.rstrip() + '\n'
   1335         return result
   1336 
   1337     def  optimizeIf01(self):
   1338         """remove the code between #if 0 .. #endif in a BlockList"""
   1339         self.blocks = optimize_if01(self.blocks)
   1340 
   1341     def optimizeMacros(self, macros):
   1342         """remove known defined and undefined macros from a BlockList"""
   1343         for b in self.blocks:
   1344             if b.isIf():
   1345                 b.expr.optimize(macros)
   1346 
   1347     def removeMacroDefines(self,macros):
   1348         """remove known macro definitions from a BlockList"""
   1349         self.blocks = remove_macro_defines(self.blocks,macros)
   1350 
   1351     def removeWhiteSpace(self):
   1352         for b in self.blocks:
   1353             b.removeWhiteSpace()
   1354 
   1355     def optimizeAll(self,macros):
   1356         self.optimizeMacros(macros)
   1357         self.optimizeIf01()
   1358         return
   1359 
   1360     def findIncludes(self):
   1361         """return the list of included files in a BlockList"""
   1362         result = []
   1363         for b in self.blocks:
   1364             i = b.isInclude()
   1365             if i:
   1366                 result.append(i)
   1367 
   1368         return result
   1369 
   1370 
   1371     def write(self,out):
   1372         out.write(str(self))
   1373 
   1374     def writeWithWarning(self,out,warning,repeat_count):
   1375         left_count = repeat_count
   1376         for b in self.blocks:
   1377             left_count = b.writeWithWarning(out,warning,left_count,repeat_count)
   1378 
   1379     def removeComments(self):
   1380         for b in self.blocks:
   1381             for tok in b.tokens:
   1382                 if tok.id == tokSPACE:
   1383                     tok.value = " "
   1384 
   1385     def removeVarsAndFuncs(self,knownStatics=set()):
   1386         """remove all extern and static declarations corresponding
   1387            to variable and function declarations. we only accept typedefs
   1388            and enum/structs/union declarations.
   1389 
   1390            however, we keep the definitions corresponding to the set
   1391            of known static inline functions in the set 'knownStatics',
   1392            which is useful for optimized byteorder swap functions and
   1393            stuff like that.
   1394            """
   1395         # state = 0 => normal (i.e. LN + spaces)
   1396         # state = 1 => typedef/struct encountered, ends with ";"
   1397         # state = 2 => var declaration encountered, ends with ";"
   1398         # state = 3 => func declaration encountered, ends with "}"
   1399         state      = 0
   1400         depth      = 0
   1401         blocks2    = []
   1402         skipTokens = False
   1403         for b in self.blocks:
   1404             if b.isDirective():
   1405                 blocks2.append(b)
   1406             else:
   1407                 n     = len(b.tokens)
   1408                 i     = 0
   1409                 if skipTokens:
   1410                     first = n
   1411                 else:
   1412                     first = 0
   1413                 while i < n:
   1414                     tok = b.tokens[i]
   1415                     tokid = tok.id
   1416                     # If we are not looking for the start of a new
   1417                     # type/var/func, then skip over tokens until
   1418                     # we find our terminator, managing the depth of
   1419                     # accolades as we go.
   1420                     if state > 0:
   1421                         terminator = False
   1422                         if tokid == '{':
   1423                             depth += 1
   1424                         elif tokid == '}':
   1425                             if depth > 0:
   1426                                 depth -= 1
   1427                             if (depth == 0) and (state == 3):
   1428                                 terminator = True
   1429                         elif tokid == ';' and depth == 0:
   1430                             terminator = True
   1431 
   1432                         if terminator:
   1433                             # we found the terminator
   1434                             state = 0
   1435                             if skipTokens:
   1436                                 skipTokens = False
   1437                                 first = i+1
   1438 
   1439                         i = i+1
   1440                         continue
   1441 
   1442                     # We are looking for the start of a new type/func/var
   1443                     # ignore whitespace
   1444                     if tokid in [tokLN, tokSPACE]:
   1445                         i = i+1
   1446                         continue
   1447 
   1448                     # Is it a new type definition, then start recording it
   1449                     if tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
   1450                         state = 1
   1451                         i     = i+1
   1452                         continue
   1453 
   1454                     # Is it a variable or function definition. If so, first
   1455                     # try to determine which type it is, and also extract
   1456                     # its name.
   1457                     #
   1458                     # We're going to parse the next tokens of the same block
   1459                     # until we find a semi-column or a left parenthesis.
   1460                     #
   1461                     # The semi-column corresponds to a variable definition,
   1462                     # the left-parenthesis to a function definition.
   1463                     #
   1464                     # We also assume that the var/func name is the last
   1465                     # identifier before the terminator.
   1466                     #
   1467                     j = i+1
   1468                     ident = ""
   1469                     while j < n:
   1470                         tokid = b.tokens[j].id
   1471                         if tokid == '(':  # a function declaration
   1472                             state = 3
   1473                             break
   1474                         elif tokid == ';': # a variable declaration
   1475                             state = 2
   1476                             break
   1477                         if tokid == tokIDENT:
   1478                             ident = b.tokens[j].value
   1479                         j += 1
   1480 
   1481                     if j >= n:
   1482                         # This can only happen when the declaration
   1483                         # does not end on the current block (e.g. with
   1484                         # a directive mixed inside it.
   1485                         #
   1486                         # We will treat it as malformed because
   1487                         # it's very hard to recover from this case
   1488                         # without making our parser much more
   1489                         # complex.
   1490                         #
   1491                         #print "### skip unterminated static '%s'" % ident
   1492                         break
   1493 
   1494                     if ident in knownStatics:
   1495                         #print "### keep var/func '%s': %s" % (ident,repr(b.tokens[i:j]))
   1496                         pass
   1497                     else:
   1498                         # We're going to skip the tokens for this declaration
   1499                         #print "### skip variable /func'%s': %s" % (ident,repr(b.tokens[i:j]))
   1500                         if i > first:
   1501                             blocks2.append( Block(b.tokens[first:i]))
   1502                         skipTokens = True
   1503                         first      = n
   1504 
   1505                     i = i+1
   1506 
   1507                 if i > first:
   1508                     #print "### final '%s'" % repr(b.tokens[first:i])
   1509                     blocks2.append( Block(b.tokens[first:i]) )
   1510 
   1511         self.blocks = blocks2
   1512 
   1513     def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"):
   1514         """insert your standard issue disclaimer that this is an
   1515            auto-generated file, etc.."""
   1516         tokens = CppLineTokenizer( disclaimer ).toTokenList()
   1517         tokens = tokens[:-1]  # remove trailing tokLN
   1518         self.blocks = [ Block(tokens) ] + self.blocks
   1519 
   1520     def replaceTokens(self,replacements):
   1521         """replace tokens according to the given dict"""
   1522         for b in self.blocks:
   1523             made_change = False
   1524             if b.isInclude() == None:
   1525                 for tok in b.tokens:
   1526                     if tok.id == tokIDENT:
   1527                         if tok.value in replacements:
   1528                             tok.value = replacements[tok.value]
   1529                             made_change = True
   1530 
   1531             if made_change and b.isIf():
   1532                 # Keep 'expr' in sync with 'tokens'.
   1533                 b.expr = CppExpr(b.tokens)
   1534 
   1535 class BlockParser:
   1536     """a class used to convert an input source file into a BlockList object"""
   1537 
   1538     def __init__(self,tokzer=None):
   1539         """initialize a block parser. the input source is provided through a Tokenizer
   1540            object"""
   1541         self.reset(tokzer)
   1542 
   1543     def reset(self,tokzer):
   1544         self.state  = 1
   1545         self.tokzer = tokzer
   1546 
   1547     def getBlocks(self,tokzer=None):
   1548         """tokenize and parse the input source, return a BlockList object
   1549            NOTE: empty and line-numbering directives are ignored and removed
   1550                  from the result. as a consequence, it is possible to have
   1551                  two successive text blocks in the result"""
   1552         # state 0 => in source code
   1553         # state 1 => in source code, after a LN
   1554         # state 2 => in source code, after LN then some space
   1555         state   = 1
   1556         lastLN  = 0
   1557         current = []
   1558         blocks  = []
   1559 
   1560         if tokzer == None:
   1561             tokzer = self.tokzer
   1562 
   1563         while 1:
   1564             tok = tokzer.getToken()
   1565             if tok.id == tokEOF:
   1566                 break
   1567 
   1568             if tok.id == tokLN:
   1569                 state    = 1
   1570                 current.append(tok)
   1571                 lastLN   = len(current)
   1572 
   1573             elif tok.id == tokSPACE:
   1574                 if state == 1:
   1575                     state = 2
   1576                 current.append(tok)
   1577 
   1578             elif tok.id == "#":
   1579                 if state > 0:
   1580                     # this is the start of a directive
   1581 
   1582                     if lastLN > 0:
   1583                         # record previous tokens as text block
   1584                         block   = Block(current[:lastLN])
   1585                         blocks.append(block)
   1586                         lastLN  = 0
   1587 
   1588                     current = []
   1589 
   1590                     # skip spaces after the #
   1591                     while 1:
   1592                         tok = tokzer.getToken()
   1593                         if tok.id != tokSPACE:
   1594                             break
   1595 
   1596                     if tok.id != tokIDENT:
   1597                         # empty or line-numbering, ignore it
   1598                         if tok.id != tokLN and tok.id != tokEOF:
   1599                             while 1:
   1600                                 tok = tokzer.getToken()
   1601                                 if tok.id == tokLN or tok.id == tokEOF:
   1602                                     break
   1603                         continue
   1604 
   1605                     directive = tok.value
   1606                     lineno    = tok.lineno
   1607 
   1608                     # skip spaces
   1609                     tok = tokzer.getToken()
   1610                     while tok.id == tokSPACE:
   1611                         tok = tokzer.getToken()
   1612 
   1613                     # then record tokens until LN
   1614                     dirtokens = []
   1615                     while tok.id != tokLN and tok.id != tokEOF:
   1616                         dirtokens.append(tok)
   1617                         tok = tokzer.getToken()
   1618 
   1619                     block = Block(dirtokens,directive,lineno)
   1620                     blocks.append(block)
   1621                     state   = 1
   1622 
   1623             else:
   1624                 state = 0
   1625                 current.append(tok)
   1626 
   1627         if len(current) > 0:
   1628             block = Block(current)
   1629             blocks.append(block)
   1630 
   1631         return BlockList(blocks)
   1632 
   1633     def parse(self,tokzer):
   1634         return self.getBlocks( tokzer )
   1635 
   1636     def parseLines(self,lines):
   1637         """parse a list of text lines into a BlockList object"""
   1638         return self.getBlocks( CppLinesTokenizer(lines) )
   1639 
   1640     def parseFile(self,path):
   1641         """parse a file into a BlockList object"""
   1642         file = open(path, "rt")
   1643         result = self.getBlocks( CppFileTokenizer(file) )
   1644         file.close()
   1645         return result
   1646 
   1647 
   1648 def test_block_parsing(lines,expected):
   1649     blocks = BlockParser().parse( CppLinesTokenizer(lines) )
   1650     if len(blocks) != len(expected):
   1651         raise BadExpectedToken, "parser.buildBlocks returned '%s' expecting '%s'" \
   1652               % (str(blocks), repr(expected))
   1653     for n in range(len(blocks)):
   1654         if str(blocks[n]) != expected[n]:
   1655             raise BadExpectedToken, "parser.buildBlocks()[%d] is '%s', expecting '%s'" \
   1656                   % (n, str(blocks[n]), expected[n])
   1657     #for block in blocks:
   1658     #    print block
   1659 
   1660 def test_BlockParser():
   1661     test_block_parsing(["#error hello"],["#error hello"])
   1662     test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ])
   1663     test_block_parsing([ "foo", "  #  ", "bar" ], [ "foo\n","bar\n" ])
   1664     test_block_parsing(\
   1665         [ "foo", "   #  ", "  #  /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ],
   1666         [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] )
   1667 
   1668 
   1669 #####################################################################################
   1670 #####################################################################################
   1671 #####                                                                           #####
   1672 #####        B L O C K   L I S T   O P T I M I Z A T I O N                      #####
   1673 #####                                                                           #####
   1674 #####################################################################################
   1675 #####################################################################################
   1676 
   1677 def  remove_macro_defines( blocks, excludedMacros=set() ):
   1678     """remove macro definitions like #define <macroName>  ...."""
   1679     result = []
   1680     for b in blocks:
   1681         macroName = b.isDefine()
   1682         if macroName == None or not macroName in excludedMacros:
   1683             result.append(b)
   1684 
   1685     return result
   1686 
   1687 def  find_matching_endif( blocks, i ):
   1688     n     = len(blocks)
   1689     depth = 1
   1690     while i < n:
   1691         if blocks[i].isDirective():
   1692             dir = blocks[i].directive
   1693             if dir in [ "if", "ifndef", "ifdef" ]:
   1694                 depth += 1
   1695             elif depth == 1 and dir in [ "else", "elif" ]:
   1696                 return i
   1697             elif dir == "endif":
   1698                 depth -= 1
   1699                 if depth == 0:
   1700                     return i
   1701         i += 1
   1702     return i
   1703 
def  optimize_if01( blocks ):
    """remove the code between #if 0 .. #endif in a list of CppBlocks

       conditionals whose expression statically evaluates to 0 are dropped
       together with their content; those evaluating to 1 are unwrapped,
       with any trailing #else/#elif rewritten so the rest of the chain
       stays consistent.  returns a new block list; note that items of
       'blocks' may be modified in place."""
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        # copy the run of non-#if blocks through unchanged
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno))
            result += blocks[i:j]
        if j >= n:
            break
        # toInt() returns None when the condition cannot be decided
        # statically; keep such directives as-is
        expr = blocks[j].expr
        r    = expr.toInt()
        if r == None:
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            j = find_matching_endif( blocks, j+1 )
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir = blocks[j].directive
            if dir == "endif":
                D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno))
                i = j + 1
            elif dir == "else":
                # convert 'else' into 'if 1'
                D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno))
                blocks[j].directive = "if"
                blocks[j].expr      = CppExpr( CppLineTokenizer("1").toTokenList() )
                i = j
            elif dir == "elif":
                # convert 'elif' into 'if', keeping the elif's own
                # expression for the next pass over this position
                D2("convert 'if 0' .. 'elif' into 'if'")
                blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif( blocks, j+1 )
        if k >= n:
            # unterminated #if 1, finish here
            D2("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        dir = blocks[k].directive
        if dir == "endif":
            # recurse to optimize nested conditionals inside the kept body
            D2("convert 'if 1' .. 'endif' (lines %d to %d)"  % (blocks[j].lineno, blocks[k].lineno))
            result += optimize_if01(blocks[j+1:k])
            i       = k+1
        elif dir == "else":
            # convert 'else' into 'if 0'
            D2("convert 'if 1' .. 'else' (lines %d to %d)"  % (blocks[j].lineno, blocks[k].lineno))
            result += optimize_if01(blocks[j+1:k])
            blocks[k].directive = "if"
            blocks[k].expr      = CppExpr( CppLineTokenizer("0").toTokenList() )
            i = k
        elif dir == "elif":
            # convert 'elif' into 'if 0'; the directive keeps its 'elif'
            # name so that any following #else/#elif chain stays valid
            D2("convert 'if 1' .. 'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
            result += optimize_if01(blocks[j+1:k])
            blocks[k].expr      = CppExpr( CppLineTokenizer("0").toTokenList() )
            i = k
    return result
   1775 
   1776 def  test_optimizeAll():
   1777     text = """\
   1778 #if 1
   1779 #define  GOOD_1
   1780 #endif
   1781 #if 0
   1782 #define  BAD_2
   1783 #define  BAD_3
   1784 #endif
   1785 
   1786 #if 1
   1787 #define  GOOD_2
   1788 #else
   1789 #define  BAD_4
   1790 #endif
   1791 
   1792 #if 0
   1793 #define  BAD_5
   1794 #else
   1795 #define  GOOD_3
   1796 #endif
   1797 
   1798 #if defined(__KERNEL__)
   1799 #define BAD_KERNEL
   1800 #endif
   1801 
   1802 #if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
   1803 #define X
   1804 #endif
   1805 
   1806 #ifndef SIGRTMAX
   1807 #define SIGRTMAX 123
   1808 #endif /* SIGRTMAX */
   1809 
   1810 #if 0
   1811 #if 1
   1812 #define  BAD_6
   1813 #endif
   1814 #endif\
   1815 """
   1816 
   1817     expected = """\
   1818 #define GOOD_1
   1819 
   1820 #define GOOD_2
   1821 
   1822 #define GOOD_3
   1823 
   1824 
   1825 #if !defined(__GLIBC__) || __GLIBC__ < 2
   1826 #define X
   1827 #endif
   1828 
   1829 #ifndef __SIGRTMAX
   1830 #define __SIGRTMAX 123
   1831 #endif
   1832 
   1833 """
   1834 
   1835     out = StringOutput()
   1836     lines = string.split(text, '\n')
   1837     list = BlockParser().parse( CppLinesTokenizer(lines) )
   1838     #D_setlevel(2)
   1839     list.replaceTokens( kernel_token_replacements )
   1840     list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} )
   1841     list.write(out)
   1842     if out.get() != expected:
   1843         print "[FAIL]: macro optimization failed\n"
   1844         print "<<<< expecting '",
   1845         print expected,
   1846         print "'\n>>>> result '"
   1847         print out.get(),
   1848         print "'\n----"
   1849         global failure_count
   1850         failure_count += 1
   1851 
   1852 
   1853 # -- Always run the unit tests.
   1854 
def runUnitTests():
    """run all unit tests for this program"""
    # each test either raises BadExpectedToken on a mismatch or bumps the
    # module-level 'failure_count' counter
    test_CppTokenizer()
    test_CppExpr()
    test_optimizeAll()
    test_BlockParser()
   1861 
# the unit tests run unconditionally at import time; tests record soft
# failures by incrementing this module-level counter
failure_count = 0
runUnitTests()
if failure_count != 0:
    # exit with an error status so callers/build scripts notice the failure
    sys.exit(1)
   1866