Home | History | Annotate | Download | only in tools
      1 # a glorified C pre-processor parser
      2 
      3 import sys, re, string
      4 from utils import *
      5 from defaults import *
      6 
      7 debugTokens             = False
      8 debugDirectiveTokenizer = False
      9 debugLineParsing        = False
     10 debugCppExpr            = False
     11 debugOptimIf01          = False
     12 
     13 #####################################################################################
     14 #####################################################################################
     15 #####                                                                           #####
     16 #####           C P P   T O K E N S                                             #####
     17 #####                                                                           #####
     18 #####################################################################################
     19 #####################################################################################
     20 
     21 # the list of supported C-preprocessor tokens
     22 # plus a couple of C tokens as well
     23 tokEOF       = "\0"
     24 tokLN        = "\n"
     25 tokSTRINGIFY = "#"
     26 tokCONCAT    = "##"
     27 tokLOGICAND  = "&&"
     28 tokLOGICOR   = "||"
     29 tokSHL       = "<<"
     30 tokSHR       = ">>"
     31 tokEQUAL     = "=="
     32 tokNEQUAL    = "!="
     33 tokLT        = "<"
     34 tokLTE       = "<="
     35 tokGT        = ">"
     36 tokGTE       = ">="
     37 tokELLIPSIS  = "..."
     38 tokSPACE     = " "
     39 tokDEFINED   = "defined"
     40 tokLPAREN    = "("
     41 tokRPAREN    = ")"
     42 tokNOT       = "!"
     43 tokPLUS      = "+"
     44 tokMINUS     = "-"
     45 tokMULTIPLY  = "*"
     46 tokDIVIDE    = "/"
     47 tokMODULUS   = "%"
     48 tokBINAND    = "&"
     49 tokBINOR     = "|"
     50 tokBINXOR    = "^"
     51 tokCOMMA     = ","
     52 tokLBRACE    = "{"
     53 tokRBRACE    = "}"
     54 tokARROW     = "->"
     55 tokINCREMENT = "++"
     56 tokDECREMENT = "--"
     57 tokNUMBER    = "<number>"
     58 tokIDENT     = "<ident>"
     59 tokSTRING    = "<string>"
     60 
     61 class Token:
     62     """a simple class to hold information about a given token.
     63        each token has a position in the source code, as well as
     64        an 'id' and a 'value'. the id is a string that identifies
     65        the token's class, while the value is the string of the
     66        original token itself.
     67 
     68        for example, the tokenizer concatenates a series of spaces
     69        and tabs as a single tokSPACE id, whose value if the original
     70        spaces+tabs sequence."""
     71 
     72     def __init__(self):
     73         self.id     = None
     74         self.value  = None
     75         self.lineno = 0
     76         self.colno  = 0
     77 
     78     def set(self,id,val=None):
     79         self.id = id
     80         if val:
     81             self.value = val
     82         else:
     83             self.value = id
     84         return None
     85 
     86     def copyFrom(self,src):
     87         self.id     = src.id
     88         self.value  = src.value
     89         self.lineno = src.lineno
     90         self.colno  = src.colno
     91 
     92     def __repr__(self):
     93         if self.id == tokIDENT:
     94             return "(ident %s)" % self.value
     95         if self.id == tokNUMBER:
     96             return "(number %s)" % self.value
     97         if self.id == tokSTRING:
     98             return "(string '%s')" % self.value
     99         if self.id == tokLN:
    100             return "<LN>"
    101         if self.id == tokEOF:
    102             return "<EOF>"
    103         if self.id == tokSPACE and self.value == "\\":
    104             # this corresponds to a trailing \ that was transformed into a tokSPACE
    105             return "<\\>"
    106 
    107         return self.id
    108 
    109     def __str__(self):
    110         if self.id == tokIDENT:
    111             return self.value
    112         if self.id == tokNUMBER:
    113             return self.value
    114         if self.id == tokSTRING:
    115             return self.value
    116         if self.id == tokEOF:
    117             return "<EOF>"
    118         if self.id == tokSPACE:
    119             if self.value == "\\":  # trailing \
    120                 return "\\\n"
    121             else:
    122                 return self.value
    123 
    124         return self.id
    125 
    126 class BadExpectedToken(Exception):
    127     def __init__(self,msg):
    128         print msg
    129 
    130 #####################################################################################
    131 #####################################################################################
    132 #####                                                                           #####
    133 #####          C P P   T O K E N   C U R S O R                                  #####
    134 #####                                                                           #####
    135 #####################################################################################
    136 #####################################################################################
    137 
    138 class TokenCursor:
    139     """a small class to iterate over a list of Token objects"""
    140     def __init__(self,tokens):
    141         self.tokens = tokens
    142         self.n      = 0
    143         self.count  = len(tokens)
    144 
    145     def set(self,n):
    146         """set the current position"""
    147         if n < 0:
    148             n = 0
    149         if n > self.count:
    150             n = self.count
    151         self.n = n
    152 
    153     def peekId(self):
    154         """retrieve the id of the current token"""
    155         if (self.n >= self.count):
    156             return None
    157         return self.tokens[self.n].id
    158 
    159     def peek(self):
    160         """retrieve the current token. does not change position"""
    161         if (self.n >= self.count):
    162             return None
    163         return self.tokens[self.n]
    164 
    165     def skip(self):
    166         """increase current token position"""
    167         if (self.n < self.count):
    168             self.n += 1
    169 
    170     def skipSpaces(self):
    171         """skip over all space tokens, this includes tokSPACE and tokLN"""
    172         while 1:
    173             tok = self.peekId()
    174             if tok != tokSPACE and tok != tokLN:
    175                 break
    176             self.skip()
    177 
    178     def skipIfId(self,id):
    179         """skip an optional token"""
    180         if self.peekId() == id:
    181             self.skip()
    182 
    183     def expectId(self,id):
    184         """raise an exception if the current token hasn't a given id.
    185            otherwise skip over it"""
    186         tok = self.peek()
    187         if tok.id != id:
    188             raise BadExpectedToken, "%d:%d: '%s' expected, received '%s'" % (tok.lineno, tok.colno, id, tok.id)
    189         self.skip()
    190 
    191     def remain(self):
    192         """return the list of remaining tokens"""
    193         return self.tokens[self.n:]
    194 
    195 
    196 #####################################################################################
    197 #####################################################################################
    198 #####                                                                           #####
    199 #####           C P P   T O K E N I Z E R                                       #####
    200 #####                                                                           #####
    201 #####################################################################################
    202 #####################################################################################
    203 
    204 # list of long symbols, i.e. those that take more than one characters
    205 cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\
    206                    tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ]
    207 
    208 class CppTokenizer:
    209     """an abstract class used to convert some input text into a list
    210        of tokens. real implementations follow and differ in the format
    211        of the input text only"""
    212 
    213     def __init__(self):
    214         """initialize a new CppTokenizer object"""
    215         self.eof  = False  # end of file reached ?
    216         self.text = None   # content of current line, with final \n stripped
    217         self.line = 0      # number of current line
    218         self.pos  = 0      # current character position in current line
    219         self.len  = 0      # length of current line text
    220         self.held = Token()
    221 
    222     def setLineText(self,line):
    223         """set the content of the (next) current line. should be called
    224            by fillLineText() in derived classes"""
    225         self.text = line
    226         self.len  = len(line)
    227         self.pos  = 0
    228 
    229     def fillLineText(self):
    230         """refresh the content of 'line' with a new line of input"""
    231         # to be overriden
    232         self.eof = True
    233 
    234     def markPos(self,tok):
    235         """mark the position of the current token in the source file"""
    236         if self.eof or self.pos > self.len:
    237             tok.lineno = self.line + 1
    238             tok.colno  = 0
    239         else:
    240             tok.lineno = self.line
    241             tok.colno  = self.pos
    242 
    243     def peekChar(self):
    244         """return the current token under the cursor without moving it"""
    245         if self.eof:
    246             return tokEOF
    247 
    248         if self.pos > self.len:
    249             self.pos   = 0
    250             self.line += 1
    251             self.fillLineText()
    252             if self.eof:
    253                 return tokEOF
    254 
    255         if self.pos == self.len:
    256             return tokLN
    257         else:
    258             return self.text[self.pos]
    259 
    260     def peekNChar(self,n):
    261         """try to peek the next n chars on the same line"""
    262         if self.pos + n > self.len:
    263             return None
    264         return self.text[self.pos:self.pos+n]
    265 
    266     def skipChar(self):
    267         """increment the token cursor position"""
    268         if not self.eof:
    269             self.pos += 1
    270 
    271     def skipNChars(self,n):
    272         if self.pos + n <= self.len:
    273             self.pos += n
    274         else:
    275             while n > 0:
    276                 self.skipChar()
    277                 n -= 1
    278 
    279     def nextChar(self):
    280         """retrieve the token at the current cursor position, then skip it"""
    281         result = self.peekChar()
    282         self.skipChar()
    283         return  result
    284 
    285     def getEscape(self):
    286         # try to get all characters after a backslash (\)
    287         result = self.nextChar()
    288         if result == "0":
    289             # octal number ?
    290             num = self.peekNChar(3)
    291             if num != None:
    292                 isOctal = True
    293                 for d in num:
    294                     if not d in "01234567":
    295                         isOctal = False
    296                         break
    297                 if isOctal:
    298                     result += num
    299                     self.skipNChars(3)
    300         elif result == "x" or result == "X":
    301             # hex number ?
    302             num = self.peekNChar(2)
    303             if num != None:
    304                 isHex = True
    305                 for d in num:
    306                     if not d in "012345678abcdefABCDEF":
    307                         isHex = False
    308                         break
    309                 if isHex:
    310                     result += num
    311                     self.skipNChars(2)
    312         elif result == "u" or result == "U":
    313             # unicode char ?
    314             num = self.peekNChar(4)
    315             if num != None:
    316                 isHex = True
    317                 for d in num:
    318                     if not d in "012345678abcdefABCDEF":
    319                         isHex = False
    320                         break
    321                 if isHex:
    322                     result += num
    323                     self.skipNChars(4)
    324 
    325         return result
    326 
    327     def nextRealToken(self,tok):
    328         """return next CPP token, used internally by nextToken()"""
    329         c = self.nextChar()
    330         if c == tokEOF or c == tokLN:
    331             return tok.set(c)
    332 
    333         if c == '/':
    334             c = self.peekChar()
    335             if c == '/':   # C++ comment line
    336                 self.skipChar()
    337                 while 1:
    338                     c = self.nextChar()
    339                     if c == tokEOF or c == tokLN:
    340                         break
    341                 return tok.set(tokLN)
    342             if c == '*':   # C comment start
    343                 self.skipChar()
    344                 value = "/*"
    345                 prev_c = None
    346                 while 1:
    347                     c = self.nextChar()
    348                     if c == tokEOF:
    349                         #print "## EOF after '%s'" % value
    350                         return tok.set(tokEOF,value)
    351                     if c == '/' and prev_c == '*':
    352                         break
    353                     prev_c = c
    354                     value += c
    355 
    356                 value += "/"
    357                 #print "## COMMENT: '%s'" % value
    358                 return tok.set(tokSPACE,value)
    359             c = '/'
    360 
    361         if c.isspace():
    362             while 1:
    363                 c2 = self.peekChar()
    364                 if c2 == tokLN or not c2.isspace():
    365                     break
    366                 c += c2
    367                 self.skipChar()
    368             return tok.set(tokSPACE,c)
    369 
    370         if c == '\\':
    371             if debugTokens:
    372                 print "nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar())
    373             if self.peekChar() == tokLN:   # trailing \
    374                 # eat the tokLN
    375                 self.skipChar()
    376                 # we replace a trailing \ by a tokSPACE whose value is
    377                 # simply "\\". this allows us to detect them later when
    378                 # needed.
    379                 return tok.set(tokSPACE,"\\")
    380             else:
    381                 # treat as a single token here ?
    382                 c +=self.getEscape()
    383                 return tok.set(c)
    384 
    385         if c == "'":  # chars
    386             c2 = self.nextChar()
    387             c += c2
    388             if c2 == '\\':
    389                 c += self.getEscape()
    390 
    391             while 1:
    392                 c2 = self.nextChar()
    393                 if c2 == tokEOF:
    394                     break
    395                 c += c2
    396                 if c2 == "'":
    397                     break
    398 
    399             return tok.set(tokSTRING, c)
    400 
    401         if c == '"':  # strings
    402             quote = 0
    403             while 1:
    404                 c2  = self.nextChar()
    405                 if c2 == tokEOF:
    406                     return tok.set(tokSTRING,c)
    407 
    408                 c += c2
    409                 if not quote:
    410                     if c2 == '"':
    411                         return tok.set(tokSTRING,c)
    412                     if c2 == "\\":
    413                         quote = 1
    414                 else:
    415                     quote = 0
    416 
    417         if c >= "0" and c <= "9":  # integers ?
    418             while 1:
    419                 c2 = self.peekChar()
    420                 if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
    421                     break
    422                 c += c2
    423                 self.skipChar()
    424             return tok.set(tokNUMBER,c)
    425 
    426         if c.isalnum() or c == "_":  # identifiers ?
    427             while 1:
    428                 c2 = self.peekChar()
    429                 if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
    430                     break
    431                 c += c2
    432                 self.skipChar()
    433             if c == tokDEFINED:
    434                 return tok.set(tokDEFINED)
    435             else:
    436                 return tok.set(tokIDENT,c)
    437 
    438         # check special symbols
    439         for sk in cppLongSymbols:
    440             if c == sk[0]:
    441                 sklen = len(sk[1:])
    442                 if self.pos + sklen <= self.len and \
    443                    self.text[self.pos:self.pos+sklen] == sk[1:]:
    444                     self.pos += sklen
    445                     return tok.set(sk)
    446 
    447         return tok.set(c)
    448 
    449     def nextToken(self,tok):
    450         """return the next token from the input text. this function
    451            really updates 'tok', and does not return a new one"""
    452         self.markPos(tok)
    453         self.nextRealToken(tok)
    454 
    455     def getToken(self):
    456         tok = Token()
    457         self.nextToken(tok)
    458         if debugTokens:
    459             print "getTokens: %s" % repr(tok)
    460         return tok
    461 
    462     def toTokenList(self):
    463         """convert the input text of a CppTokenizer into a direct
    464            list of token objects. tokEOF is stripped from the result"""
    465         result = []
    466         while 1:
    467             tok = Token()
    468             self.nextToken(tok)
    469             if tok.id == tokEOF:
    470                 break
    471             result.append(tok)
    472         return result
    473 
    474 class CppLineTokenizer(CppTokenizer):
    475     """a CppTokenizer derived class that accepts a single line of text as input"""
    476     def __init__(self,line,lineno=1):
    477         CppTokenizer.__init__(self)
    478         self.line = lineno
    479         self.setLineText(line)
    480 
    481 
    482 class CppLinesTokenizer(CppTokenizer):
    483     """a CppTokenizer derived class that accepts a list of texdt lines as input.
    484        the lines must not have a trailing \n"""
    485     def __init__(self,lines=[],lineno=1):
    486         """initialize a CppLinesTokenizer. you can later add lines using addLines()"""
    487         CppTokenizer.__init__(self)
    488         self.line  = lineno
    489         self.lines = lines
    490         self.index = 0
    491         self.count = len(lines)
    492 
    493         if self.count > 0:
    494             self.fillLineText()
    495         else:
    496             self.eof = True
    497 
    498     def addLine(self,line):
    499         """add a line to a CppLinesTokenizer. this can be done after tokenization
    500            happens"""
    501         if self.count == 0:
    502             self.setLineText(line)
    503             self.index = 1
    504         self.lines.append(line)
    505         self.count += 1
    506         self.eof    = False
    507 
    508     def fillLineText(self):
    509         if self.index < self.count:
    510             self.setLineText(self.lines[self.index])
    511             self.index += 1
    512         else:
    513             self.eof = True
    514 
    515 
    516 class CppFileTokenizer(CppTokenizer):
    517     def __init__(self,file,lineno=1):
    518         CppTokenizer.__init__(self)
    519         self.file = file
    520         self.line = lineno
    521 
    522     def fillLineText(self):
    523         line = self.file.readline()
    524         if len(line) > 0:
    525             if line[-1] == '\n':
    526                 line = line[:-1]
    527             if len(line) > 0 and line[-1] == "\r":
    528                 line = line[:-1]
    529             self.setLineText(line)
    530         else:
    531             self.eof = True
    532 
    533 # Unit testing
    534 #
    535 class CppTokenizerTester:
    536     """a class used to test CppTokenizer classes"""
    537     def __init__(self,tokenizer=None):
    538         self.tokenizer = tokenizer
    539         self.token     = Token()
    540 
    541     def setTokenizer(self,tokenizer):
    542         self.tokenizer = tokenizer
    543 
    544     def expect(self,id):
    545         self.tokenizer.nextToken(self.token)
    546         tokid = self.token.id
    547         if tokid == id:
    548             return
    549         if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER):
    550             return
    551         raise BadExpectedToken, "###  BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id)
    552 
    553     def expectToken(self,id,line,col):
    554         self.expect(id)
    555         if self.token.lineno != line:
    556             raise BadExpectedToken, "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line)
    557         if self.token.colno != col:
    558             raise BadExpectedToken, "###  BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col)
    559 
    560     def expectTokenVal(self,id,value,line,col):
    561         self.expectToken(id,line,col)
    562         if self.token.value != value:
    563             raise BadExpectedToken, "###  BAD VALUE: '%s' expecting '%s'" % (self.token.value,value)
    564 
    565     def expectList(self,list):
    566         for item in list:
    567             self.expect(item)
    568 
    569 def test_CppTokenizer():
    570     print "running CppTokenizer tests"
    571     tester = CppTokenizerTester()
    572 
    573     tester.setTokenizer( CppLineTokenizer("#an/example  && (01923_xy)") )
    574     tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \
    575                        tokRPAREN, tokLN, tokEOF] )
    576 
    577     tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") )
    578     tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE,
    579                         tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] )
    580 
    581     tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) )
    582     tester.expectList( [ tokSPACE, tokLN, tokEOF ] )
    583 
    584     tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) )
    585     tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] )
    586 
    587     tester.setTokenizer( CppLinesTokenizer( ["first second", "  third"] ) )
    588     tester.expectToken( "first", 1, 0 )
    589     tester.expectToken( tokSPACE, 1, 5 )
    590     tester.expectToken( "second", 1, 6 )
    591     tester.expectToken( tokLN, 1, 12 )
    592     tester.expectToken( tokSPACE, 2, 0 )
    593     tester.expectToken( "third", 2, 2 )
    594 
    595     tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) )
    596     tester.expectList( [ "boo", tokSPACE ] )
    597     tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 )
    598     tester.expectList( [ tokLN, tokEOF ] )
    599 
    600     tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) )
    601     tester.expectToken( "an", 1, 0 )
    602     tester.expectToken( tokSPACE, 1, 2 )
    603     tester.expectTokenVal( tokSPACE, "\\", 1, 3 )
    604     tester.expectToken( tokSPACE, 2, 0 )
    605     tester.expectToken( "example", 2, 1 )
    606     tester.expectToken( tokLN, 2, 8 )
    607 
    608     return True
    609 
    610 
    611 #####################################################################################
    612 #####################################################################################
    613 #####                                                                           #####
    614 #####           C P P   E X P R E S S I O N S                                   #####
    615 #####                                                                           #####
    616 #####################################################################################
    617 #####################################################################################
    618 
    619 # Cpp expressions are modeled by tuples of the form (op,arg) or (op,arg1,arg2), etc..
    620 # op is an "operator" string
    621 
    622 class Expr:
    623     """a class used to model a CPP expression"""
    624     opInteger   = "int"
    625     opIdent     = "ident"
    626     opCall      = "call"
    627     opDefined   = "defined"
    628     opTest      = "?"
    629     opLogicNot  = "!"
    630     opNot       = "~"
    631     opNeg       = "[-]"
    632     opUnaryPlus = "[+]"
    633     opAdd = "+"
    634     opSub = "-"
    635     opMul = "*"
    636     opDiv = "/"
    637     opMod = "%"
    638     opAnd = "&"
    639     opOr  = "|"
    640     opXor = "^"
    641     opLogicAnd = "&&"
    642     opLogicOr  = "||"
    643     opEqual = "=="
    644     opNotEqual = "!="
    645     opLess = "<"
    646     opLessEq = "<="
    647     opGreater = ">"
    648     opGreaterEq = ">="
    649     opShl = "<<"
    650     opShr = ">>"
    651 
    652     unaries  = [ opLogicNot, opNot, opNeg, opUnaryPlus ]
    653     binaries = [ opAdd, opSub, opMul, opDiv, opMod, opAnd, opOr, opXor, opLogicAnd, opLogicOr,
    654                  opEqual, opNotEqual, opLess, opLessEq, opGreater, opGreaterEq ]
    655 
    656     precedences = {
    657                     opTest: 0,
    658                     opLogicOr:  1,
    659                     opLogicNot: 2,
    660                     opOr : 3,
    661                     opXor: 4,
    662                     opAnd: 5,
    663                     opEqual: 6, opNotEqual: 6,
    664                     opLess:7, opLessEq:7, opGreater:7, opGreaterEq:7,
    665                     opShl:8, opShr:8,
    666                     opAdd:9, opSub:9,
    667                     opMul:10, opDiv:10, opMod:10,
    668                     opLogicNot:11,
    669                     opNot: 12,
    670                     }
    671 
    672     def __init__(self,op):
    673         self.op = op
    674 
    675     def __repr__(self):
    676         return "(%s)" % self.op
    677 
    678     def __str__(self):
    679         return "operator(%s)" % self.op
    680 
    681     def precedence(self):
    682         """return the precedence of a given operator"""
    683         return Expr.precedences.get(self.op, 1000)
    684 
    685     def isUnary(self):
    686         return self.op in Expr.unaries
    687 
    688     def isBinary(self):
    689         return self.op in Expr.binaries
    690 
    691     def isDefined(self):
    692         return self.op is opDefined
    693 
    694     def toInt(self):
    695         """return the integer value of a given expression. only valid for integer expressions
    696            will return None otherwise"""
    697         return None
    698 
    699 class IntExpr(Expr):
    700     def __init__(self,value):
    701         Expr.__init__(self,opInteger)
    702         self.arg   = value
    703 
    704     def __repr__(self):
    705         return "(int %s)" % self.arg
    706 
    707     def __str__(self):
    708         return self.arg
    709 
    710     def toInt(self):
    711         s = self.arg  # string value
    712         # get rid of U or L suffixes
    713         while len(s) > 0 and s[-1] in "LUlu":
    714             s = s[:-1]
    715         return string.atoi(s)
    716 
    717 class IdentExpr(Expr):
    718     def __init__(self,name):
    719         Expr.__init__(self,opIdent)
    720         self.name = name
    721 
    722     def __repr__(self):
    723         return "(ident %s)" % self.name
    724 
    725     def __str__(self):
    726         return self.name
    727 
    728 class CallExpr(Expr):
    729     def __init__(self,funcname,params):
    730         Expr.__init__(self,opCall)
    731         self.funcname = funcname
    732         self.params   = params
    733 
    734     def __repr__(self):
    735         result = "(call %s [" % self.funcname
    736         comma  = ""
    737         for param in self.params:
    738             result += "%s%s" % (comma, repr(param))
    739             comma   = ","
    740         result += "])"
    741         return result
    742 
    743     def __str__(self):
    744         result = "%s(" % self.funcname
    745         comma = ""
    746         for param in self.params:
    747             result += "%s%s" % (comma, str(param))
    748             comma = ","
    749 
    750         result += ")"
    751         return result
    752 
    753 class TestExpr(Expr):
    754     def __init__(self,cond,iftrue,iffalse):
    755         Expr.__init__(self,opTest)
    756         self.cond    = cond
    757         self.iftrue  = iftrue
    758         self.iffalse = iffalse
    759 
    760     def __repr__(self):
    761         return "(?: %s %s %s)" % (repr(self.cond),repr(self.iftrue),repr(self.iffalse))
    762 
    763     def __str__(self):
    764         return "(%s) ? (%s) : (%s)" % (self.cond, self.iftrue, self.iffalse)
    765 
    766 class SingleArgExpr(Expr):
    767     def __init__(self,op,arg):
    768         Expr.__init__(self,op)
    769         self.arg = arg
    770 
    771     def __repr__(self):
    772         return "(%s %s)" % (self.op, repr(self.arg))
    773 
    774 class DefinedExpr(SingleArgExpr):
    775     def __init__(self,op,macroname):
    776         SingleArgExpr.__init__(self.opDefined,macroname)
    777 
    778     def __str__(self):
    779         return "defined(%s)" % self.arg
    780 
    781 
    782 class UnaryExpr(SingleArgExpr):
    783     def __init__(self,op,arg,opstr=None):
    784         SingleArgExpr.__init__(self,op,arg)
    785         if not opstr:
    786             opstr = op
    787         self.opstr = opstr
    788 
    789     def __str__(self):
    790         arg_s     = str(self.arg)
    791         arg_prec  = self.arg.precedence()
    792         self_prec = self.precedence()
    793         if arg_prec < self_prec:
    794             return "%s(%s)" % (self.opstr,arg_s)
    795         else:
    796             return "%s%s" % (self.opstr, arg_s)
    797 
    798 class TwoArgExpr(Expr):
    799     def __init__(self,op,arg1,arg2):
    800         Expr.__init__(self,op)
    801         self.arg1 = arg1
    802         self.arg2 = arg2
    803 
    804     def __repr__(self):
    805         return "(%s %s %s)" % (self.op, repr(self.arg1), repr(self.arg2))
    806 
    807 class BinaryExpr(TwoArgExpr):
    808     def __init__(self,op,arg1,arg2,opstr=None):
    809         TwoArgExpr.__init__(self,op,arg1,arg2)
    810         if not opstr:
    811             opstr = op
    812         self.opstr = opstr
    813 
    814     def __str__(self):
    815         arg1_s    = str(self.arg1)
    816         arg2_s    = str(self.arg2)
    817         arg1_prec = self.arg1.precedence()
    818         arg2_prec = self.arg2.precedence()
    819         self_prec = self.precedence()
    820 
    821         result = ""
    822         if arg1_prec < self_prec:
    823             result += "(%s)" % arg1_s
    824         else:
    825             result += arg1_s
    826 
    827         result += " %s " % self.opstr
    828 
    829         if arg2_prec < self_prec:
    830             result += "(%s)" % arg2_s
    831         else:
    832             result += arg2_s
    833 
    834         return result
    835 
    836 #####################################################################################
    837 #####################################################################################
    838 #####                                                                           #####
    839 #####           C P P   E X P R E S S I O N   P A R S E R                       #####
    840 #####                                                                           #####
    841 #####################################################################################
    842 #####################################################################################
    843 
    844 
    845 class ExprParser:
    846     """a class used to convert a list of tokens into a cpp Expr object"""
    847 
    848     re_octal   = re.compile(r"\s*\(0[0-7]+\).*")
    849     re_decimal = re.compile(r"\s*\(\d+[ulUL]*\).*")
    850     re_hexadecimal = re.compile(r"\s*\(0[xX][0-9a-fA-F]*\).*")
    851 
    852     def __init__(self,tokens):
    853         self.tok = tokens
    854         self.n   = len(self.tok)
    855         self.i   = 0
    856 
    857     def mark(self):
    858         return self.i
    859 
    860     def release(self,pos):
    861         self.i = pos
    862 
    863     def peekId(self):
    864         if self.i < self.n:
    865             return self.tok[self.i].id
    866         return None
    867 
    868     def peek(self):
    869         if self.i < self.n:
    870             return self.tok[self.i]
    871         return None
    872 
    873     def skip(self):
    874         if self.i < self.n:
    875             self.i += 1
    876 
    877     def skipOptional(self,id):
    878         if self.i < self.n and self.tok[self.i].id == id:
    879             self.i += 1
    880 
    881     def skipSpaces(self):
    882         i   = self.i
    883         n   = self.n
    884         tok = self.tok
    885         while i < n and (tok[i] == tokSPACE or tok[i] == tokLN):
    886             i += 1
    887         self.i = i
    888 
    889     # all the isXXX functions returns a (expr,nextpos) pair if a match is found
    890     # or None if not
    891 
    892     def is_integer(self):
    893         id = self.tok[self.i].id
    894         c  = id[0]
    895         if c < '0' or c > '9':
    896             return None
    897 
    898         m = ExprParser.re_octal.match(id)
    899         if m:
    900             return (IntExpr(id), m.end(1))
    901 
    902         m = ExprParser.re_decimal.match(id)
    903         if m:
    904             return (IntExpr(id), m.end(1))
    905 
    906         m = ExprParser.re_hexadecimal(id)
    907         if m:
    908             return (IntExpr(id), m.end(1))
    909 
    910         return None
    911 
    912     def is_defined(self):
    913         id = self.tok[self.i].id
    914         if id != "defined":
    915             return None
    916 
    917         pos = self.mark()
    918 
    919         use_paren = 0
    920         if self.peekId() == tokLPAREN:
    921             self.skip()
    922             use_paren = 1
    923 
    924         if self.peekId() != tokIDENT:
    925             self.throw( BadExpectedToken, "identifier expected")
    926 
    927         macroname = self.peek().value
    928         self.skip()
    929         if use_paren:
    930             self.skipSpaces()
    931             if self.peekId() != tokRPAREN:
    932                 self.throw( BadExpectedToken, "missing right-paren after 'defined' directive")
    933             self.skip()
    934 
    935         i = self.i
    936         return (DefinedExpr(macroname),i+1)
    937 
    938     def is_call_or_ident(self):
    939         pass
    940 
    941     def parse(self, i):
    942         return None
    943 
    944 #####################################################################################
    945 #####################################################################################
    946 #####                                                                           #####
    947 #####           C P P   E X P R E S S I O N S                                   #####
    948 #####                                                                           #####
    949 #####################################################################################
    950 #####################################################################################
    951 
    952 class CppInvalidExpression(Exception):
    953     """an exception raised when an invalid/unsupported cpp expression is detected"""
    954     pass
    955 
    956 class CppExpr:
    957     """a class that models the condition of #if directives into
    958         an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2)
    959         where "op" is a string describing the operation"""
    960 
    961     unaries  = [ "!", "~" ]
    962     binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=" ]
    963     precedences = { "||": 1,
    964                     "&&": 2,
    965                      "|": 3,
    966                      "^": 4,
    967                      "&": 5,
    968                      "==":6, "!=":6,
    969                      "<":7, "<=":7, ">":7, ">=":7,
    970                      "<<":8, ">>":8,
    971                      "+":9, "-":9,
    972                      "*":10, "/":10, "%":10,
    973                      "!":11, "~":12
    974                      }
    975 
    976     def __init__(self, tokens):
    977         """initialize a CppExpr. 'tokens' must be a CppToken list"""
    978         self.tok  = tokens
    979         self.n    = len(tokens)
    980         if debugCppExpr:
    981             print "CppExpr: trying to parse %s" % repr(tokens)
    982         expr      = self.is_expr(0)
    983         if debugCppExpr:
    984             print "CppExpr: got " + repr(expr)
    985         self.expr = expr[0]
    986 
    987     re_cpp_constant = re.compile(r"((\d|\w|_)+)")
    988 
    989     def throw(self,exception,i,msg):
    990         if i < self.n:
    991             tok = self.tok[i]
    992             print "%d:%d: %s" % (tok.lineno,tok.colno,msg)
    993         else:
    994             print "EOF: %s" % msg
    995         raise exception
    996 
    997     def skip_spaces(self,i):
    998         """skip spaces in input token list"""
    999         while i < self.n:
   1000             t = self.tok[i]
   1001             if t.id != tokSPACE and t.id != tokLN:
   1002                 break
   1003             i += 1
   1004         return i
   1005 
   1006     def expectId(self,i,id):
   1007         """check that a given token id is at the current position, then skip over it"""
   1008         i = self.skip_spaces(i)
   1009         if i >= self.n or self.tok[i].id != id:
   1010             self.throw(BadExpectedToken,i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[i].id))
   1011         return i+1
   1012 
   1013     def expectIdent(self,i):
   1014         i = self.skip_spaces(i)
   1015         if i >= self.n or self.tok[i].id != tokIDENT:
   1016             self.throw(BadExpectedToken,i,"### expecting identifier in expression, got '%s'" % (id, self.tok[i].id))
   1017         return i+1
   1018 
   1019     # the is_xxxxx function returns either None or a pair (e,nextpos)
   1020     # where 'e' is an expression tuple (e.g. (op,arg)) and 'nextpos' is
   1021     # the corresponding next position in the input token list
   1022     #
   1023 
   1024     def is_decimal(self,i):
   1025         v = self.tok[i].value[:]
   1026         while len(v) > 0 and v[-1] in "ULul":
   1027             v = v[:-1]
   1028         for digit in v:
   1029             if not digit.isdigit():
   1030                 return None
   1031 
   1032         # for an integer expression tuple, the argument
   1033         # is simply the value as an integer
   1034         val = string.atoi(v)
   1035         return ("int", val), i+1
   1036 
   1037     def is_hexadecimal(self,i):
   1038         v = self.tok[i].value[:]
   1039         while len(v) > 0 and v[-1] in "ULul":
   1040             v = v[:-1]
   1041         if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"):
   1042             for digit in v[2:]:
   1043                 if not digit in "0123456789abcdefABCDEF":
   1044                     return None
   1045 
   1046             # for an hex expression tuple, the argument
   1047             # is the value as an integer
   1048             val = int(v[2:], 16)
   1049             return ("hex", val), i+1
   1050 
   1051         return None
   1052 
   1053     def is_integer(self,i):
   1054         if self.tok[i].id != tokNUMBER:
   1055             return None
   1056 
   1057         c = self.is_decimal(i)
   1058         if c: return c
   1059 
   1060         c = self.is_hexadecimal(i)
   1061         if c: return c
   1062 
   1063         return None
   1064 
   1065     def is_number(self,i):
   1066         t = self.tok[i]
   1067         if t.id == tokMINUS and i+1 < self.n:
   1068             c = self.is_integer(i+1)
   1069             if c:
   1070                 e, i2 = c
   1071                 op, val  = e
   1072                 return (op, -val), i2
   1073         if t.id == tokPLUS and i+1 < self.n:
   1074             c = self.is_integer(i+1)
   1075             if c: return c
   1076 
   1077         return self.is_integer(i)
   1078 
   1079 
   1080     def is_alnum(self,i):
   1081         """test wether a given token is alpha-numeric"""
   1082         i = self.skip_spaces(i)
   1083         if i >= self.n:
   1084             return None
   1085         t = self.tok[i]
   1086         m = CppExpr.re_cpp_constant.match(t.id)
   1087         if m:
   1088             #print "... alnum '%s'" % m.group(1)
   1089             r = m.group(1)
   1090             return ("ident", r), i+1
   1091         return None
   1092 
   1093     def is_defined(self,i):
   1094         t = self.tok[i]
   1095         if t.id != tokDEFINED:
   1096             return None
   1097 
   1098         # we have the defined keyword, check the rest
   1099         i = self.skip_spaces(i+1)
   1100         use_parens = 0
   1101         if i < self.n and self.tok[i].id == tokLPAREN:
   1102             use_parens = 1
   1103             i = self.skip_spaces(i+1)
   1104 
   1105         if i >= self.n:
   1106             self.throw(CppConstantExpected,i,"### 'defined' must be followed  by macro name or left paren")
   1107 
   1108         t = self.tok[i]
   1109         if t.id != tokIDENT:
   1110             self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name")
   1111 
   1112         i += 1
   1113         if use_parens:
   1114             i = self.expectId(i,tokRPAREN)
   1115 
   1116         return ("defined",t.value), i
   1117 
   1118 
   1119     def is_call_or_ident(self,i):
   1120         i = self.skip_spaces(i)
   1121         if i >= self.n:
   1122             return None
   1123 
   1124         t = self.tok[i]
   1125         if t.id != tokIDENT:
   1126             return None
   1127 
   1128         name = t.value
   1129 
   1130         i = self.skip_spaces(i+1)
   1131         if i >= self.n or self.tok[i].id != tokLPAREN:
   1132             return ("ident", name), i
   1133 
   1134         params    = []
   1135         depth     = 1
   1136         i += 1
   1137         j  = i
   1138         while i < self.n:
   1139             id = self.tok[i].id
   1140             if id == tokLPAREN:
   1141                 depth += 1
   1142             elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
   1143                 while j < i and self.tok[j].id == tokSPACE:
   1144                     j += 1
   1145                 k = i
   1146                 while k > j and self.tok[k-1].id == tokSPACE:
   1147                     k -= 1
   1148                 param = self.tok[j:k]
   1149                 params.append( param )
   1150                 if id == tokRPAREN:
   1151                     break
   1152                 j = i+1
   1153             elif id == tokRPAREN:
   1154                 depth -= 1
   1155             i += 1
   1156 
   1157         if i >= self.n:
   1158             return None
   1159 
   1160         return ("call", (name, params)), i+1
   1161 
   1162     def is_token(self,i,token):
   1163         i = self.skip_spaces(i)
   1164         if i >= self.n or self.tok[i].id != token:
   1165             return None
   1166         return token, i+1
   1167 
   1168 
   1169     def is_value(self,i):
   1170         t = self.tok[i]
   1171         if t.id == tokSTRING:
   1172             return ("string", t.value), i+1
   1173 
   1174         c = self.is_number(i)
   1175         if c: return c
   1176 
   1177         c = self.is_defined(i)
   1178         if c: return c
   1179 
   1180         c = self.is_call_or_ident(i)
   1181         if c: return c
   1182 
   1183         i = self.skip_spaces(i)
   1184         if i >= self.n or self.tok[i].id != tokLPAREN:
   1185             return None
   1186 
   1187         popcount = 1
   1188         i2       = i+1
   1189         while i2 < self.n:
   1190             t = self.tok[i2]
   1191             if t.id == tokLPAREN:
   1192                 popcount += 1
   1193             elif t.id == tokRPAREN:
   1194                 popcount -= 1
   1195                 if popcount == 0:
   1196                     break
   1197             i2 += 1
   1198 
   1199         if popcount != 0:
   1200             self.throw(CppInvalidExpression, i, "expression missing closing parenthesis")
   1201 
   1202         if debugCppExpr:
   1203             print "CppExpr: trying to parse sub-expression %s" % repr(self.tok[i+1:i2])
   1204         oldcount   = self.n
   1205         self.n     = i2
   1206         c          = self.is_expr(i+1)
   1207         self.n     = oldcount
   1208         if not c:
   1209             self.throw(CppInvalidExpression, i, "invalid expression within parenthesis")
   1210 
   1211         e, i = c
   1212         return e, i2+1
   1213 
   1214     def is_unary(self,i):
   1215         i = self.skip_spaces(i)
   1216         if i >= self.n:
   1217             return None
   1218 
   1219         t = self.tok[i]
   1220         if t.id in CppExpr.unaries:
   1221             c = self.is_unary(i+1)
   1222             if not c:
   1223                 self.throw(CppInvalidExpression, i, "%s operator must be followed by value" % t.id)
   1224             e, i = c
   1225             return (t.id, e), i
   1226 
   1227         return self.is_value(i)
   1228 
   1229     def is_binary(self,i):
   1230         i = self.skip_spaces(i)
   1231         if i >= self.n:
   1232             return None
   1233 
   1234         c = self.is_unary(i)
   1235         if not c:
   1236             return None
   1237 
   1238         e1, i2 = c
   1239         i2 = self.skip_spaces(i2)
   1240         if i2 >= self.n:
   1241             return c
   1242 
   1243         t = self.tok[i2]
   1244         if t.id in CppExpr.binaries:
   1245             c = self.is_binary(i2+1)
   1246             if not c:
   1247                 self.throw(CppInvalidExpression, i,"### %s operator must be followed by value" % t.id )
   1248             e2, i3 = c
   1249             return (t.id, e1, e2), i3
   1250 
   1251         return None
   1252 
   1253     def is_expr(self,i):
   1254         return self.is_binary(i)
   1255 
   1256     def dump_node(self,e):
   1257         op = e[0]
   1258         line = "(" + op
   1259         if op == "int":
   1260             line += " %d)" % e[1]
   1261         elif op == "hex":
   1262             line += " 0x%x)" % e[1]
   1263         elif op == "ident":
   1264             line += " %s)" % e[1]
   1265         elif op == "defined":
   1266             line += " %s)" % e[1]
   1267         elif op == "call":
   1268             arg = e[1]
   1269             line += " %s [" % arg[0]
   1270             prefix = ""
   1271             for param in arg[1]:
   1272                 par = ""
   1273                 for tok in param:
   1274                     par += str(tok)
   1275                 line += "%s%s" % (prefix, par)
   1276                 prefix = ","
   1277             line += "])"
   1278         elif op in CppExpr.unaries:
   1279             line += " %s)" % self.dump_node(e[1])
   1280         elif op in CppExpr.binaries:
   1281             line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
   1282         else:
   1283             line += " ?%s)" % repr(e[1])
   1284 
   1285         return line
   1286 
   1287     def __repr__(self):
   1288         return self.dump_node(self.expr)
   1289 
   1290     def source_node(self,e):
   1291         op = e[0]
   1292         if op == "int":
   1293             return "%d" % e[1]
   1294         if op == "hex":
   1295             return "0x%x" % e[1]
   1296         if op == "ident":
   1297             # XXX: should try to expand
   1298             return e[1]
   1299         if op == "defined":
   1300             return "defined(%s)" % e[1]
   1301 
   1302         prec = CppExpr.precedences.get(op,1000)
   1303         arg  = e[1]
   1304         if op in CppExpr.unaries:
   1305             arg_src = self.source_node(arg)
   1306             arg_op  = arg[0]
   1307             arg_prec = CppExpr.precedences.get(arg[0],1000)
   1308             if arg_prec < prec:
   1309                 return "!(" + arg_src + ")"
   1310             else:
   1311                 return "!" + arg_src
   1312         if op in CppExpr.binaries:
   1313             arg2     = e[2]
   1314             arg1_op  = arg[0]
   1315             arg2_op  = arg2[0]
   1316             arg1_src = self.source_node(arg)
   1317             arg2_src = self.source_node(arg2)
   1318             if CppExpr.precedences.get(arg1_op,1000) < prec:
   1319                 arg1_src = "(%s)" % arg1_src
   1320             if CppExpr.precedences.get(arg2_op,1000) < prec:
   1321                 arg2_src = "(%s)" % arg2_src
   1322 
   1323             return "%s %s %s" % (arg1_src, op, arg2_src)
   1324         return "???"
   1325 
   1326     def __str__(self):
   1327         return self.source_node(self.expr)
   1328 
   1329     def int_node(self,e):
   1330         if e[0] == "int":
   1331             return e[1]
   1332         elif e[1] == "hex":
   1333             return int(e[1],16)
   1334         else:
   1335             return None
   1336 
   1337     def toInt(self):
   1338         return self.int_node(self.expr)
   1339 
   1340     def optimize_node(self,e,macros={}):
   1341         op = e[0]
   1342         if op == "defined":
   1343             name = e[1]
   1344             if macros.has_key(name):
   1345                 if macros[name] == kCppUndefinedMacro:
   1346                     return ("int", 0)
   1347                 else:
   1348                     return ("int", 1)
   1349 
   1350             if kernel_remove_config_macros and name.startswith("CONFIG_"):
   1351                 return ("int", 0)
   1352 
   1353         elif op == "!":
   1354             op, v = e
   1355             v = self.optimize_node(v, macros)
   1356             if v[0] == "int":
   1357                 if v[1] == 0:
   1358                     return ("int", 1)
   1359                 else:
   1360                     return ("int", 0)
   1361 
   1362         elif op == "&&":
   1363             op, l, r = e
   1364             l  = self.optimize_node(l, macros)
   1365             r  = self.optimize_node(r, macros)
   1366             li = self.int_node(l)
   1367             ri = self.int_node(r)
   1368             if li != None:
   1369                 if li == 0:
   1370                     return ("int", 0)
   1371                 else:
   1372                     return r
   1373 
   1374         elif op == "||":
   1375             op, l, r = e
   1376             l  = self.optimize_node(l, macros)
   1377             r  = self.optimize_node(r, macros)
   1378             li = self.int_node(l)
   1379             ri = self.int_node(r)
   1380             if li != None:
   1381                 if li == 0:
   1382                     return r
   1383                 else:
   1384                     return ("int", 1)
   1385             elif ri != None:
   1386                 if ri == 0:
   1387                     return l
   1388                 else:
   1389                     return ("int", 1)
   1390         return e
   1391 
   1392     def optimize(self,macros={}):
   1393         self.expr = self.optimize_node(self.expr,macros)
   1394 
   1395     def removePrefixedNode(self,e,prefix,names):
   1396         op = e[0]
   1397         if op == "defined":
   1398             name = e[1]
   1399             if name.startswith(prefix):
   1400                 if names.has_key[name] and names[name] == "y":
   1401                     return ("int", 1)
   1402                 else:
   1403                     return ("int", 0)
   1404 
   1405         elif op in CppExpr.unaries:
   1406             op, v = e
   1407             v = self.removePrefixedNode(v,prefix,names)
   1408             return (op, v)
   1409         elif op in CppExpr.binaries:
   1410             op, v1, v2 = e
   1411             v1 = self.removePrefixedNode(v1,prefix,names)
   1412             v2 = self.removePrefixedNode(v2,prefix,names)
   1413             return (op, v1, v2)
   1414         elif op == "call":
   1415             func, params = e[1]
   1416             params2 = []
   1417             for param in params:
   1418                 params2.append( self.removePrefixedNode(param,prefix,names) )
   1419             return (op, (func, params2))
   1420 
   1421         return e
   1422 
   1423     def removePrefixed(self,prefix,names={}):
   1424         self.expr = self.removePrefixedNode(self.expr,prefix,names)
   1425 
   1426     def is_equal_node(self,e1,e2):
   1427         if e1[0] != e2[0] or len(e1) != len(e2):
   1428             return False
   1429 
   1430         op = e1[0]
   1431         if op == "int" or op == "hex" or op == "!" or op == "defined":
   1432             return e1[0] == e2[0]
   1433 
   1434         return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2])
   1435 
   1436     def is_equal(self,other):
   1437         return self.is_equal_node(self.expr,other.expr)
   1438 
   1439 def test_cpp_expr(expr, expected):
   1440     e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
   1441     #print repr(e.expr)
   1442     s1 = repr(e)
   1443     if s1 != expected:
   1444         print "KO: expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
   1445     else:
   1446         #print "OK: expression '%s'" % expr
   1447         pass
   1448 
   1449 def test_cpp_expr_optim(expr, expected, macros={}):
   1450     e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
   1451     e.optimize(macros)
   1452 
   1453     s1 = repr(e)
   1454     if s1 != expected:
   1455         print "KO: optimized expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
   1456     else:
   1457         #print "OK: optmized expression '%s'" % expr
   1458         pass
   1459 
   1460 def test_cpp_expr_source(expr, expected):
   1461     e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
   1462     s1 = str(e)
   1463     if s1 != expected:
   1464         print "KO: source expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
   1465     else:
   1466         #print "OK: source expression '%s'" % expr
   1467         pass
   1468 
   1469 def test_CppExpr():
   1470     print "testing CppExpr"
   1471     test_cpp_expr( "0", "(int 0)" )
   1472     test_cpp_expr( "1", "(int 1)" )
   1473     test_cpp_expr( "1 && 1", "(&& (int 1) (int 1))" )
   1474     test_cpp_expr( "1 && 0", "(&& (int 1) (int 0))" )
   1475     test_cpp_expr( "EXAMPLE", "(ident EXAMPLE)" )
   1476     test_cpp_expr( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
   1477     test_cpp_expr( "defined(EXAMPLE)", "(defined EXAMPLE)" )
   1478     test_cpp_expr( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
   1479     test_cpp_expr( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
   1480     test_cpp_expr( "FOO(BAR)", "(call FOO [BAR])" )
   1481 
   1482     test_cpp_expr_optim( "0", "(int 0)" )
   1483     test_cpp_expr_optim( "1", "(int 1)" )
   1484     test_cpp_expr_optim( "1 && 1", "(int 1)" )
   1485     test_cpp_expr_optim( "1 && 0", "(int 0)" )
   1486     test_cpp_expr_optim( "0 && 1", "(int 0)" )
   1487     test_cpp_expr_optim( "0 && 0", "(int 0)" )
   1488     test_cpp_expr_optim( "1 || 1", "(int 1)" )
   1489     test_cpp_expr_optim( "1 || 0", "(int 1)" )
   1490     test_cpp_expr_optim( "0 || 1", "(int 1)" )
   1491     test_cpp_expr_optim( "0 || 0", "(int 0)" )
   1492     test_cpp_expr_optim( "EXAMPLE", "(ident EXAMPLE)" )
   1493     test_cpp_expr_optim( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
   1494     test_cpp_expr_optim( "defined(EXAMPLE)", "(defined EXAMPLE)" )
   1495     test_cpp_expr_optim( "defined(EXAMPLE)", "(int 1)", { "EXAMPLE": "XOWOE" } )
   1496     test_cpp_expr_optim( "defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro} )
   1497     test_cpp_expr_optim( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
   1498     test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 0)", { "EXAMPLE" : "XOWOE" } )
   1499     test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro } )
   1500     test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
   1501     test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "ABC" : "1" } )
   1502     test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "BINGO" : "1" } )
   1503     test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(defined ABC)", { "BINGO" : kCppUndefinedMacro } )
   1504     test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 0)", { "ABC" : kCppUndefinedMacro, "BINGO" : kCppUndefinedMacro } )
   1505 
   1506     test_cpp_expr_source( "0", "0" )
   1507     test_cpp_expr_source( "1", "1" )
   1508     test_cpp_expr_source( "1 && 1", "1 && 1" )
   1509     test_cpp_expr_source( "1 && 0", "1 && 0" )
   1510     test_cpp_expr_source( "0 && 1", "0 && 1" )
   1511     test_cpp_expr_source( "0 && 0", "0 && 0" )
   1512     test_cpp_expr_source( "1 || 1", "1 || 1" )
   1513     test_cpp_expr_source( "1 || 0", "1 || 0" )
   1514     test_cpp_expr_source( "0 || 1", "0 || 1" )
   1515     test_cpp_expr_source( "0 || 0", "0 || 0" )
   1516     test_cpp_expr_source( "EXAMPLE", "EXAMPLE" )
   1517     test_cpp_expr_source( "EXAMPLE - 3", "EXAMPLE - 3" )
   1518     test_cpp_expr_source( "defined(EXAMPLE)", "defined(EXAMPLE)" )
   1519     test_cpp_expr_source( "defined EXAMPLE", "defined(EXAMPLE)" )
   1520 
   1521 
   1522 #####################################################################################
   1523 #####################################################################################
   1524 #####                                                                           #####
   1525 #####          C P P   B L O C K                                                #####
   1526 #####                                                                           #####
   1527 #####################################################################################
   1528 #####################################################################################
   1529 
   1530 class Block:
   1531     """a class used to model a block of input source text. there are two block types:
   1532         - directive blocks: contain the tokens of a single pre-processor directive (e.g. #if)
   1533         - text blocks, contain the tokens of non-directive blocks
   1534 
   1535        the cpp parser class below will transform an input source file into a list of Block
   1536        objects (grouped in a BlockList object for convenience)"""
   1537 
   1538     def __init__(self,tokens,directive=None,lineno=0):
   1539         """initialize a new block, if 'directive' is None, this is a text block
   1540            NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)'
   1541                  and '#ifndef MACRO' into '#if !defined(MACRO)'"""
   1542         if directive == "ifdef":
   1543             tok = Token()
   1544             tok.set(tokDEFINED)
   1545             tokens = [ tok ] + tokens
   1546             directive = "if"
   1547 
   1548         elif directive == "ifndef":
   1549             tok1 = Token()
   1550             tok2 = Token()
   1551             tok1.set(tokNOT)
   1552             tok2.set(tokDEFINED)
   1553             tokens = [ tok1, tok2 ] + tokens
   1554             directive = "if"
   1555 
   1556         self.tokens    = tokens
   1557         self.directive = directive
   1558         if lineno > 0:
   1559             self.lineno = lineno
   1560         else:
   1561             self.lineno = self.tokens[0].lineno
   1562 
   1563         if self.isIf():
   1564             self.expr = CppExpr( self.tokens )
   1565 
   1566     def isDirective(self):
   1567         """returns True iff this is a directive block"""
   1568         return self.directive != None
   1569 
   1570     def isConditional(self):
   1571         """returns True iff this is a conditional directive block"""
   1572         return self.directive in ["if","ifdef","ifndef","else","elif","endif"]
   1573 
   1574     def isDefine(self):
   1575         """returns the macro name in a #define directive, or None otherwise"""
   1576         if self.directive != "define":
   1577             return None
   1578 
   1579         return self.tokens[0].value
   1580 
   1581     def isIf(self):
   1582         """returns True iff this is an #if-like directive block"""
   1583         return self.directive in ["if","ifdef","ifndef","elif"]
   1584 
   1585     def isInclude(self):
   1586         """checks wether this is a #include directive. if true, then returns the
   1587            corresponding file name (with brackets or double-qoutes). None otherwise"""
   1588         if self.directive != "include":
   1589             return None
   1590 
   1591         #print "iii " + repr(self.tokens)
   1592         if self.tokens[0].id == tokSTRING:
   1593             # a double-quote include, that's easy
   1594             return self.tokens[0].value
   1595 
   1596         # we only want the bracket part, not any comments or junk after it
   1597         if self.tokens[0].id == "<":
   1598             i   = 0
   1599             tok = self.tokens
   1600             n   = len(tok)
   1601             while i < n and tok[i].id != ">":
   1602                 i += 1
   1603 
   1604             if i >= n:
   1605                 return None
   1606 
   1607             return string.join([ str(x) for x in tok[:i+1] ],"")
   1608 
   1609         else:
   1610             return None
   1611 
   1612     def removeWhiteSpace(self):
   1613         # Remove trailing whitespace and empty lines
   1614         # All whitespace is also contracted to a single space
   1615         if self.directive != None:
   1616             return
   1617 
   1618         tokens = []
   1619         line   = 0     # index of line start
   1620         space  = -1    # index of first space, or -1
   1621         ii = 0
   1622         nn = len(self.tokens)
   1623         while ii < nn:
   1624             tok = self.tokens[ii]
   1625 
   1626             # If we find a space, record its position if this is the first
   1627             # one the line start or the previous character. Don't append
   1628             # anything to tokens array yet though.
   1629             if tok.id == tokSPACE:
   1630                 if space < 0:
   1631                     space = ii
   1632                 ii += 1
   1633                 continue
   1634 
   1635             # If this is a line space, ignore the spaces we found previously
   1636             # on the line, and remove empty lines.
   1637             if tok.id == tokLN:
   1638                 old_line  = line
   1639                 old_space = space
   1640                 #print "N line=%d space=%d ii=%d" % (line, space, ii)
   1641                 ii   += 1
   1642                 line  = ii
   1643                 space = -1
   1644                 if old_space == old_line:  # line only contains spaces
   1645                     #print "-s"
   1646                     continue
   1647                 if ii-1 == old_line:  # line is empty
   1648                     #print "-e"
   1649                     continue
   1650                 tokens.append(tok)
   1651                 continue
   1652 
   1653             # Other token, append any space range if any, converting each
   1654             # one to a single space character, then append the token.
   1655             if space >= 0:
   1656                 jj = space
   1657                 space = -1
   1658                 while jj < ii:
   1659                     tok2 = self.tokens[jj]
   1660                     tok2.value = " "
   1661                     tokens.append(tok2)
   1662                     jj += 1
   1663 
   1664             tokens.append(tok)
   1665             ii += 1
   1666 
   1667         self.tokens = tokens
   1668 
   1669     def writeWithWarning(self,out,warning,left_count,repeat_count):
   1670         # removeWhiteSpace() will sometimes creates non-directive blocks
   1671         # without any tokens. These come from blocks that only contained
   1672         # empty lines and spaces. They should not be printed in the final
   1673         # output, and then should not be counted for this operation.
   1674         #
   1675         if not self.directive and self.tokens == []:
   1676             return left_count
   1677 
   1678         if self.directive:
   1679             out.write(str(self) + "\n")
   1680             left_count -= 1
   1681             if left_count == 0:
   1682                 out.write(warning)
   1683                 left_count = repeat_count
   1684 
   1685         else:
   1686             for tok in self.tokens:
   1687                 out.write(str(tok))
   1688                 if tok.id == tokLN:
   1689                     left_count -= 1
   1690                     if left_count == 0:
   1691                         out.write(warning)
   1692                         left_count = repeat_count
   1693 
   1694         return left_count
   1695 
   1696 
   1697     def __repr__(self):
   1698         """generate the representation of a given block"""
   1699         if self.directive:
   1700             result = "#%s " % self.directive
   1701             if self.isIf():
   1702                 result += repr(self.expr)
   1703             else:
   1704                 for tok in self.tokens:
   1705                     result += repr(tok)
   1706         else:
   1707             result = ""
   1708             for tok in self.tokens:
   1709                 result += repr(tok)
   1710 
   1711         return result
   1712 
   1713     def __str__(self):
   1714         """generate the string representation of a given block"""
   1715         if self.directive:
   1716             if self.directive == "if":
   1717                 # small optimization to re-generate #ifdef and #ifndef
   1718                 e = self.expr.expr
   1719                 op = e[0]
   1720                 if op == "defined":
   1721                     result = "#ifdef %s" % e[1]
   1722                 elif op == "!" and e[1][0] == "defined":
   1723                     result = "#ifndef %s" % e[1][1]
   1724                 else:
   1725                     result = "#if " + str(self.expr)
   1726             else:
   1727                 result = "#%s" % self.directive
   1728                 if len(self.tokens):
   1729                     result += " "
   1730                 for tok in self.tokens:
   1731                     result += str(tok)
   1732         else:
   1733             result = ""
   1734             for tok in self.tokens:
   1735                 result += str(tok)
   1736 
   1737         return result
   1738 
   1739 class BlockList:
   1740     """a convenience class used to hold and process a list of blocks returned by
   1741        the cpp parser"""
   1742     def __init__(self,blocks):
   1743         self.blocks = blocks
   1744 
   1745     def __len__(self):
   1746         return len(self.blocks)
   1747 
   1748     def __getitem__(self,n):
   1749         return self.blocks[n]
   1750 
   1751     def __repr__(self):
   1752         return repr(self.blocks)
   1753 
   1754     def __str__(self):
   1755         result = ""
   1756         for b in self.blocks:
   1757             result += str(b)
   1758             if b.isDirective():
   1759                 result += '\n'
   1760         return result
   1761 
   1762     def  optimizeIf01(self):
   1763         """remove the code between #if 0 .. #endif in a BlockList"""
   1764         self.blocks = optimize_if01(self.blocks)
   1765 
   1766     def optimizeMacros(self, macros):
   1767         """remove known defined and undefined macros from a BlockList"""
   1768         for b in self.blocks:
   1769             if b.isIf():
   1770                 b.expr.optimize(macros)
   1771 
   1772     def removeMacroDefines(self,macros):
   1773         """remove known macro definitions from a BlockList"""
   1774         self.blocks = remove_macro_defines(self.blocks,macros)
   1775 
   1776     def removePrefixed(self,prefix,names):
   1777         for b in self.blocks:
   1778             if b.isIf():
   1779                 b.expr.removePrefixed(prefix,names)
   1780 
   1781     def removeWhiteSpace(self):
   1782         for b in self.blocks:
   1783             b.removeWhiteSpace()
   1784 
   1785     def optimizeAll(self,macros):
   1786         self.optimizeMacros(macros)
   1787         self.optimizeIf01()
   1788         return
   1789 
   1790     def findIncludes(self):
   1791         """return the list of included files in a BlockList"""
   1792         result = []
   1793         for b in self.blocks:
   1794             i = b.isInclude()
   1795             if i:
   1796                 result.append(i)
   1797 
   1798         return result
   1799 
   1800 
   1801     def write(self,out):
   1802         out.write(str(self))
   1803 
   1804     def writeWithWarning(self,out,warning,repeat_count):
   1805         left_count = repeat_count
   1806         for b in self.blocks:
   1807             left_count = b.writeWithWarning(out,warning,left_count,repeat_count)
   1808 
   1809     def removeComments(self):
   1810         for b in self.blocks:
   1811             for tok in b.tokens:
   1812                 if tok.id == tokSPACE:
   1813                     tok.value = " "
   1814 
   1815     def removeVarsAndFuncs(self,knownStatics=set()):
   1816         """remove all extern and static declarations corresponding
   1817            to variable and function declarations. we only accept typedefs
   1818            and enum/structs/union declarations.
   1819 
   1820            however, we keep the definitions corresponding to the set
   1821            of known static inline functions in the set 'knownStatics',
   1822            which is useful for optimized byteorder swap functions and
   1823            stuff like that.
   1824            """
   1825         # state = 0 => normal (i.e. LN + spaces)
   1826         # state = 1 => typedef/struct encountered, ends with ";"
   1827         # state = 2 => var declaration encountered, ends with ";"
   1828         # state = 3 => func declaration encountered, ends with "}"
   1829         state      = 0
   1830         depth      = 0
   1831         blocks2    = []
   1832         skipTokens = False
   1833         for b in self.blocks:
   1834             if b.isDirective():
   1835                 blocks2.append(b)
   1836             else:
   1837                 n     = len(b.tokens)
   1838                 i     = 0
   1839                 if skipTokens:
   1840                     first = n
   1841                 else:
   1842                     first = 0
   1843                 while i < n:
   1844                     tok = b.tokens[i]
   1845                     tokid = tok.id
   1846                     # If we are not looking for the start of a new
   1847                     # type/var/func, then skip over tokens until
   1848                     # we find our terminator, managing the depth of
   1849                     # accolades as we go.
   1850                     if state > 0:
   1851                         terminator = False
   1852                         if tokid == '{':
   1853                             depth += 1
   1854                         elif tokid == '}':
   1855                             if depth > 0:
   1856                                 depth -= 1
   1857                             if (depth == 0) and (state == 3):
   1858                                 terminator = True
   1859                         elif tokid == ';' and depth == 0:
   1860                             terminator = True
   1861 
   1862                         if terminator:
   1863                             # we found the terminator
   1864                             state = 0
   1865                             if skipTokens:
   1866                                 skipTokens = False
   1867                                 first = i+1
   1868 
   1869                         i = i+1
   1870                         continue
   1871 
   1872                     # We are looking for the start of a new type/func/var
   1873                     # ignore whitespace
   1874                     if tokid in [tokLN, tokSPACE]:
   1875                         i = i+1
   1876                         continue
   1877 
   1878                     # Is it a new type definition, then start recording it
   1879                     if tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
   1880                         #print "$$$ keep type declr" + repr(b.tokens[i:])
   1881                         state = 1
   1882                         i     = i+1
   1883                         continue
   1884 
   1885                     # Is it a variable or function definition. If so, first
   1886                     # try to determine which type it is, and also extract
   1887                     # its name.
   1888                     #
   1889                     # We're going to parse the next tokens of the same block
   1890                     # until we find a semi-column or a left parenthesis.
   1891                     #
   1892                     # The semi-column corresponds to a variable definition,
   1893                     # the left-parenthesis to a function definition.
   1894                     #
   1895                     # We also assume that the var/func name is the last
   1896                     # identifier before the terminator.
   1897                     #
   1898                     j = i+1
   1899                     ident = ""
   1900                     while j < n:
   1901                         tokid = b.tokens[j].id
   1902                         if tokid == '(':  # a function declaration
   1903                             state = 3
   1904                             break
   1905                         elif tokid == ';': # a variable declaration
   1906                             state = 2
   1907                             break
   1908                         if tokid == tokIDENT:
   1909                             ident = b.tokens[j].value
   1910                         j += 1
   1911 
   1912                     if j >= n:
   1913                         # This can only happen when the declaration
   1914                         # does not end on the current block (e.g. with
   1915                         # a directive mixed inside it.
   1916                         #
   1917                         # We will treat it as malformed because
   1918                         # it's very hard to recover from this case
   1919                         # without making our parser much more
   1920                         # complex.
   1921                         #
   1922                         #print "### skip unterminated static '%s'" % ident
   1923                         break
   1924 
   1925                     if ident in knownStatics:
   1926                         #print "### keep var/func '%s': %s" % (ident,repr(b.tokens[i:j]))
   1927                         pass
   1928                     else:
   1929                         # We're going to skip the tokens for this declaration
   1930                         #print "### skip variable /func'%s': %s" % (ident,repr(b.tokens[i:j]))
   1931                         if i > first:
   1932                             blocks2.append( Block(b.tokens[first:i]))
   1933                         skipTokens = True
   1934                         first      = n
   1935 
   1936                     i = i+1
   1937 
   1938                 if i > first:
   1939                     #print "### final '%s'" % repr(b.tokens[first:i])
   1940                     blocks2.append( Block(b.tokens[first:i]) )
   1941 
   1942         self.blocks = blocks2
   1943 
   1944     def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"):
   1945         """insert your standard issue disclaimer that this is an
   1946            auto-generated file, etc.."""
   1947         tokens = CppLineTokenizer( disclaimer ).toTokenList()
   1948         tokens = tokens[:-1]  # remove trailing tokLN
   1949         self.blocks = [ Block(tokens) ] + self.blocks
   1950 
   1951     def replaceTokens(self,replacements=dict()):
   1952         """replace tokens according to the given dict
   1953            """
   1954         for b in self.blocks:
   1955             if not b.isDirective():
   1956                 for tok in b.tokens:
   1957                     if tok.id == tokIDENT:
   1958                         if tok.value in replacements:
   1959                             tok.value = replacements[tok.value]
   1960 
   1961 class BlockParser:
   1962     """a class used to convert an input source file into a BlockList object"""
   1963 
   1964     def __init__(self,tokzer=None):
   1965         """initialize a block parser. the input source is provided through a Tokenizer
   1966            object"""
   1967         self.reset(tokzer)
   1968 
   1969     def reset(self,tokzer):
   1970         self.state  = 1
   1971         self.tokzer = tokzer
   1972 
   1973     def getBlocks(self,tokzer=None):
   1974         """tokenize and parse the input source, return a BlockList object
   1975            NOTE: empty and line-numbering directives are ignored and removed
   1976                  from the result. as a consequence, it is possible to have
   1977                  two successive text blocks in the result"""
   1978         # state 0 => in source code
   1979         # state 1 => in source code, after a LN
   1980         # state 2 => in source code, after LN then some space
   1981         state   = 1
   1982         lastLN  = 0
   1983         current = []
   1984         blocks  = []
   1985 
   1986         if tokzer == None:
   1987             tokzer = self.tokzer
   1988 
   1989         while 1:
   1990             tok = tokzer.getToken()
   1991             if tok.id == tokEOF:
   1992                 break
   1993 
   1994             if tok.id == tokLN:
   1995                 state    = 1
   1996                 current.append(tok)
   1997                 lastLN   = len(current)
   1998 
   1999             elif tok.id == tokSPACE:
   2000                 if state == 1:
   2001                     state = 2
   2002                 current.append(tok)
   2003 
   2004             elif tok.id == "#":
   2005                 if state > 0:
   2006                     # this is the start of a directive
   2007 
   2008                     if lastLN > 0:
   2009                         # record previous tokens as text block
   2010                         block   = Block(current[:lastLN])
   2011                         blocks.append(block)
   2012                         lastLN  = 0
   2013 
   2014                     current = []
   2015 
   2016                     # skip spaces after the #
   2017                     while 1:
   2018                         tok = tokzer.getToken()
   2019                         if tok.id != tokSPACE:
   2020                             break
   2021 
   2022                     if tok.id != tokIDENT:
   2023                         # empty or line-numbering, ignore it
   2024                         if tok.id != tokLN and tok.id != tokEOF:
   2025                             while 1:
   2026                                 tok = tokzer.getToken()
   2027                                 if tok.id == tokLN or tok.id == tokEOF:
   2028                                     break
   2029                         continue
   2030 
   2031                     directive = tok.value
   2032                     lineno    = tok.lineno
   2033 
   2034                     # skip spaces
   2035                     tok = tokzer.getToken()
   2036                     while tok.id == tokSPACE:
   2037                         tok = tokzer.getToken()
   2038 
   2039                     # then record tokens until LN
   2040                     dirtokens = []
   2041                     while tok.id != tokLN and tok.id != tokEOF:
   2042                         dirtokens.append(tok)
   2043                         tok = tokzer.getToken()
   2044 
   2045                     block = Block(dirtokens,directive,lineno)
   2046                     blocks.append(block)
   2047                     state   = 1
   2048 
   2049             else:
   2050                 state = 0
   2051                 current.append(tok)
   2052 
   2053         if len(current) > 0:
   2054             block = Block(current)
   2055             blocks.append(block)
   2056 
   2057         return BlockList(blocks)
   2058 
   2059     def parse(self,tokzer):
   2060         return self.getBlocks( tokzer )
   2061 
   2062     def parseLines(self,lines):
   2063         """parse a list of text lines into a BlockList object"""
   2064         return self.getBlocks( CppLinesTokenizer(lines) )
   2065 
   2066     def parseFile(self,path):
   2067         """parse a file into a BlockList object"""
   2068         file = open(path, "rt")
   2069         result = self.getBlocks( CppFileTokenizer(file) )
   2070         file.close()
   2071         return result
   2072 
   2073 
   2074 def test_block_parsing(lines,expected):
   2075     blocks = BlockParser().parse( CppLinesTokenizer(lines) )
   2076     if len(blocks) != len(expected):
   2077         raise BadExpectedToken, "parser.buildBlocks returned '%s' expecting '%s'" \
   2078               % (str(blocks), repr(expected))
   2079     for n in range(len(blocks)):
   2080         if str(blocks[n]) != expected[n]:
   2081             raise BadExpectedToken, "parser.buildBlocks()[%d] is '%s', expecting '%s'" \
   2082                   % (n, str(blocks[n]), expected[n])
   2083     #for block in blocks:
   2084     #    print block
   2085 
   2086 def test_BlockParser():
   2087     test_block_parsing(["#error hello"],["#error hello"])
   2088     test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ])
   2089     test_block_parsing([ "foo", "  #  ", "bar" ], [ "foo\n","bar\n" ])
   2090     test_block_parsing(\
   2091         [ "foo", "   #  ", "  #  /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ],
   2092         [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] )
   2093 
   2094 
   2095 #####################################################################################
   2096 #####################################################################################
   2097 #####                                                                           #####
   2098 #####        B L O C K   L I S T   O P T I M I Z A T I O N                      #####
   2099 #####                                                                           #####
   2100 #####################################################################################
   2101 #####################################################################################
   2102 
   2103 def  remove_macro_defines( blocks, excludedMacros=set() ):
   2104     """remove macro definitions like #define <macroName>  ...."""
   2105     result = []
   2106     for b in blocks:
   2107         macroName = b.isDefine()
   2108         if macroName == None or not macroName in excludedMacros:
   2109             result.append(b)
   2110 
   2111     return result
   2112 
   2113 def  find_matching_endif( blocks, i ):
   2114     n     = len(blocks)
   2115     depth = 1
   2116     while i < n:
   2117         if blocks[i].isDirective():
   2118             dir = blocks[i].directive
   2119             if dir in [ "if", "ifndef", "ifdef" ]:
   2120                 depth += 1
   2121             elif depth == 1 and dir in [ "else", "elif" ]:
   2122                 return i
   2123             elif dir == "endif":
   2124                 depth -= 1
   2125                 if depth == 0:
   2126                     return i
   2127         i += 1
   2128     return i
   2129 
   2130 def  optimize_if01( blocks ):
   2131     """remove the code between #if 0 .. #endif in a list of CppBlocks"""
   2132     i = 0
   2133     n = len(blocks)
   2134     result = []
   2135     while i < n:
   2136         j = i
   2137         while j < n and not blocks[j].isIf():
   2138             j += 1
   2139         if j > i:
   2140             D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno))
   2141             result += blocks[i:j]
   2142         if j >= n:
   2143             break
   2144         expr = blocks[j].expr
   2145         r    = expr.toInt()
   2146         if r == None:
   2147             result.append(blocks[j])
   2148             i = j + 1
   2149             continue
   2150 
   2151         if r == 0:
   2152             # if 0 => skip everything until the corresponding #endif
   2153             j = find_matching_endif( blocks, j+1 )
   2154             if j >= n:
   2155                 # unterminated #if 0, finish here
   2156                 break
   2157             dir = blocks[j].directive
   2158             if dir == "endif":
   2159                 D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno))
   2160                 i = j + 1
   2161             elif dir == "else":
   2162                 # convert 'else' into 'if 1'
   2163                 D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno))
   2164                 blocks[j].directive = "if"
   2165                 blocks[j].expr      = CppExpr( CppLineTokenizer("1").toTokenList() )
   2166                 i = j
   2167             elif dir == "elif":
   2168                 # convert 'elif' into 'if'
   2169                 D2("convert 'if 0' .. 'elif' into 'if'")
   2170                 blocks[j].directive = "if"
   2171                 i = j
   2172             continue
   2173 
   2174         # if 1 => find corresponding endif and remove/transform them
   2175         k = find_matching_endif( blocks, j+1 )
   2176         if k >= n:
   2177             # unterminated #if 1, finish here
   2178             D2("unterminated 'if 1'")
   2179             result += blocks[j+1:k]
   2180             break
   2181 
   2182         dir = blocks[k].directive
   2183         if dir == "endif":
   2184             D2("convert 'if 1' .. 'endif' (lines %d to %d)"  % (blocks[j].lineno, blocks[k].lineno))
   2185             result += optimize_if01(blocks[j+1:k])
   2186             i       = k+1
   2187         elif dir == "else":
   2188             # convert 'else' into 'if 0'
   2189             D2("convert 'if 1' .. 'else' (lines %d to %d)"  % (blocks[j].lineno, blocks[k].lineno))
   2190             result += optimize_if01(blocks[j+1:k])
   2191             blocks[k].directive = "if"
   2192             blocks[k].expr      = CppExpr( CppLineTokenizer("0").toTokenList() )
   2193             i = k
   2194         elif dir == "elif":
   2195             # convert 'elif' into 'if 0'
   2196             D2("convert 'if 1' .. 'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
   2197             result += optimize_if01(blocks[j+1:k])
   2198             blocks[k].expr      = CppExpr( CppLineTokenizer("0").toTokenList() )
   2199             i = k
   2200     return result
   2201 
   2202 def  test_optimizeAll():
   2203     text = """\
   2204 #if 1
   2205 #define  GOOD_1
   2206 #endif
   2207 #if 0
   2208 #define  BAD_2
   2209 #define  BAD_3
   2210 #endif
   2211 
   2212 #if 1
   2213 #define  GOOD_2
   2214 #else
   2215 #define  BAD_4
   2216 #endif
   2217 
   2218 #if 0
   2219 #define  BAD_5
   2220 #else
   2221 #define  GOOD_3
   2222 #endif
   2223 
   2224 #if 0
   2225 #if 1
   2226 #define  BAD_6
   2227 #endif
   2228 #endif\
   2229 """
   2230 
   2231     expected = """\
   2232 #define GOOD_1
   2233 
   2234 #define GOOD_2
   2235 
   2236 #define GOOD_3
   2237 
   2238 """
   2239 
   2240     print "running test_BlockList.optimizeAll"
   2241     out = StringOutput()
   2242     lines = string.split(text, '\n')
   2243     list = BlockParser().parse( CppLinesTokenizer(lines) )
   2244     #D_setlevel(2)
   2245     list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} )
   2246     #print repr(list)
   2247     list.write(out)
   2248     if out.get() != expected:
   2249         print "KO: macro optimization failed\n"
   2250         print "<<<< expecting '",
   2251         print expected,
   2252         print "'\n>>>> result '"
   2253         print out.get(),
   2254         print "'\n----"
   2255 
   2256 
   2257 #####################################################################################
   2258 #####################################################################################
   2259 #####                                                                           #####
   2260 #####                                                                           #####
   2261 #####                                                                           #####
   2262 #####################################################################################
   2263 #####################################################################################
   2264 
   2265 def runUnitTests():
   2266     """run all unit tests for this program"""
   2267     print "running unit tests"
   2268     test_CppTokenizer()
   2269     test_CppExpr()
   2270     test_optimizeAll()
   2271     test_BlockParser()
   2272     print "OK"
   2273 
   2274 if __name__ == "__main__":
   2275     runUnitTests()
   2276