Home | History | Annotate | Download | only in clinic
      1 import re
      2 import sys
      3 
      4 def negate(condition):
      5     """
      6     Returns a CPP conditional that is the opposite of the conditional passed in.
      7     """
      8     if condition.startswith('!'):
      9         return condition[1:]
     10     return "!" + condition
     11 
     12 class Monitor:
     13     """
     14     A simple C preprocessor that scans C source and computes, line by line,
     15     what the current C preprocessor #if state is.
     16 
     17     Doesn't handle everything--for example, if you have /* inside a C string,
     18     without a matching */ (also inside a C string), or with a */ inside a C
     19     string but on another line and with preprocessor macros in between...
     20     the parser will get lost.
     21 
     22     Anyway this implementation seems to work well enough for the CPython sources.
     23     """
     24 
     25     is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match
     26 
     27     def __init__(self, filename=None, *, verbose=False):
     28         self.stack = []
     29         self.in_comment = False
     30         self.continuation = None
     31         self.line_number = 0
     32         self.filename = filename
     33         self.verbose = verbose
     34 
     35     def __repr__(self):
     36         return ''.join((
     37             '<Monitor ',
     38             str(id(self)),
     39             " line=", str(self.line_number),
     40             " condition=", repr(self.condition()),
     41             ">"))
     42 
     43     def status(self):
     44         return str(self.line_number).rjust(4) + ": " + self.condition()
     45 
     46     def condition(self):
     47         """
     48         Returns the current preprocessor state, as a single #if condition.
     49         """
     50         return " && ".join(condition for token, condition in self.stack)
     51 
     52     def fail(self, *a):
     53         if self.filename:
     54             filename = " " + self.filename
     55         else:
     56             filename = ''
     57         print("Error at" + filename, "line", self.line_number, ":")
     58         print("   ", ' '.join(str(x) for x in a))
     59         sys.exit(-1)
     60 
     61     def close(self):
     62         if self.stack:
     63             self.fail("Ended file while still in a preprocessor conditional block!")
     64 
     65     def write(self, s):
     66         for line in s.split("\n"):
     67             self.writeline(line)
     68 
     69     def writeline(self, line):
     70         self.line_number += 1
     71         line = line.strip()
     72 
     73         def pop_stack():
     74             if not self.stack:
     75                 self.fail("#" + token + " without matching #if / #ifdef / #ifndef!")
     76             return self.stack.pop()
     77 
     78         if self.continuation:
     79             line = self.continuation + line
     80             self.continuation = None
     81 
     82         if not line:
     83             return
     84 
     85         if line.endswith('\\'):
     86             self.continuation = line[:-1].rstrip() + " "
     87             return
     88 
     89         # we have to ignore preprocessor commands inside comments
     90         #
     91         # we also have to handle this:
     92         #     /* start
     93         #     ...
     94         #     */   /*    <-- tricky!
     95         #     ...
     96         #     */
     97         # and this:
     98         #     /* start
     99         #     ...
    100         #     */   /* also tricky! */
    101         if self.in_comment:
    102             if '*/' in line:
    103                 # snip out the comment and continue
    104                 #
    105                 # GCC allows
    106                 #    /* comment
    107                 #    */ #include <stdio.h>
    108                 # maybe other compilers too?
    109                 _, _, line = line.partition('*/')
    110                 self.in_comment = False
    111 
    112         while True:
    113             if '/*' in line:
    114                 if self.in_comment:
    115                     self.fail("Nested block comment!")
    116 
    117                 before, _, remainder = line.partition('/*')
    118                 comment, comment_ends, after = remainder.partition('*/')
    119                 if comment_ends:
    120                     # snip out the comment
    121                     line = before.rstrip() + ' ' + after.lstrip()
    122                     continue
    123                 # comment continues to eol
    124                 self.in_comment = True
    125                 line = before.rstrip()
    126             break
    127 
    128         # we actually have some // comments
    129         # (but block comments take precedence)
    130         before, line_comment, comment = line.partition('//')
    131         if line_comment:
    132             line = before.rstrip()
    133 
    134         if not line.startswith('#'):
    135             return
    136 
    137         line = line[1:].lstrip()
    138         assert line
    139 
    140         fields = line.split()
    141         token = fields[0].lower()
    142         condition = ' '.join(fields[1:]).strip()
    143 
    144         if_tokens = {'if', 'ifdef', 'ifndef'}
    145         all_tokens = if_tokens | {'elif', 'else', 'endif'}
    146 
    147         if token not in all_tokens:
    148             return
    149 
    150         # cheat a little here, to reuse the implementation of if
    151         if token == 'elif':
    152             pop_stack()
    153             token = 'if'
    154 
    155         if token in if_tokens:
    156             if not condition:
    157                 self.fail("Invalid format for #" + token + " line: no argument!")
    158             if token == 'if':
    159                 if not self.is_a_simple_defined(condition):
    160                     condition = "(" + condition + ")"
    161             else:
    162                 fields = condition.split()
    163                 if len(fields) != 1:
    164                     self.fail("Invalid format for #" + token + " line: should be exactly one argument!")
    165                 symbol = fields[0]
    166                 condition = 'defined(' + symbol + ')'
    167                 if token == 'ifndef':
    168                     condition = '!' + condition
    169 
    170             self.stack.append(("if", condition))
    171             if self.verbose:
    172                 print(self.status())
    173             return
    174 
    175         previous_token, previous_condition = pop_stack()
    176 
    177         if token == 'else':
    178             self.stack.append(('else', negate(previous_condition)))
    179         elif token == 'endif':
    180             pass
    181         if self.verbose:
    182             print(self.status())
    183 
    184 if __name__ == '__main__':
    185     for filename in sys.argv[1:]:
    186         with open(filename, "rt") as f:
    187             cpp = Monitor(filename, verbose=True)
    188             print()
    189             print(filename)
    190             for line_number, line in enumerate(f.read().split('\n'), 1):
    191                 cpp.writeline(line)
    192