1 #!/usr/bin/env python 2 3 import os 4 import re 5 import subprocess 6 import sys 7 import tempfile 8 9 ### 10 11 class DeltaAlgorithm(object): 12 def __init__(self): 13 self.cache = set() 14 15 def test(self, changes): 16 abstract 17 18 ### 19 20 def getTestResult(self, changes): 21 # There is no reason to cache successful tests because we will 22 # always reduce the changeset when we see one. 23 24 changeset = frozenset(changes) 25 if changeset in self.cache: 26 return False 27 elif not self.test(changes): 28 self.cache.add(changeset) 29 return False 30 else: 31 return True 32 33 def run(self, changes, force=False): 34 # Make sure the initial test passes, if not then (a) either 35 # the user doesn't expect monotonicity, and we may end up 36 # doing O(N^2) tests, or (b) the test is wrong. Avoid the 37 # O(N^2) case unless user requests it. 38 if not force: 39 if not self.getTestResult(changes): 40 raise ValueError,'Initial test passed to delta fails.' 41 42 # Check empty set first to quickly find poor test functions. 43 if self.getTestResult(set()): 44 return set() 45 else: 46 return self.delta(changes, self.split(changes)) 47 48 def split(self, S): 49 """split(set) -> [sets] 50 51 Partition a set into one or two pieces. 52 """ 53 54 # There are many ways to split, we could do a better job with more 55 # context information (but then the API becomes grosser). 56 L = list(S) 57 mid = len(L)//2 58 if mid==0: 59 return L, 60 else: 61 return L[:mid],L[mid:] 62 63 def delta(self, c, sets): 64 # assert(reduce(set.union, sets, set()) == c) 65 66 # If there is nothing left we can remove, we are done. 67 if len(sets) <= 1: 68 return c 69 70 # Look for a passing subset. 71 res = self.search(c, sets) 72 if res is not None: 73 return res 74 75 # Otherwise, partition sets if possible; if not we are done. 76 refined = sum(map(list, map(self.split, sets)), []) 77 if len(refined) == len(sets): 78 return c 79 80 return self.delta(c, refined) 81 82 def search(self, c, sets): 83 for i,S in enumerate(sets): 84 # If test passes on this subset alone, recurse. 85 if self.getTestResult(S): 86 return self.delta(S, self.split(S)) 87 88 # Otherwise if we have more than two sets, see if test 89 # pases without this subset. 90 if len(sets) > 2: 91 complement = sum(sets[:i] + sets[i+1:],[]) 92 if self.getTestResult(complement): 93 return self.delta(complement, sets[:i] + sets[i+1:]) 94 95 ### 96 97 class Token: 98 def __init__(self, type, data, flags, file, line, column): 99 self.type = type 100 self.data = data 101 self.flags = flags 102 self.file = file 103 self.line = line 104 self.column = column 105 106 kTokenRE = re.compile(r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""", 107 re.DOTALL | re.MULTILINE) 108 109 def getTokens(path): 110 p = subprocess.Popen(['clang','-dump-raw-tokens',path], 111 stdin=subprocess.PIPE, 112 stdout=subprocess.PIPE, 113 stderr=subprocess.PIPE) 114 out,err = p.communicate() 115 116 tokens = [] 117 collect = None 118 for ln in err.split('\n'): 119 # Silly programmers refuse to print in simple machine readable 120 # formats. Whatever. 121 if collect is None: 122 collect = ln 123 else: 124 collect = collect + '\n' + ln 125 if 'Loc=<' in ln and ln.endswith('>'): 126 ln,collect = collect,None 127 tokens.append(Token(*kTokenRE.match(ln).groups())) 128 129 return tokens 130 131 ### 132 133 class TMBDDelta(DeltaAlgorithm): 134 def __init__(self, testProgram, tokenLists, log): 135 def patchName(name, suffix): 136 base,ext = os.path.splitext(name) 137 return base + '.' + suffix + ext 138 super(TMBDDelta, self).__init__() 139 self.testProgram = testProgram 140 self.tokenLists = tokenLists 141 self.tempFiles = [patchName(f,'tmp') 142 for f,_ in self.tokenLists] 143 self.targetFiles = [patchName(f,'ok') 144 for f,_ in self.tokenLists] 145 self.log = log 146 self.numTests = 0 147 148 def writeFiles(self, changes, fileNames): 149 assert len(fileNames) == len(self.tokenLists) 150 byFile = [[] for i in self.tokenLists] 151 for i,j in changes: 152 byFile[i].append(j) 153 154 for i,(file,tokens) in enumerate(self.tokenLists): 155 f = open(fileNames[i],'w') 156 for j in byFile[i]: 157 f.write(tokens[j]) 158 f.close() 159 160 return byFile 161 162 def test(self, changes): 163 self.numTests += 1 164 165 byFile = self.writeFiles(changes, self.tempFiles) 166 167 if self.log: 168 print >>sys.stderr, 'TEST - ', 169 if self.log > 1: 170 for i,(file,_) in enumerate(self.tokenLists): 171 indices = byFile[i] 172 if i: 173 sys.stderr.write('\n ') 174 sys.stderr.write('%s:%d tokens: [' % (file,len(byFile[i]))) 175 prev = None 176 for j in byFile[i]: 177 if prev is None or j != prev + 1: 178 if prev: 179 sys.stderr.write('%d][' % prev) 180 sys.stderr.write(str(j)) 181 sys.stderr.write(':') 182 prev = j 183 if byFile[i]: 184 sys.stderr.write(str(byFile[i][-1])) 185 sys.stderr.write('] ') 186 else: 187 print >>sys.stderr, ', '.join(['%s:%d tokens' % (file, len(byFile[i])) 188 for i,(file,_) in enumerate(self.tokenLists)]), 189 190 p = subprocess.Popen([self.testProgram] + self.tempFiles) 191 res = p.wait() == 0 192 193 if res: 194 self.writeFiles(changes, self.targetFiles) 195 196 if self.log: 197 print >>sys.stderr, '=> %s' % res 198 else: 199 if res: 200 print '\nSUCCESS (%d tokens)' % len(changes) 201 else: 202 sys.stderr.write('.') 203 204 return res 205 206 def run(self): 207 res = super(TMBDDelta, self).run([(i,j) 208 for i,(file,tokens) in enumerate(self.tokenLists) 209 for j in range(len(tokens))]) 210 self.writeFiles(res, self.targetFiles) 211 if not self.log: 212 print >>sys.stderr 213 return res 214 215 def tokenBasedMultiDelta(program, files, log): 216 # Read in the lists of tokens. 217 tokenLists = [(file, [t.data for t in getTokens(file)]) 218 for file in files] 219 220 numTokens = sum([len(tokens) for _,tokens in tokenLists]) 221 print "Delta on %s with %d tokens." % (', '.join(files), numTokens) 222 223 tbmd = TMBDDelta(program, tokenLists, log) 224 225 res = tbmd.run() 226 227 print "Finished %s with %d tokens (in %d tests)." % (', '.join(tbmd.targetFiles), 228 len(res), 229 tbmd.numTests) 230 231 def main(): 232 from optparse import OptionParser, OptionGroup 233 parser = OptionParser("%prog <test program> {files+}") 234 parser.add_option("", "--debug", dest="debugLevel", 235 help="set debug level [default %default]", 236 action="store", type=int, default=0) 237 (opts, args) = parser.parse_args() 238 239 if len(args) <= 1: 240 parser.error('Invalid number of arguments.') 241 242 program,files = args[0],args[1:] 243 244 md = tokenBasedMultiDelta(program, files, log=opts.debugLevel) 245 246 if __name__ == '__main__': 247 try: 248 main() 249 except KeyboardInterrupt: 250 print >>sys.stderr,'Interrupted.' 251 os._exit(1) # Avoid freeing our giant cache. 252