1 from test.test_support import verbose, TESTFN 2 import random 3 import os 4 5 # From SF bug #422121: Insecurities in dict comparison. 6 7 # Safety of code doing comparisons has been an historical Python weak spot. 8 # The problem is that comparison of structures written in C *naturally* 9 # wants to hold on to things like the size of the container, or "the 10 # biggest" containee so far, across a traversal of the container; but 11 # code to do containee comparisons can call back into Python and mutate 12 # the container in arbitrary ways while the C loop is in midstream. If the 13 # C code isn't extremely paranoid about digging things out of memory on 14 # each trip, and artificially boosting refcounts for the duration, anything 15 # from infinite loops to OS crashes can result (yes, I use Windows <wink>). 16 # 17 # The other problem is that code designed to provoke a weakness is usually 18 # white-box code, and so catches only the particular vulnerabilities the 19 # author knew to protect against. For example, Python's list.sort() code 20 # went thru many iterations as one "new" vulnerability after another was 21 # discovered. 22 # 23 # So the dict comparison test here uses a black-box approach instead, 24 # generating dicts of various sizes at random, and performing random 25 # mutations on them at random times. This proved very effective, 26 # triggering at least six distinct failure modes the first 20 times I 27 # ran it. Indeed, at the start, the driver never got beyond 6 iterations 28 # before the test died. 29 30 # The dicts are global to make it easy to mutate tham from within functions. 31 dict1 = {} 32 dict2 = {} 33 34 # The current set of keys in dict1 and dict2. These are materialized as 35 # lists to make it easy to pick a dict key at random. 36 dict1keys = [] 37 dict2keys = [] 38 39 # Global flag telling maybe_mutate() whether to *consider* mutating. 40 mutate = 0 41 42 # If global mutate is true, consider mutating a dict. May or may not 43 # mutate a dict even if mutate is true. If it does decide to mutate a 44 # dict, it picks one of {dict1, dict2} at random, and deletes a random 45 # entry from it; or, more rarely, adds a random element. 46 47 def maybe_mutate(): 48 global mutate 49 if not mutate: 50 return 51 if random.random() < 0.5: 52 return 53 54 if random.random() < 0.5: 55 target, keys = dict1, dict1keys 56 else: 57 target, keys = dict2, dict2keys 58 59 if random.random() < 0.2: 60 # Insert a new key. 61 mutate = 0 # disable mutation until key inserted 62 while 1: 63 newkey = Horrid(random.randrange(100)) 64 if newkey not in target: 65 break 66 target[newkey] = Horrid(random.randrange(100)) 67 keys.append(newkey) 68 mutate = 1 69 70 elif keys: 71 # Delete a key at random. 72 mutate = 0 # disable mutation until key deleted 73 i = random.randrange(len(keys)) 74 key = keys[i] 75 del target[key] 76 del keys[i] 77 mutate = 1 78 79 # A horrid class that triggers random mutations of dict1 and dict2 when 80 # instances are compared. 81 82 class Horrid: 83 def __init__(self, i): 84 # Comparison outcomes are determined by the value of i. 85 self.i = i 86 87 # An artificial hashcode is selected at random so that we don't 88 # have any systematic relationship between comparison outcomes 89 # (based on self.i and other.i) and relative position within the 90 # hash vector (based on hashcode). 91 self.hashcode = random.randrange(1000000000) 92 93 def __hash__(self): 94 return 42 95 return self.hashcode 96 97 def __cmp__(self, other): 98 maybe_mutate() # The point of the test. 99 return cmp(self.i, other.i) 100 101 def __eq__(self, other): 102 maybe_mutate() # The point of the test. 103 return self.i == other.i 104 105 def __repr__(self): 106 return "Horrid(%d)" % self.i 107 108 # Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs, 109 # where i and j are selected at random from the candidates list. 110 # Return d.keys() after filling. 111 112 def fill_dict(d, candidates, numentries): 113 d.clear() 114 for i in xrange(numentries): 115 d[Horrid(random.choice(candidates))] = \ 116 Horrid(random.choice(candidates)) 117 return d.keys() 118 119 # Test one pair of randomly generated dicts, each with n entries. 120 # Note that dict comparison is trivial if they don't have the same number 121 # of entires (then the "shorter" dict is instantly considered to be the 122 # smaller one, without even looking at the entries). 123 124 def test_one(n): 125 global mutate, dict1, dict2, dict1keys, dict2keys 126 127 # Fill the dicts without mutating them. 128 mutate = 0 129 dict1keys = fill_dict(dict1, range(n), n) 130 dict2keys = fill_dict(dict2, range(n), n) 131 132 # Enable mutation, then compare the dicts so long as they have the 133 # same size. 134 mutate = 1 135 if verbose: 136 print "trying w/ lengths", len(dict1), len(dict2), 137 while dict1 and len(dict1) == len(dict2): 138 if verbose: 139 print ".", 140 if random.random() < 0.5: 141 c = cmp(dict1, dict2) 142 else: 143 c = dict1 == dict2 144 if verbose: 145 print 146 147 # Run test_one n times. At the start (before the bugs were fixed), 20 148 # consecutive runs of this test each blew up on or before the sixth time 149 # test_one was run. So n doesn't have to be large to get an interesting 150 # test. 151 # OTOH, calling with large n is also interesting, to ensure that the fixed 152 # code doesn't hold on to refcounts *too* long (in which case memory would 153 # leak). 154 155 def test(n): 156 for i in xrange(n): 157 test_one(random.randrange(1, 100)) 158 159 # See last comment block for clues about good values for n. 160 test(100) 161 162 ########################################################################## 163 # Another segfault bug, distilled by Michael Hudson from a c.l.py post. 164 165 class Child: 166 def __init__(self, parent): 167 self.__dict__['parent'] = parent 168 def __getattr__(self, attr): 169 self.parent.a = 1 170 self.parent.b = 1 171 self.parent.c = 1 172 self.parent.d = 1 173 self.parent.e = 1 174 self.parent.f = 1 175 self.parent.g = 1 176 self.parent.h = 1 177 self.parent.i = 1 178 return getattr(self.parent, attr) 179 180 class Parent: 181 def __init__(self): 182 self.a = Child(self) 183 184 # Hard to say what this will print! May vary from time to time. But 185 # we're specifically trying to test the tp_print slot here, and this is 186 # the clearest way to do it. We print the result to a temp file so that 187 # the expected-output file doesn't need to change. 188 189 f = open(TESTFN, "w") 190 print >> f, Parent().__dict__ 191 f.close() 192 os.unlink(TESTFN) 193 194 ########################################################################## 195 # And another core-dumper from Michael Hudson. 196 197 dict = {} 198 199 # Force dict to malloc its table. 200 for i in range(1, 10): 201 dict[i] = i 202 203 f = open(TESTFN, "w") 204 205 class Machiavelli: 206 def __repr__(self): 207 dict.clear() 208 209 # Michael sez: "doesn't crash without this. don't know why." 210 # Tim sez: "luck of the draw; crashes with or without for me." 211 print >> f 212 213 return repr("machiavelli") 214 215 def __hash__(self): 216 return 0 217 218 dict[Machiavelli()] = Machiavelli() 219 220 print >> f, str(dict) 221 f.close() 222 os.unlink(TESTFN) 223 del f, dict 224 225 226 ########################################################################## 227 # And another core-dumper from Michael Hudson. 228 229 dict = {} 230 231 # let's force dict to malloc its table 232 for i in range(1, 10): 233 dict[i] = i 234 235 class Machiavelli2: 236 def __eq__(self, other): 237 dict.clear() 238 return 1 239 240 def __hash__(self): 241 return 0 242 243 dict[Machiavelli2()] = Machiavelli2() 244 245 try: 246 dict[Machiavelli2()] 247 except KeyError: 248 pass 249 250 del dict 251 252 ########################################################################## 253 # And another core-dumper from Michael Hudson. 254 255 dict = {} 256 257 # let's force dict to malloc its table 258 for i in range(1, 10): 259 dict[i] = i 260 261 class Machiavelli3: 262 def __init__(self, id): 263 self.id = id 264 265 def __eq__(self, other): 266 if self.id == other.id: 267 dict.clear() 268 return 1 269 else: 270 return 0 271 272 def __repr__(self): 273 return "%s(%s)"%(self.__class__.__name__, self.id) 274 275 def __hash__(self): 276 return 0 277 278 dict[Machiavelli3(1)] = Machiavelli3(0) 279 dict[Machiavelli3(2)] = Machiavelli3(0) 280 281 f = open(TESTFN, "w") 282 try: 283 try: 284 print >> f, dict[Machiavelli3(2)] 285 except KeyError: 286 pass 287 finally: 288 f.close() 289 os.unlink(TESTFN) 290 291 del dict 292 del dict1, dict2, dict1keys, dict2keys 293