Home | History | Annotate | Download | only in test
      1 from test.test_support import verbose, TESTFN
      2 import random
      3 import os
      4 
      5 # From SF bug #422121:  Insecurities in dict comparison.
      6 
      7 # Safety of code doing comparisons has been an historical Python weak spot.
      8 # The problem is that comparison of structures written in C *naturally*
      9 # wants to hold on to things like the size of the container, or "the
     10 # biggest" containee so far, across a traversal of the container; but
     11 # code to do containee comparisons can call back into Python and mutate
     12 # the container in arbitrary ways while the C loop is in midstream.  If the
     13 # C code isn't extremely paranoid about digging things out of memory on
     14 # each trip, and artificially boosting refcounts for the duration, anything
     15 # from infinite loops to OS crashes can result (yes, I use Windows <wink>).
     16 #
     17 # The other problem is that code designed to provoke a weakness is usually
     18 # white-box code, and so catches only the particular vulnerabilities the
     19 # author knew to protect against.  For example, Python's list.sort() code
     20 # went thru many iterations as one "new" vulnerability after another was
     21 # discovered.
     22 #
     23 # So the dict comparison test here uses a black-box approach instead,
     24 # generating dicts of various sizes at random, and performing random
     25 # mutations on them at random times.  This proved very effective,
     26 # triggering at least six distinct failure modes the first 20 times I
     27 # ran it.  Indeed, at the start, the driver never got beyond 6 iterations
     28 # before the test died.
     29 
     30 # The dicts are global to make it easy to mutate tham from within functions.
     31 dict1 = {}
     32 dict2 = {}
     33 
     34 # The current set of keys in dict1 and dict2.  These are materialized as
     35 # lists to make it easy to pick a dict key at random.
     36 dict1keys = []
     37 dict2keys = []
     38 
     39 # Global flag telling maybe_mutate() whether to *consider* mutating.
     40 mutate = 0
     41 
     42 # If global mutate is true, consider mutating a dict.  May or may not
     43 # mutate a dict even if mutate is true.  If it does decide to mutate a
     44 # dict, it picks one of {dict1, dict2} at random, and deletes a random
     45 # entry from it; or, more rarely, adds a random element.
     46 
     47 def maybe_mutate():
     48     global mutate
     49     if not mutate:
     50         return
     51     if random.random() < 0.5:
     52         return
     53 
     54     if random.random() < 0.5:
     55         target, keys = dict1, dict1keys
     56     else:
     57         target, keys = dict2, dict2keys
     58 
     59     if random.random() < 0.2:
     60         # Insert a new key.
     61         mutate = 0   # disable mutation until key inserted
     62         while 1:
     63             newkey = Horrid(random.randrange(100))
     64             if newkey not in target:
     65                 break
     66         target[newkey] = Horrid(random.randrange(100))
     67         keys.append(newkey)
     68         mutate = 1
     69 
     70     elif keys:
     71         # Delete a key at random.
     72         mutate = 0   # disable mutation until key deleted
     73         i = random.randrange(len(keys))
     74         key = keys[i]
     75         del target[key]
     76         del keys[i]
     77         mutate = 1
     78 
     79 # A horrid class that triggers random mutations of dict1 and dict2 when
     80 # instances are compared.
     81 
     82 class Horrid:
     83     def __init__(self, i):
     84         # Comparison outcomes are determined by the value of i.
     85         self.i = i
     86 
     87         # An artificial hashcode is selected at random so that we don't
     88         # have any systematic relationship between comparison outcomes
     89         # (based on self.i and other.i) and relative position within the
     90         # hash vector (based on hashcode).
     91         self.hashcode = random.randrange(1000000000)
     92 
     93     def __hash__(self):
     94         return 42
     95         return self.hashcode
     96 
     97     def __cmp__(self, other):
     98         maybe_mutate()   # The point of the test.
     99         return cmp(self.i, other.i)
    100 
    101     def __eq__(self, other):
    102         maybe_mutate()   # The point of the test.
    103         return self.i == other.i
    104 
    105     def __repr__(self):
    106         return "Horrid(%d)" % self.i
    107 
    108 # Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs,
    109 # where i and j are selected at random from the candidates list.
    110 # Return d.keys() after filling.
    111 
    112 def fill_dict(d, candidates, numentries):
    113     d.clear()
    114     for i in xrange(numentries):
    115         d[Horrid(random.choice(candidates))] = \
    116             Horrid(random.choice(candidates))
    117     return d.keys()
    118 
    119 # Test one pair of randomly generated dicts, each with n entries.
    120 # Note that dict comparison is trivial if they don't have the same number
    121 # of entires (then the "shorter" dict is instantly considered to be the
    122 # smaller one, without even looking at the entries).
    123 
    124 def test_one(n):
    125     global mutate, dict1, dict2, dict1keys, dict2keys
    126 
    127     # Fill the dicts without mutating them.
    128     mutate = 0
    129     dict1keys = fill_dict(dict1, range(n), n)
    130     dict2keys = fill_dict(dict2, range(n), n)
    131 
    132     # Enable mutation, then compare the dicts so long as they have the
    133     # same size.
    134     mutate = 1
    135     if verbose:
    136         print "trying w/ lengths", len(dict1), len(dict2),
    137     while dict1 and len(dict1) == len(dict2):
    138         if verbose:
    139             print ".",
    140         if random.random() < 0.5:
    141             c = cmp(dict1, dict2)
    142         else:
    143             c = dict1 == dict2
    144     if verbose:
    145         print
    146 
    147 # Run test_one n times.  At the start (before the bugs were fixed), 20
    148 # consecutive runs of this test each blew up on or before the sixth time
    149 # test_one was run.  So n doesn't have to be large to get an interesting
    150 # test.
    151 # OTOH, calling with large n is also interesting, to ensure that the fixed
    152 # code doesn't hold on to refcounts *too* long (in which case memory would
    153 # leak).
    154 
    155 def test(n):
    156     for i in xrange(n):
    157         test_one(random.randrange(1, 100))
    158 
    159 # See last comment block for clues about good values for n.
    160 test(100)
    161 
    162 ##########################################################################
    163 # Another segfault bug, distilled by Michael Hudson from a c.l.py post.
    164 
    165 class Child:
    166     def __init__(self, parent):
    167         self.__dict__['parent'] = parent
    168     def __getattr__(self, attr):
    169         self.parent.a = 1
    170         self.parent.b = 1
    171         self.parent.c = 1
    172         self.parent.d = 1
    173         self.parent.e = 1
    174         self.parent.f = 1
    175         self.parent.g = 1
    176         self.parent.h = 1
    177         self.parent.i = 1
    178         return getattr(self.parent, attr)
    179 
    180 class Parent:
    181     def __init__(self):
    182         self.a = Child(self)
    183 
    184 # Hard to say what this will print!  May vary from time to time.  But
    185 # we're specifically trying to test the tp_print slot here, and this is
    186 # the clearest way to do it.  We print the result to a temp file so that
    187 # the expected-output file doesn't need to change.
    188 
    189 f = open(TESTFN, "w")
    190 print >> f, Parent().__dict__
    191 f.close()
    192 os.unlink(TESTFN)
    193 
    194 ##########################################################################
    195 # And another core-dumper from Michael Hudson.
    196 
    197 dict = {}
    198 
    199 # Force dict to malloc its table.
    200 for i in range(1, 10):
    201     dict[i] = i
    202 
    203 f = open(TESTFN, "w")
    204 
    205 class Machiavelli:
    206     def __repr__(self):
    207         dict.clear()
    208 
    209         # Michael sez:  "doesn't crash without this.  don't know why."
    210         # Tim sez:  "luck of the draw; crashes with or without for me."
    211         print >> f
    212 
    213         return repr("machiavelli")
    214 
    215     def __hash__(self):
    216         return 0
    217 
    218 dict[Machiavelli()] = Machiavelli()
    219 
    220 print >> f, str(dict)
    221 f.close()
    222 os.unlink(TESTFN)
    223 del f, dict
    224 
    225 
    226 ##########################################################################
    227 # And another core-dumper from Michael Hudson.
    228 
    229 dict = {}
    230 
    231 # let's force dict to malloc its table
    232 for i in range(1, 10):
    233     dict[i] = i
    234 
    235 class Machiavelli2:
    236     def __eq__(self, other):
    237         dict.clear()
    238         return 1
    239 
    240     def __hash__(self):
    241         return 0
    242 
    243 dict[Machiavelli2()] = Machiavelli2()
    244 
    245 try:
    246     dict[Machiavelli2()]
    247 except KeyError:
    248     pass
    249 
    250 del dict
    251 
    252 ##########################################################################
    253 # And another core-dumper from Michael Hudson.
    254 
    255 dict = {}
    256 
    257 # let's force dict to malloc its table
    258 for i in range(1, 10):
    259     dict[i] = i
    260 
    261 class Machiavelli3:
    262     def __init__(self, id):
    263         self.id = id
    264 
    265     def __eq__(self, other):
    266         if self.id == other.id:
    267             dict.clear()
    268             return 1
    269         else:
    270             return 0
    271 
    272     def __repr__(self):
    273         return "%s(%s)"%(self.__class__.__name__, self.id)
    274 
    275     def __hash__(self):
    276         return 0
    277 
    278 dict[Machiavelli3(1)] = Machiavelli3(0)
    279 dict[Machiavelli3(2)] = Machiavelli3(0)
    280 
    281 f = open(TESTFN, "w")
    282 try:
    283     try:
    284         print >> f, dict[Machiavelli3(2)]
    285     except KeyError:
    286         pass
    287 finally:
    288     f.close()
    289     os.unlink(TESTFN)
    290 
    291 del dict
    292 del dict1, dict2, dict1keys, dict2keys
    293