1 """Sort performance test. 2 3 See main() for command line syntax. 4 See tabulate() for output format. 5 6 """ 7 8 import sys 9 import time 10 import random 11 import marshal 12 import tempfile 13 import os 14 15 td = tempfile.gettempdir() 16 17 def randfloats(n): 18 """Return a list of n random floats in [0, 1).""" 19 # Generating floats is expensive, so this writes them out to a file in 20 # a temp directory. If the file already exists, it just reads them 21 # back in and shuffles them a bit. 22 fn = os.path.join(td, "rr%06d" % n) 23 try: 24 fp = open(fn, "rb") 25 except IOError: 26 r = random.random 27 result = [r() for i in xrange(n)] 28 try: 29 try: 30 fp = open(fn, "wb") 31 marshal.dump(result, fp) 32 fp.close() 33 fp = None 34 finally: 35 if fp: 36 try: 37 os.unlink(fn) 38 except os.error: 39 pass 40 except IOError, msg: 41 print "can't write", fn, ":", msg 42 else: 43 result = marshal.load(fp) 44 fp.close() 45 # Shuffle it a bit... 46 for i in range(10): 47 i = random.randrange(n) 48 temp = result[:i] 49 del result[:i] 50 temp.reverse() 51 result.extend(temp) 52 del temp 53 assert len(result) == n 54 return result 55 56 def flush(): 57 sys.stdout.flush() 58 59 def doit(L): 60 t0 = time.clock() 61 L.sort() 62 t1 = time.clock() 63 print "%6.2f" % (t1-t0), 64 flush() 65 66 def tabulate(r): 67 """Tabulate sort speed for lists of various sizes. 68 69 The sizes are 2**i for i in r (the argument, a list). 70 71 The output displays i, 2**i, and the time to sort arrays of 2**i 72 floating point numbers with the following properties: 73 74 *sort: random data 75 \sort: descending data 76 /sort: ascending data 77 3sort: ascending, then 3 random exchanges 78 +sort: ascending, then 10 random at the end 79 %sort: ascending, then randomly replace 1% of the elements w/ random values 80 ~sort: many duplicates 81 =sort: all equal 82 !sort: worst case scenario 83 84 """ 85 cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"]) 86 fmt = ("%2s %7s" + " %6s"*len(cases)) 87 print fmt % (("i", "2**i") + cases) 88 for i in r: 89 n = 1 << i 90 L = randfloats(n) 91 print "%2d %7d" % (i, n), 92 flush() 93 doit(L) # *sort 94 L.reverse() 95 doit(L) # \sort 96 doit(L) # /sort 97 98 # Do 3 random exchanges. 99 for dummy in range(3): 100 i1 = random.randrange(n) 101 i2 = random.randrange(n) 102 L[i1], L[i2] = L[i2], L[i1] 103 doit(L) # 3sort 104 105 # Replace the last 10 with random floats. 106 if n >= 10: 107 L[-10:] = [random.random() for dummy in range(10)] 108 doit(L) # +sort 109 110 # Replace 1% of the elements at random. 111 for dummy in xrange(n // 100): 112 L[random.randrange(n)] = random.random() 113 doit(L) # %sort 114 115 # Arrange for lots of duplicates. 116 if n > 4: 117 del L[4:] 118 L = L * (n // 4) 119 # Force the elements to be distinct objects, else timings can be 120 # artificially low. 121 L = map(lambda x: --x, L) 122 doit(L) # ~sort 123 del L 124 125 # All equal. Again, force the elements to be distinct objects. 126 L = map(abs, [-0.5] * n) 127 doit(L) # =sort 128 del L 129 130 # This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case 131 # for an older implementation of quicksort, which used the median 132 # of the first, last and middle elements as the pivot. 133 half = n // 2 134 L = range(half - 1, -1, -1) 135 L.extend(range(half)) 136 # Force to float, so that the timings are comparable. This is 137 # significantly faster if we leave tham as ints. 138 L = map(float, L) 139 doit(L) # !sort 140 print 141 142 def main(): 143 """Main program when invoked as a script. 144 145 One argument: tabulate a single row. 146 Two arguments: tabulate a range (inclusive). 147 Extra arguments are used to seed the random generator. 148 149 """ 150 # default range (inclusive) 151 k1 = 15 152 k2 = 20 153 if sys.argv[1:]: 154 # one argument: single point 155 k1 = k2 = int(sys.argv[1]) 156 if sys.argv[2:]: 157 # two arguments: specify range 158 k2 = int(sys.argv[2]) 159 if sys.argv[3:]: 160 # derive random seed from remaining arguments 161 x = 1 162 for a in sys.argv[3:]: 163 x = 69069 * x + hash(a) 164 random.seed(x) 165 r = range(k1, k2+1) # include the end point 166 tabulate(r) 167 168 if __name__ == '__main__': 169 main() 170