Home | History | Annotate | Download | only in test
      1 """Sort performance test.
      2 
      3 See main() for command line syntax.
      4 See tabulate() for output format.
      5 
      6 """
      7 
      8 import sys
      9 import time
     10 import random
     11 import marshal
     12 import tempfile
     13 import os
     14 
     15 td = tempfile.gettempdir()
     16 
     17 def randfloats(n):
     18     """Return a list of n random floats in [0, 1)."""
     19     # Generating floats is expensive, so this writes them out to a file in
     20     # a temp directory.  If the file already exists, it just reads them
     21     # back in and shuffles them a bit.
     22     fn = os.path.join(td, "rr%06d" % n)
     23     try:
     24         fp = open(fn, "rb")
     25     except OSError:
     26         r = random.random
     27         result = [r() for i in range(n)]
     28         try:
     29             try:
     30                 fp = open(fn, "wb")
     31                 marshal.dump(result, fp)
     32                 fp.close()
     33                 fp = None
     34             finally:
     35                 if fp:
     36                     try:
     37                         os.unlink(fn)
     38                     except OSError:
     39                         pass
     40         except OSError as msg:
     41             print("can't write", fn, ":", msg)
     42     else:
     43         result = marshal.load(fp)
     44         fp.close()
     45         # Shuffle it a bit...
     46         for i in range(10):
     47             i = random.randrange(n)
     48             temp = result[:i]
     49             del result[:i]
     50             temp.reverse()
     51             result.extend(temp)
     52             del temp
     53     assert len(result) == n
     54     return result
     55 
     56 def flush():
     57     sys.stdout.flush()
     58 
     59 def doit(L):
     60     t0 = time.perf_counter()
     61     L.sort()
     62     t1 = time.perf_counter()
     63     print("%6.2f" % (t1-t0), end=' ')
     64     flush()
     65 
     66 def tabulate(r):
     67     r"""Tabulate sort speed for lists of various sizes.
     68 
     69     The sizes are 2**i for i in r (the argument, a list).
     70 
     71     The output displays i, 2**i, and the time to sort arrays of 2**i
     72     floating point numbers with the following properties:
     73 
     74     *sort: random data
     75     \sort: descending data
     76     /sort: ascending data
     77     3sort: ascending, then 3 random exchanges
     78     +sort: ascending, then 10 random at the end
     79     %sort: ascending, then randomly replace 1% of the elements w/ random values
     80     ~sort: many duplicates
     81     =sort: all equal
     82     !sort: worst case scenario
     83 
     84     """
     85     cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"])
     86     fmt = ("%2s %7s" + " %6s"*len(cases))
     87     print(fmt % (("i", "2**i") + cases))
     88     for i in r:
     89         n = 1 << i
     90         L = randfloats(n)
     91         print("%2d %7d" % (i, n), end=' ')
     92         flush()
     93         doit(L) # *sort
     94         L.reverse()
     95         doit(L) # \sort
     96         doit(L) # /sort
     97 
     98         # Do 3 random exchanges.
     99         for dummy in range(3):
    100             i1 = random.randrange(n)
    101             i2 = random.randrange(n)
    102             L[i1], L[i2] = L[i2], L[i1]
    103         doit(L) # 3sort
    104 
    105         # Replace the last 10 with random floats.
    106         if n >= 10:
    107             L[-10:] = [random.random() for dummy in range(10)]
    108         doit(L) # +sort
    109 
    110         # Replace 1% of the elements at random.
    111         for dummy in range(n // 100):
    112             L[random.randrange(n)] = random.random()
    113         doit(L) # %sort
    114 
    115         # Arrange for lots of duplicates.
    116         if n > 4:
    117             del L[4:]
    118             L = L * (n // 4)
    119             # Force the elements to be distinct objects, else timings can be
    120             # artificially low.
    121             L = list(map(lambda x: --x, L))
    122         doit(L) # ~sort
    123         del L
    124 
    125         # All equal.  Again, force the elements to be distinct objects.
    126         L = list(map(abs, [-0.5] * n))
    127         doit(L) # =sort
    128         del L
    129 
    130         # This one looks like [3, 2, 1, 0, 0, 1, 2, 3].  It was a bad case
    131         # for an older implementation of quicksort, which used the median
    132         # of the first, last and middle elements as the pivot.
    133         half = n // 2
    134         L = list(range(half - 1, -1, -1))
    135         L.extend(range(half))
    136         # Force to float, so that the timings are comparable.  This is
    137         # significantly faster if we leave tham as ints.
    138         L = list(map(float, L))
    139         doit(L) # !sort
    140         print()
    141 
    142 def main():
    143     """Main program when invoked as a script.
    144 
    145     One argument: tabulate a single row.
    146     Two arguments: tabulate a range (inclusive).
    147     Extra arguments are used to seed the random generator.
    148 
    149     """
    150     # default range (inclusive)
    151     k1 = 15
    152     k2 = 20
    153     if sys.argv[1:]:
    154         # one argument: single point
    155         k1 = k2 = int(sys.argv[1])
    156         if sys.argv[2:]:
    157             # two arguments: specify range
    158             k2 = int(sys.argv[2])
    159             if sys.argv[3:]:
    160                 # derive random seed from remaining arguments
    161                 x = 1
    162                 for a in sys.argv[3:]:
    163                     x = 69069 * x + hash(a)
    164                 random.seed(x)
    165     r = range(k1, k2+1)                 # include the end point
    166     tabulate(r)
    167 
    168 if __name__ == '__main__':
    169     main()
    170