Home | History | Annotate | Download | only in iobench
      1 # -*- coding: utf-8 -*-
      2 # This file should be kept compatible with both Python 2.6 and Python >= 3.0.
      3 
      4 import time
      5 import os
      6 import re
      7 import sys
      8 import hashlib
      9 import functools
     10 import itertools
     11 from optparse import OptionParser
     12 
# Stream that receives all benchmark labels and results.
out = sys.stdout

# Defaults for the text benchmarks; main() rebinds them from the -E and -N
# command-line options.
TEXT_ENCODING = 'utf8'
NEWLINES = 'lf'

# Compatibility
try:
    xrange
except NameError:
    # No xrange builtin in this interpreter: use range under the same name.
    xrange = range
     23 
     24 def text_open(fn, mode, encoding=None):
     25     try:
     26         return open(fn, mode, encoding=encoding or TEXT_ENCODING)
     27     except TypeError:
     28         return open(fn, mode)
     29 
     30 def get_file_sizes():
     31     for s in ['20 KB', '400 KB', '10 MB']:
     32         size, unit = s.split()
     33         size = int(size) * {'KB': 1024, 'MB': 1024 ** 2}[unit]
     34         yield s.replace(' ', ''), size
     35 
     36 def get_binary_files():
     37     return ((name + ".bin", size) for name, size in get_file_sizes())
     38 
     39 def get_text_files():
     40     return (("%s-%s-%s.txt" % (name, TEXT_ENCODING, NEWLINES), size)
     41         for name, size in get_file_sizes())
     42 
     43 def with_open_mode(mode):
     44     def decorate(f):
     45         f.file_open_mode = mode
     46         return f
     47     return decorate
     48 
     49 def with_sizes(*sizes):
     50     def decorate(f):
     51         f.file_sizes = sizes
     52         return f
     53     return decorate
     54 
     55 
     56 # Here begin the tests
     57 
# Read benchmark: drain the file with one read(1) call per unit.
# Mode "r" contains neither "b" nor "t", so run_test_family() keeps this
# test for both the binary and the text families.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("r")
@with_sizes("medium")
def read_bytewise(f):
    """ read one unit at a time """
    # Rewind: the same file object is reused for every repetition.
    f.seek(0)
    while f.read(1):
        pass
     65 
# Read benchmark: drain the file in 20-unit reads.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("r")
@with_sizes("medium")
def read_small_chunks(f):
    """ read 20 units at a time """
    # Rewind: the same file object is reused for every repetition.
    f.seek(0)
    while f.read(20):
        pass
     73 
# Read benchmark: drain the file in 4096-unit reads.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("r")
@with_sizes("medium")
def read_big_chunks(f):
    """ read 4096 units at a time """
    # Rewind: the same file object is reused for every repetition.
    f.seek(0)
    while f.read(4096):
        pass
     81 
# Read benchmark: fetch the whole file with a single read() call.
# This is the only read test that runs on all three file sizes.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("r")
@with_sizes("small", "medium", "large")
def read_whole_file(f):
    """ read whole contents at once """
    f.seek(0)
    # The first read() returns everything; the next one returns an empty
    # result at end of file, which ends the loop.
    while f.read():
        pass
     89 
# Read benchmark: iterate over the file line by line.
# The "t" in the mode makes run_test_family() skip this test for the binary
# family, whose filter is "t".
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("rt")
@with_sizes("medium")
def read_lines(f):
    """ read one line at a time """
    # Rewind: the same file object is reused for every repetition.
    f.seek(0)
    for line in f:
        pass
     97 
# Seek benchmark: absolute seeks to successive offsets, one unit apart.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("r")
@with_sizes("medium")
def seek_forward_bytewise(f):
    """ seek forward one unit at a time """
    # Go to the end (whence=2) so tell() provides the loop bound.
    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    # Targets offsets 0 .. size - 2.
    for i in xrange(0, size - 1):
        f.seek(i, 0)
    107 
# Seek benchmark: absolute seeks to successive offsets, 1000 units apart.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("r")
@with_sizes("medium")
def seek_forward_blockwise(f):
    """ seek forward 1000 units at a time """
    # Go to the end (whence=2) so tell() provides the loop bound.
    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    for i in xrange(0, size - 1, 1000):
        f.seek(i, 0)
    117 
# Mixed benchmark: read one unit, then skip one with a relative seek.
# The "b" in the mode makes run_test_family() skip this test for the text
# family, whose filter is "b".
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("rb")
@with_sizes("medium")
def read_seek_bytewise(f):
    """ alternate read & seek one unit """
    f.seek(0)
    while f.read(1):
        # whence=1: move relative to the current position.
        f.seek(1, 1)
    125 
# Mixed benchmark: read 1000 units, then skip 1000 with a relative seek.
# Binary-only: the "b" in the mode is filtered out of the text family.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("rb")
@with_sizes("medium")
def read_seek_blockwise(f):
    """ alternate read & seek 1000 units """
    f.seek(0)
    while f.read(1000):
        # whence=1: move relative to the current position.
        f.seek(1000, 1)
    133 
    134 
# Write benchmark: one write() call per unit of *source*.
# run_all_tests() passes the contents of the matching data file as *source*
# and opens os.devnull as the target for this family.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("w")
@with_sizes("small")
def write_bytewise(f, source):
    """ write one unit at a time """
    for i in xrange(0, len(source)):
        # Slice rather than index so the piece keeps the type of source.
        f.write(source[i:i+1])
    141 
# Write benchmark: write *source* in 20-unit slices.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("w")
@with_sizes("medium")
def write_small_chunks(f, source):
    """ write 20 units at a time """
    for i in xrange(0, len(source), 20):
        f.write(source[i:i+20])
    148 
# Write benchmark: write *source* in 4096-unit slices.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("w")
@with_sizes("medium")
def write_medium_chunks(f, source):
    """ write 4096 units at a time """
    for i in xrange(0, len(source), 4096):
        f.write(source[i:i+4096])
    155 
# Write benchmark: write *source* in 1,000,000-unit slices.
# Runs on the "large" file only.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("w")
@with_sizes("large")
def write_large_chunks(f, source):
    """ write 1e6 units at a time """
    for i in xrange(0, len(source), 1000000):
        f.write(source[i:i+1000000])
    162 
    163 
# Overwrite benchmark: rewrite the file from the start, one unit per write().
# The "+" in the mode makes run_one_test() warm the cache first;
# run_all_tests() opens the existing data file for update ("r+b" / "r+").
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("w+")
@with_sizes("small")
def modify_bytewise(f, source):
    """ modify one unit at a time """
    # Rewind: the same file object is reused for every repetition.
    f.seek(0)
    for i in xrange(0, len(source)):
        f.write(source[i:i+1])
    171 
# Overwrite benchmark: rewrite the file from the start in 20-unit slices.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("w+")
@with_sizes("medium")
def modify_small_chunks(f, source):
    """ modify 20 units at a time """
    # Rewind: the same file object is reused for every repetition.
    f.seek(0)
    for i in xrange(0, len(source), 20):
        f.write(source[i:i+20])
    179 
# Overwrite benchmark: rewrite the file from the start in 4096-unit slices.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("w+")
@with_sizes("medium")
def modify_medium_chunks(f, source):
    """ modify 4096 units at a time """
    # Rewind: the same file object is reused for every repetition.
    f.seek(0)
    for i in xrange(0, len(source), 4096):
        f.write(source[i:i+4096])
    187 
# Overwrite benchmark: write one unit, then seek past the next one, so only
# every other unit is rewritten.
# Binary-only: the "b" in the mode is filtered out of the text family.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("wb+")
@with_sizes("medium")
def modify_seek_forward_bytewise(f, source):
    """ alternate write & seek one unit """
    f.seek(0)
    for i in xrange(0, len(source), 2):
        f.write(source[i:i+1])
        # Absolute seek to the start of the next pair.
        f.seek(i+2)
    196 
# Overwrite benchmark: write 1000 units, then seek past the next 1000.
# Binary-only: the "b" in the mode is filtered out of the text family.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("wb+")
@with_sizes("medium")
def modify_seek_forward_blockwise(f, source):
    """ alternate write & seek 1000 units """
    f.seek(0)
    for i in xrange(0, len(source), 2000):
        f.write(source[i:i+1000])
        # Absolute seek to the start of the next 2000-unit stride.
        f.seek(i+2000)
    205 
    206 # XXX the 2 following tests don't work with py3k's text IO
# Mixed benchmark: read one unit, then write the following unit of *source*,
# with no explicit seek between the two calls.
# Binary-only: the "b" in the mode is filtered out of the text family.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("wb+")
@with_sizes("medium")
def read_modify_bytewise(f, source):
    """ alternate read & write one unit """
    f.seek(0)
    for i in xrange(0, len(source), 2):
        f.read(1)
        f.write(source[i+1:i+2])
    215 
# Mixed benchmark: read 1000 units, then write the following 1000 units of
# *source*, with no explicit seek between the two calls.
# Binary-only: the "b" in the mode is filtered out of the text family.
# The docstring is printed as the benchmark label; keep its text stable.
@with_open_mode("wb+")
@with_sizes("medium")
def read_modify_blockwise(f, source):
    """ alternate read & write 1000 units """
    f.seek(0)
    for i in xrange(0, len(source), 2000):
        f.read(1000)
        f.write(source[i+1000:i+2000])
    224 
    225 
# Benchmark families, listed in execution order.  A None entry is not a
# test: run_test_family() writes a blank line for it, which visually
# separates groups in the output.

# Tests that only read or seek.
read_tests = [
    read_bytewise, read_small_chunks, read_lines, read_big_chunks,
    None, read_whole_file, None,
    seek_forward_bytewise, seek_forward_blockwise,
    read_seek_bytewise, read_seek_blockwise,
]

# Tests that only write; run_all_tests() points them at os.devnull.
write_tests = [
    write_bytewise, write_small_chunks, write_medium_chunks, write_large_chunks,
]

# Tests that rewrite an existing data file in place.
modify_tests = [
    modify_bytewise, modify_small_chunks, modify_medium_chunks,
    None,
    modify_seek_forward_bytewise, modify_seek_forward_blockwise,
    read_modify_bytewise, read_modify_blockwise,
]
    243 
    244 def run_during(duration, func):
    245     _t = time.time
    246     n = 0
    247     start = os.times()
    248     start_timestamp = _t()
    249     real_start = start[4] or start_timestamp
    250     while True:
    251         func()
    252         n += 1
    253         if _t() - start_timestamp > duration:
    254             break
    255     end = os.times()
    256     real = (end[4] if start[4] else time.time()) - real_start
    257     return n, real, sum(end[0:2]) - sum(start[0:2])
    258 
    259 def warm_cache(filename):
    260     with open(filename, "rb") as f:
    261         f.read()
    262 
    263 
    264 def run_all_tests(options):
    265     def print_label(filename, func):
    266         name = re.split(r'[-.]', filename)[0]
    267         out.write(
    268             ("[%s] %s... "
    269                 % (name.center(7), func.__doc__.strip())
    270             ).ljust(52))
    271         out.flush()
    272 
    273     def print_results(size, n, real, cpu):
    274         bw = n * float(size) / 1024 ** 2 / real
    275         bw = ("%4d MB/s" if bw > 100 else "%.3g MB/s") % bw
    276         out.write(bw.rjust(12) + "\n")
    277         if cpu < 0.90 * real:
    278             out.write("   warning: test above used only %d%% CPU, "
    279                 "result may be flawed!\n" % (100.0 * cpu / real))
    280 
    281     def run_one_test(name, size, open_func, test_func, *args):
    282         mode = test_func.file_open_mode
    283         print_label(name, test_func)
    284         if "w" not in mode or "+" in mode:
    285             warm_cache(name)
    286         with open_func(name) as f:
    287             n, real, cpu = run_during(1.5, lambda: test_func(f, *args))
    288         print_results(size, n, real, cpu)
    289 
    290     def run_test_family(tests, mode_filter, files, open_func, *make_args):
    291         for test_func in tests:
    292             if test_func is None:
    293                 out.write("\n")
    294                 continue
    295             if mode_filter in test_func.file_open_mode:
    296                 continue
    297             for s in test_func.file_sizes:
    298                 name, size = files[size_names[s]]
    299                 #name += file_ext
    300                 args = tuple(f(name, size) for f in make_args)
    301                 run_one_test(name, size,
    302                     open_func, test_func, *args)
    303 
    304     size_names = {
    305         "small": 0,
    306         "medium": 1,
    307         "large": 2,
    308     }
    309 
    310     binary_files = list(get_binary_files())
    311     text_files = list(get_text_files())
    312     if "b" in options:
    313         print("Binary unit = one byte")
    314     if "t" in options:
    315         print("Text unit = one character (%s-decoded)" % TEXT_ENCODING)
    316 
    317     # Binary reads
    318     if "b" in options and "r" in options:
    319         print("\n** Binary input **\n")
    320         run_test_family(read_tests, "t", binary_files, lambda fn: open(fn, "rb"))
    321 
    322     # Text reads
    323     if "t" in options and "r" in options:
    324         print("\n** Text input **\n")
    325         run_test_family(read_tests, "b", text_files, lambda fn: text_open(fn, "r"))
    326 
    327     # Binary writes
    328     if "b" in options and "w" in options:
    329         print("\n** Binary append **\n")
    330         def make_test_source(name, size):
    331             with open(name, "rb") as f:
    332                 return f.read()
    333         run_test_family(write_tests, "t", binary_files,
    334             lambda fn: open(os.devnull, "wb"), make_test_source)
    335 
    336     # Text writes
    337     if "t" in options and "w" in options:
    338         print("\n** Text append **\n")
    339         def make_test_source(name, size):
    340             with text_open(name, "r") as f:
    341                 return f.read()
    342         run_test_family(write_tests, "b", text_files,
    343             lambda fn: text_open(os.devnull, "w"), make_test_source)
    344 
    345     # Binary overwrites
    346     if "b" in options and "w" in options:
    347         print("\n** Binary overwrite **\n")
    348         def make_test_source(name, size):
    349             with open(name, "rb") as f:
    350                 return f.read()
    351         run_test_family(modify_tests, "t", binary_files,
    352             lambda fn: open(fn, "r+b"), make_test_source)
    353 
    354     # Text overwrites
    355     if "t" in options and "w" in options:
    356         print("\n** Text overwrite **\n")
    357         def make_test_source(name, size):
    358             with text_open(name, "r") as f:
    359                 return f.read()
    360         run_test_family(modify_tests, "b", text_files,
    361             lambda fn: text_open(fn, "r+"), make_test_source)
    362 
    363 
    364 def prepare_files():
    365     print("Preparing files...")
    366     # Binary files
    367     for name, size in get_binary_files():
    368         if os.path.isfile(name) and os.path.getsize(name) == size:
    369             continue
    370         with open(name, "wb") as f:
    371             f.write(os.urandom(size))
    372     # Text files
    373     chunk = []
    374     with text_open(__file__, "rU", encoding='utf8') as f:
    375         for line in f:
    376             if line.startswith("# <iobench text chunk marker>"):
    377                 break
    378         else:
    379             raise RuntimeError(
    380                 "Couldn't find chunk marker in %s !" % __file__)
    381         if NEWLINES == "all":
    382             it = itertools.cycle(["\n", "\r", "\r\n"])
    383         else:
    384             it = itertools.repeat(
    385                 {"cr": "\r", "lf": "\n", "crlf": "\r\n"}[NEWLINES])
    386         chunk = "".join(line.replace("\n", next(it)) for line in f)
    387         if isinstance(chunk, bytes):
    388             chunk = chunk.decode('utf8')
    389         chunk = chunk.encode(TEXT_ENCODING)
    390     for name, size in get_text_files():
    391         if os.path.isfile(name) and os.path.getsize(name) == size:
    392             continue
    393         head = chunk * (size // len(chunk))
    394         tail = chunk[:size % len(chunk)]
    395         # Adjust tail to end on a character boundary
    396         while True:
    397             try:
    398                 tail.decode(TEXT_ENCODING)
    399                 break
    400             except UnicodeDecodeError:
    401                 tail = tail[:-1]
    402         with open(name, "wb") as f:
    403             f.write(head)
    404             f.write(tail)
    405 
    406 def main():
    407     global TEXT_ENCODING, NEWLINES
    408 
    409     usage = "usage: %prog [-h|--help] [options]"
    410     parser = OptionParser(usage=usage)
    411     parser.add_option("-b", "--binary",
    412                       action="store_true", dest="binary", default=False,
    413                       help="run binary I/O tests")
    414     parser.add_option("-t", "--text",
    415                       action="store_true", dest="text", default=False,
    416                       help="run text I/O tests")
    417     parser.add_option("-r", "--read",
    418                       action="store_true", dest="read", default=False,
    419                       help="run read tests")
    420     parser.add_option("-w", "--write",
    421                       action="store_true", dest="write", default=False,
    422                       help="run write & modify tests")
    423     parser.add_option("-E", "--encoding",
    424                       action="store", dest="encoding", default=None,
    425                       help="encoding for text tests (default: %s)" % TEXT_ENCODING)
    426     parser.add_option("-N", "--newlines",
    427                       action="store", dest="newlines", default='lf',
    428                       help="line endings for text tests "
    429                            "(one of: {lf (default), cr, crlf, all})")
    430     options, args = parser.parse_args()
    431     if args:
    432         parser.error("unexpected arguments")
    433     NEWLINES = options.newlines.lower()
    434     if NEWLINES not in ('lf', 'cr', 'crlf', 'all'):
    435         parser.error("invalid 'newlines' option: %r" % NEWLINES)
    436 
    437     test_options = ""
    438     if options.read:
    439         test_options += "r"
    440     if options.write:
    441         test_options += "w"
    442     elif not options.read:
    443         test_options += "rw"
    444     if options.text:
    445         test_options += "t"
    446     if options.binary:
    447         test_options += "b"
    448     elif not options.text:
    449         test_options += "tb"
    450 
    451     if options.encoding:
    452         TEXT_ENCODING = options.encoding
    453 
    454     prepare_files()
    455     run_all_tests(test_options)
    456 
# Run the benchmarks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
    459 
    460 
    461 # -- This part to exercise text reading. Don't change anything! --
    462 # <iobench text chunk marker>
    463 
    464 """
1.
Gáttir allar,
áðr gangi fram,
um skoðask skyli,
um skyggnast skyli,
því at óvíst er at vita,
hvar óvinir
sitja á fleti fyrir.

2.
Gefendr heilir!
Gestr er inn kominn,
hvar skal sitja sjá?
Mjök er bráðr,
sá er á bröndum skal
síns of freista frama.

3.
Elds er þörf,
þeims inn er kominn
ok á kné kalinn;
matar ok váða
er manni þörf,
þeim er hefr um fjall farit.

4.
Vatns er þörf,
þeim er til verðar kemr,
þerru ok þjóðlaðar,
góðs of æðis,
ef sér geta mætti,
orðs ok endrþögu.

5.
Vits er þörf,
þeim er víða ratar;
dælt er heima hvat;
at augabragði verðr,
sá er ekki kann
ok með snotrum sitr.

6.
At hyggjandi sinni
skyli-t maðr hræsinn vera,
heldr gætinn at geði;
þá er horskr ok þögull
kemr heimisgarða til,
sjaldan verðr víti vörum,
því at óbrigðra vin
fær maðr aldregi
en mannvit mikit.

7.
Inn vari gestr,
er til verðar kemr,
þunnu hljóði þegir,
eyrum hlýðir,
en augum skoðar;
svá nýsisk fróðra hverr fyrir.

8.
Hinn er sæll,
er sér of getr
lof ok líknstafi;
ódælla er við þat,
er maðr eiga skal
annars brjóstum í.
    532 """
    533 
    534 """
C'est revenir tard, je le sens, sur un sujet trop rebattu et déjà presque oublié. Mon état, qui ne me permet plus aucun travail suivi, mon aversion pour le genre polémique, ont causé ma lenteur à écrire et ma répugnance à publier. J'aurais même tout à fait supprimé ces Lettres, ou plutôt je lie les aurais point écrites, s'il n'eût été question que de moi : Mais ma patrie ne m'est pas tellement devenue étrangère que je puisse voir tranquillement opprimer ses citoyens, surtout lorsqu'ils n'ont compromis leurs droits qu'en défendant ma cause. Je serais le dernier des hommes si dans une telle occasion j'écoutais un sentiment qui n'est plus ni douceur ni patience, mais faiblesse et lâcheté, dans celui qu'il empêche de remplir son devoir.
Rien de moins important pour le public, j'en conviens, que la matière de ces lettres. La constitution d'une petite République, le sort d'un petit particulier, l'exposé de quelques injustices, la réfutation de quelques sophismes ; tout cela n'a rien en soi d'assez considérable pour mériter beaucoup de lecteurs : mais si mes sujets sont petits mes objets sont grands, et dignes de l'attention de tout honnête homme. Laissons Genève à sa place, et Rousseau dans sa dépression ; mais la religion, mais la liberté, la justice ! voilà, qui que vous soyez, ce qui n'est pas au-dessous de vous.
Qu'on ne cherche pas même ici dans le style le dédommagement de l'aridité de la matière. Ceux que quelques traits heureux de ma plume ont si fort irrités trouveront de quoi s'apaiser dans ces lettres, L'honneur de défendre un opprimé eût enflammé mon coeur si j'avais parlé pour un autre. Réduit au triste emploi de me défendre moi-même, j'ai dû me borner à raisonner ; m'échauffer eût été m'avilir. J'aurai donc trouvé grâce en ce point devant ceux qui s'imaginent qu'il est essentiel à la vérité d'être dite froidement ; opinion que pourtant j'ai peine à comprendre. Lorsqu'une vive persuasion nous anime, le moyen d'employer un langage glacé ? Quand Archimède tout transporté courait nu dans les rues de Syracuse, en avait-il moins trouvé la vérité parce qu'il se passionnait pour elle ? Tout au contraire, celui qui la sent ne peut s'abstenir de l'adorer ; celui qui demeure froid ne l'a pas vue.
Quoi qu'il en soit, je prie les lecteurs de vouloir bien mettre à part mon beau style, et d'examiner seulement si je raisonne bien ou mal ; car enfin, de cela seul qu'un auteur s'exprime en bons termes, je ne vois pas comment il peut s'ensuivre que cet auteur ne sait ce qu'il dit.
    539 """
    540