# -*- coding: utf-8 -*-
# This file should be kept compatible with both Python 2.6 and Python >= 3.0.

"""Micro-benchmarks for binary and text file I/O.

The script creates a set of binary and text files of fixed sizes in the
current directory, then repeatedly runs small read / write / modify
workloads against them and prints the measured throughput for each.

The text files are generated from the text that follows the
``# <iobench text chunk marker>`` line at the bottom of this very file.
"""

import time
import os
import re
import sys
import hashlib
import functools
import itertools
from optparse import OptionParser

out = sys.stdout

TEXT_ENCODING = 'utf8'
NEWLINES = 'lf'

# Compatibility
try:
    xrange
except NameError:
    xrange = range


def text_open(fn, mode, encoding=None):
    """Open `fn` as a text file, using `encoding` or TEXT_ENCODING.

    If open() rejects the `encoding` keyword with a TypeError, fall back
    to a plain ``open(fn, mode)`` call.
    """
    try:
        return open(fn, mode, encoding=encoding or TEXT_ENCODING)
    except TypeError:
        return open(fn, mode)


def get_file_sizes():
    """Yield (label, size_in_bytes) pairs for the benchmark files.

    The label is the human-readable size with the space removed,
    e.g. ``('20KB', 20480)``.
    """
    for s in ['20 KB', '400 KB', '10 MB']:
        size, unit = s.split()
        size = int(size) * {'KB': 1024, 'MB': 1024 ** 2}[unit]
        yield s.replace(' ', ''), size


def get_binary_files():
    """Return an iterable of (filename, size) for the binary test files."""
    return ((name + ".bin", size) for name, size in get_file_sizes())


def get_text_files():
    """Return an iterable of (filename, size) for the text test files.

    The file name embeds the current TEXT_ENCODING and NEWLINES settings.
    """
    return (("%s-%s-%s.txt" % (name, TEXT_ENCODING, NEWLINES), size)
            for name, size in get_file_sizes())


def with_open_mode(mode):
    """Decorator factory: tag a test function with its `file_open_mode`."""
    def decorate(f):
        f.file_open_mode = mode
        return f
    return decorate


def with_sizes(*sizes):
    """Decorator factory: tag a test function with its `file_sizes` tuple."""
    def decorate(f):
        f.file_sizes = sizes
        return f
    return decorate


# Here begin the tests
#
# NOTE: the docstring of each test function is printed as the label of the
# test by run_all_tests(), so the docstrings below are runtime text and are
# deliberately kept short.

@with_open_mode("r")
@with_sizes("medium")
def read_bytewise(f):
    """ read one unit at a time """
    f.seek(0)
    while f.read(1):
        pass


@with_open_mode("r")
@with_sizes("medium")
def read_small_chunks(f):
    """ read 20 units at a time """
    f.seek(0)
    while f.read(20):
        pass


@with_open_mode("r")
@with_sizes("medium")
def read_big_chunks(f):
    """ read 4096 units at a time """
    f.seek(0)
    while f.read(4096):
        pass


@with_open_mode("r")
@with_sizes("small", "medium", "large")
def read_whole_file(f):
    """ read whole contents at once """
    f.seek(0)
    while f.read():
        pass


@with_open_mode("rt")
@with_sizes("medium")
def read_lines(f):
    """ read one line at a time """
    f.seek(0)
    for line in f:
        pass


@with_open_mode("r")
@with_sizes("medium")
def seek_forward_bytewise(f):
    """ seek forward one unit at a time """
    # Find the end position first, then seek to every offset below it.
    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    for i in xrange(0, size - 1):
        f.seek(i, 0)


@with_open_mode("r")
@with_sizes("medium")
def seek_forward_blockwise(f):
    """ seek forward 1000 units at a time """
    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    for i in xrange(0, size - 1, 1000):
        f.seek(i, 0)


@with_open_mode("rb")
@with_sizes("medium")
def read_seek_bytewise(f):
    """ alternate read & seek one unit """
    f.seek(0)
    while f.read(1):
        f.seek(1, 1)


@with_open_mode("rb")
@with_sizes("medium")
def read_seek_blockwise(f):
    """ alternate read & seek 1000 units """
    f.seek(0)
    while f.read(1000):
        f.seek(1000, 1)


@with_open_mode("w")
@with_sizes("small")
def write_bytewise(f, source):
    """ write one unit at a time """
    for i in xrange(0, len(source)):
        f.write(source[i:i+1])


@with_open_mode("w")
@with_sizes("medium")
def write_small_chunks(f, source):
    """ write 20 units at a time """
    for i in xrange(0, len(source), 20):
        f.write(source[i:i+20])


@with_open_mode("w")
@with_sizes("medium")
def write_medium_chunks(f, source):
    """ write 4096 units at a time """
    for i in xrange(0, len(source), 4096):
        f.write(source[i:i+4096])


@with_open_mode("w")
@with_sizes("large")
def write_large_chunks(f, source):
    """ write 1e6 units at a time """
    for i in xrange(0, len(source), 1000000):
        f.write(source[i:i+1000000])


@with_open_mode("w+")
@with_sizes("small")
def modify_bytewise(f, source):
    """ modify one unit at a time """
    f.seek(0)
    for i in xrange(0, len(source)):
        f.write(source[i:i+1])


@with_open_mode("w+")
@with_sizes("medium")
def modify_small_chunks(f, source):
    """ modify 20 units at a time """
    f.seek(0)
    for i in xrange(0, len(source), 20):
        f.write(source[i:i+20])


@with_open_mode("w+")
@with_sizes("medium")
def modify_medium_chunks(f, source):
    """ modify 4096 units at a time """
    f.seek(0)
    for i in xrange(0, len(source), 4096):
        f.write(source[i:i+4096])


@with_open_mode("wb+")
@with_sizes("medium")
def modify_seek_forward_bytewise(f, source):
    """ alternate write & seek one unit """
    f.seek(0)
    for i in xrange(0, len(source), 2):
        f.write(source[i:i+1])
        f.seek(i+2)


@with_open_mode("wb+")
@with_sizes("medium")
def modify_seek_forward_blockwise(f, source):
    """ alternate write & seek 1000 units """
    f.seek(0)
    for i in xrange(0, len(source), 2000):
        f.write(source[i:i+1000])
        f.seek(i+2000)


# XXX the 2 following tests don't work with py3k's text IO
@with_open_mode("wb+")
@with_sizes("medium")
def read_modify_bytewise(f, source):
    """ alternate read & write one unit """
    f.seek(0)
    for i in xrange(0, len(source), 2):
        f.read(1)
        f.write(source[i+1:i+2])


@with_open_mode("wb+")
@with_sizes("medium")
def read_modify_blockwise(f, source):
    """ alternate read & write 1000 units """
    f.seek(0)
    for i in xrange(0, len(source), 2000):
        f.read(1000)
        f.write(source[i+1000:i+2000])


# In the lists below, a None entry makes run_all_tests() print a blank
# separator line between groups of results.
read_tests = [
    read_bytewise, read_small_chunks, read_lines, read_big_chunks,
    None, read_whole_file, None,
    seek_forward_bytewise, seek_forward_blockwise,
    read_seek_bytewise, read_seek_blockwise,
]

write_tests = [
    write_bytewise, write_small_chunks, write_medium_chunks, write_large_chunks,
]

modify_tests = [
    modify_bytewise, modify_small_chunks, modify_medium_chunks,
    None,
    modify_seek_forward_bytewise, modify_seek_forward_blockwise,
    read_modify_bytewise, read_modify_blockwise,
]


def run_during(duration, func):
    """Call `func()` repeatedly until more than `duration` seconds elapsed.

    `func` is always called at least once.

    Returns a tuple ``(n, real, cpu)`` where `n` is the number of calls,
    `real` the elapsed real time and `cpu` the user + system CPU time
    consumed, both taken from os.times().  When os.times() reports no
    elapsed time (fifth field is zero), time.time() is used for `real`.
    """
    _t = time.time
    n = 0
    start = os.times()
    start_timestamp = _t()
    real_start = start[4] or start_timestamp
    while True:
        func()
        n += 1
        if _t() - start_timestamp > duration:
            break
    end = os.times()
    real = (end[4] if start[4] else time.time()) - real_start
    return n, real, sum(end[0:2]) - sum(start[0:2])


def warm_cache(filename):
    """Read `filename` once in full and discard the data."""
    with open(filename, "rb") as f:
        f.read()


def run_all_tests(options):
    """Run every test family selected by `options` and print the results.

    `options` is a string of flags: ``"b"`` / ``"t"`` select binary / text
    tests, ``"r"`` / ``"w"`` select read / write & modify tests.
    """
    def print_label(filename, func):
        # The label is the size prefix of the file name plus the test's
        # docstring.  Fall back to the function name when docstrings are
        # unavailable (e.g. when running under "python -OO").
        name = re.split(r'[-.]', filename)[0]
        label = (func.__doc__ or func.__name__).strip()
        out.write(("[%s] %s... " % (name.center(7), label)).ljust(52))
        out.flush()

    def print_results(size, n, real, cpu):
        # Bandwidth in MB/s: n passes over `size` units in `real` seconds.
        bw = n * float(size) / 1024 ** 2 / real
        bw = ("%4d MB/s" if bw > 100 else "%.3g MB/s") % bw
        out.write(bw.rjust(12) + "\n")
        if cpu < 0.90 * real:
            out.write(" warning: test above used only %d%% CPU, "
                      "result may be flawed!\n" % (100.0 * cpu / real))

    def run_one_test(name, size, open_func, test_func, *args):
        mode = test_func.file_open_mode
        print_label(name, test_func)
        # Pre-read the file for every test that reads it (pure "w" tests
        # are skipped).
        if "w" not in mode or "+" in mode:
            warm_cache(name)
        with open_func(name) as f:
            n, real, cpu = run_during(1.5, lambda: test_func(f, *args))
        print_results(size, n, real, cpu)

    def run_test_family(tests, mode_filter, files, open_func, *make_args):
        for test_func in tests:
            if test_func is None:
                out.write("\n")
                continue
            # Skip tests whose declared open mode contains the filtered
            # flag ("t" when running binary tests, "b" for text tests).
            if mode_filter in test_func.file_open_mode:
                continue
            for s in test_func.file_sizes:
                name, size = files[size_names[s]]
                args = tuple(f(name, size) for f in make_args)
                run_one_test(name, size,
                             open_func, test_func, *args)

    def read_binary_source(name, size):
        # Whole contents of `name` as bytes, used as data for write tests.
        with open(name, "rb") as f:
            return f.read()

    def read_text_source(name, size):
        # Whole contents of `name` as text, used as data for write tests.
        with text_open(name, "r") as f:
            return f.read()

    size_names = {
        "small": 0,
        "medium": 1,
        "large": 2,
    }

    binary_files = list(get_binary_files())
    text_files = list(get_text_files())
    if "b" in options:
        print("Binary unit = one byte")
    if "t" in options:
        print("Text unit = one character (%s-decoded)" % TEXT_ENCODING)

    # Binary reads
    if "b" in options and "r" in options:
        print("\n** Binary input **\n")
        run_test_family(read_tests, "t", binary_files,
                        lambda fn: open(fn, "rb"))

    # Text reads
    if "t" in options and "r" in options:
        print("\n** Text input **\n")
        run_test_family(read_tests, "b", text_files,
                        lambda fn: text_open(fn, "r"))

    # Binary writes
    if "b" in options and "w" in options:
        print("\n** Binary append **\n")
        run_test_family(write_tests, "t", binary_files,
                        lambda fn: open(os.devnull, "wb"),
                        read_binary_source)

    # Text writes
    if "t" in options and "w" in options:
        print("\n** Text append **\n")
        run_test_family(write_tests, "b", text_files,
                        lambda fn: text_open(os.devnull, "w"),
                        read_text_source)

    # Binary overwrites
    if "b" in options and "w" in options:
        print("\n** Binary overwrite **\n")
        run_test_family(modify_tests, "t", binary_files,
                        lambda fn: open(fn, "r+b"), read_binary_source)

    # Text overwrites
    if "t" in options and "w" in options:
        print("\n** Text overwrite **\n")
        run_test_family(modify_tests, "b", text_files,
                        lambda fn: text_open(fn, "r+"), read_text_source)


def load_text_chunk(filename):
    """Return the text following the chunk marker in `filename`, encoded.

    Every line after the first line starting with
    ``# <iobench text chunk marker>`` is collected, its ``"\\n"`` replaced
    according to NEWLINES, and the result is encoded with TEXT_ENCODING.

    Raises RuntimeError if the marker is missing or nothing follows it.
    """
    # The "U" mode flag was removed in Python 3.11 (open() raises
    # ValueError); universal newlines are the default for text mode on
    # Python 3, so the flag is only needed on Python 2.
    mode = "rU" if sys.version_info[0] < 3 else "r"
    with text_open(filename, mode, encoding='utf8') as f:
        for line in f:
            if line.startswith("# <iobench text chunk marker>"):
                break
        else:
            raise RuntimeError(
                "Couldn't find chunk marker in %s !" % filename)
        if NEWLINES == "all":
            it = itertools.cycle(["\n", "\r", "\r\n"])
        else:
            it = itertools.repeat(
                {"cr": "\r", "lf": "\n", "crlf": "\r\n"}[NEWLINES])
        # `f` is positioned just after the marker line here.
        chunk = "".join(line.replace("\n", next(it)) for line in f)
    if not chunk:
        # An empty chunk would cause a division by zero in the caller.
        raise RuntimeError(
            "No text found after chunk marker in %s !" % filename)
    if isinstance(chunk, bytes):
        # Python 2: lines were read as byte strings.
        chunk = chunk.decode('utf8')
    return chunk.encode(TEXT_ENCODING)


def prepare_files():
    """Create the binary and text benchmark files in the current directory.

    A file that already exists with exactly the expected size is left
    untouched.  Binary files are filled with os.urandom() data; text files
    are filled by repeating the text chunk found at the end of this script.
    """
    print("Preparing files...")
    # Binary files
    for name, size in get_binary_files():
        if os.path.isfile(name) and os.path.getsize(name) == size:
            continue
        with open(name, "wb") as f:
            f.write(os.urandom(size))
    # Text files
    chunk = load_text_chunk(__file__)
    for name, size in get_text_files():
        if os.path.isfile(name) and os.path.getsize(name) == size:
            continue
        head = chunk * (size // len(chunk))
        tail = chunk[:size % len(chunk)]
        # Adjust tail to end on a character boundary
        while True:
            try:
                tail.decode(TEXT_ENCODING)
                break
            except UnicodeDecodeError:
                tail = tail[:-1]
        with open(name, "wb") as f:
            f.write(head)
            f.write(tail)


def main():
    """Parse the command line, prepare the data files and run the tests.

    Without -r/-w both read and write tests run; without -t/-b both text
    and binary tests run.
    """
    global TEXT_ENCODING, NEWLINES

    usage = "usage: %prog [-h|--help] [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-b", "--binary",
                      action="store_true", dest="binary", default=False,
                      help="run binary I/O tests")
    parser.add_option("-t", "--text",
                      action="store_true", dest="text", default=False,
                      help="run text I/O tests")
    parser.add_option("-r", "--read",
                      action="store_true", dest="read", default=False,
                      help="run read tests")
    parser.add_option("-w", "--write",
                      action="store_true", dest="write", default=False,
                      help="run write & modify tests")
    parser.add_option("-E", "--encoding",
                      action="store", dest="encoding", default=None,
                      help="encoding for text tests (default: %s)" % TEXT_ENCODING)
    parser.add_option("-N", "--newlines",
                      action="store", dest="newlines", default='lf',
                      help="line endings for text tests "
                           "(one of: {lf (default), cr, crlf, all})")
    options, args = parser.parse_args()
    if args:
        parser.error("unexpected arguments")
    NEWLINES = options.newlines.lower()
    if NEWLINES not in ('lf', 'cr', 'crlf', 'all'):
        parser.error("invalid 'newlines' option: %r" % NEWLINES)

    # Build the flag string understood by run_all_tests().
    test_options = ""
    if options.read:
        test_options += "r"
    if options.write:
        test_options += "w"
    elif not options.read:
        test_options += "rw"
    if options.text:
        test_options += "t"
    if options.binary:
        test_options += "b"
    elif not options.text:
        test_options += "tb"

    if options.encoding:
        TEXT_ENCODING = options.encoding

    prepare_files()
    run_all_tests(test_options)


if __name__ == "__main__":
    main()


# -- This part to exercise text reading. Don't change anything! --
# <iobench text chunk marker>

"""
1.
Gttir allar,
r gangi fram,
um skoask skyli,
um skyggnast skyli,
v at vst er at vita,
hvar vinir
sitja fleti fyrir.

2.
Gefendr heilir!
Gestr er inn kominn,
hvar skal sitja sj?
Mjk er brr,
s er brndum skal
sns of freista frama.

3.
Elds er rf,
eims inn er kominn
ok kn kalinn;
matar ok va
er manni rf,
eim er hefr um fjall farit.

4.
Vatns er rf,
eim er til verar kemr,
erru ok jlaar,
gs of is,
ef sr geta mtti,
ors ok endrgu.

5.
Vits er rf,
eim er va ratar;
dlt er heima hvat;
at augabragi verr,
s er ekki kann
ok me snotrum sitr.

6.
At hyggjandi sinni
skyli-t mar hrsinn vera,
heldr gtinn at gei;
er horskr ok gull
kemr heimisgara til,
sjaldan verr vti vrum,
v at brigra vin
fr mar aldregi
en mannvit mikit.

7.
Inn vari gestr,
er til verar kemr,
unnu hlji egir,
eyrum hlir,
en augum skoar;
sv nsisk frra hverr fyrir.

8.
Hinn er sll,
er sr of getr
lof ok lknstafi;
dlla er vi at,
er mar eiga skal
annars brjstum .
"""

"""
C'est revenir tard, je le sens, sur un sujet trop rebattu et dj presque oubli. Mon tat, qui ne me permet plus aucun travail suivi, mon aversion pour le genre polmique, ont caus ma lenteur crire et ma rpugnance publier. J'aurais mme tout fait supprim ces Lettres, ou plutt je lie les aurais point crites, s'il n'et t question que de moi : Mais ma patrie ne m'est pas tellement devenue trangre que je puisse voir tranquillement opprimer ses citoyens, surtout lorsqu'ils n'ont compromis leurs droits qu'en dfendant ma cause. Je serais le dernier des hommes si dans une telle occasion j'coutais un sentiment qui n'est plus ni douceur ni patience, mais faiblesse et lchet, dans celui qu'il empche de remplir son devoir.
Rien de moins important pour le public, j'en conviens, que la matire de ces lettres. La constitution d'une petite Rpublique, le sort d'un petit particulier, l'expos de quelques injustices, la rfutation de quelques sophismes ; tout cela n'a rien en soi d'assez considrable pour mriter beaucoup de lecteurs : mais si mes sujets sont petits mes objets sont grands, et dignes de l'attention de tout honnte homme. Laissons Genve sa place, et Rousseau dans sa dpression ; mais la religion, mais la libert, la justice ! voil, qui que vous soyez, ce qui n'est pas au-dessous de vous.
Qu'on ne cherche pas mme ici dans le style le ddommagement de l'aridit de la matire. Ceux que quelques traits heureux de ma plume ont si fort irrits trouveront de quoi s'apaiser dans ces lettres, L'honneur de dfendre un opprim et enflamm mon coeur si j'avais parl pour un autre. Rduit au triste emploi de me dfendre moi-mme, j'ai d me borner raisonner ; m'chauffer et t m'avilir. J'aurai donc trouv grce en ce point devant ceux qui s'imaginent qu'il est essentiel la vrit d'tre dite froidement ; opinion que pourtant j'ai peine comprendre. Lorsqu'une vive persuasion nous anime, le moyen d'employer un langage glac ? Quand Archimde tout transport courait nu dans les rues de Syracuse, en avait-il moins trouv la vrit parce qu'il se passionnait pour elle ? Tout au contraire, celui qui la sent ne peut s'abstenir de l'adorer ; celui qui demeure froid ne l'a pas vue.
Quoi qu'il en soit, je prie les lecteurs de vouloir bien mettre part mon beau style, et d'examiner seulement si je raisonne bien ou mal ; car enfin, de cela seul qu'un auteur s'exprime en bons termes, je ne vois pas comment il peut s'ensuivre que cet auteur ne sait ce qu'il dit.
"""