Home | History | Annotate | Download | only in stringbench
      1 
      2 # Various microbenchmarks comparing unicode and byte string performance
      3 # Please keep this file both 2.x and 3.x compatible!
      4 
      5 import timeit
      6 import itertools
      7 import operator
      8 import re
      9 import sys
     10 import datetime
     11 import optparse
     12 
     13 VERSION = '2.0'
     14 
     15 def p(*args):
     16     sys.stdout.write(' '.join(str(s) for s in args) + '\n')
     17 
     18 if sys.version_info >= (3,):
     19     BYTES = bytes_from_str = lambda x: x.encode('ascii')
     20     UNICODE = unicode_from_str = lambda x: x
     21 else:
     22     BYTES = bytes_from_str = lambda x: x
     23     UNICODE = unicode_from_str = lambda x: x.decode('ascii')
     24 
     25 class UnsupportedType(TypeError):
     26     pass
     27 
     28 
# Start-up banner: suite version, interpreter version, and the current time.
p('stringbench v%s' % VERSION)
p(sys.version)
p(datetime.datetime.now())
     32 
     33 REPEAT = 1
     34 REPEAT = 3
     35 #REPEAT = 7
     36 
# This file is a script, not a library: stop immediately when it is imported
# rather than executed as the main program.
if __name__ != "__main__":
    raise SystemExit("Must run as main program")
     39 
     40 parser = optparse.OptionParser()
     41 parser.add_option("-R", "--skip-re", dest="skip_re",
     42                   action="store_true",
     43                   help="skip regular expression tests")
     44 parser.add_option("-8", "--8-bit", dest="bytes_only",
     45                   action="store_true",
     46                   help="only do 8-bit string benchmarks")
     47 parser.add_option("-u", "--unicode", dest="unicode_only",
     48                   action="store_true",
     49                   help="only do Unicode string benchmarks")
     50 
     51 
     52 _RANGE_1000 = list(range(1000))
     53 _RANGE_100 = list(range(100))
     54 _RANGE_10 = list(range(10))
     55 
     56 dups = {}
     57 def bench(s, group, repeat_count):
     58     def blah(f):
     59         if f.__name__ in dups:
     60             raise AssertionError("Multiple functions with same name: %r" %
     61                                  (f.__name__,))
     62         dups[f.__name__] = 1
     63         f.comment = s
     64         f.is_bench = True
     65         f.group = group
     66         f.repeat_count = repeat_count
     67         return f
     68     return blah
     69 
     70 def uses_re(f):
     71     f.uses_re = True
     72 
     73 ####### 'in' comparisons
     74 
     75 @bench('"A" in "A"*1000', "early match, single character", 1000)
     76 def in_test_quick_match_single_character(STR):
     77     s1 = STR("A" * 1000)
     78     s2 = STR("A")
     79     for x in _RANGE_1000:
     80         s2 in s1
     81 
     82 @bench('"B" in "A"*1000', "no match, single character", 1000)
     83 def in_test_no_match_single_character(STR):
     84     s1 = STR("A" * 1000)
     85     s2 = STR("B")
     86     for x in _RANGE_1000:
     87         s2 in s1
     88 
     89 
     90 @bench('"AB" in "AB"*1000', "early match, two characters", 1000)
     91 def in_test_quick_match_two_characters(STR):
     92     s1 = STR("AB" * 1000)
     93     s2 = STR("AB")
     94     for x in _RANGE_1000:
     95         s2 in s1
     96 
     97 @bench('"BC" in "AB"*1000', "no match, two characters", 1000)
     98 def in_test_no_match_two_character(STR):
     99     s1 = STR("AB" * 1000)
    100     s2 = STR("BC")
    101     for x in _RANGE_1000:
    102         s2 in s1
    103 
    104 @bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
    105 def in_test_slow_match_two_characters(STR):
    106     s1 = STR("AB" * 300+"C")
    107     s2 = STR("BC")
    108     for x in _RANGE_1000:
    109         s2 in s1
    110 
    111 @bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")',
    112        "late match, 100 characters", 100)
    113 def in_test_slow_match_100_characters(STR):
    114     m = STR("ABC"*33)
    115     d = STR("D")
    116     e = STR("E")
    117     s1 = (m+d)*300 + m+e
    118     s2 = m+e
    119     for x in _RANGE_100:
    120         s2 in s1
    121 
    122 # Try with regex
    123 @uses_re
    124 @bench('s="ABC"*33; re.compile(s+"D").search((s+"D")*300+s+"E")',
    125        "late match, 100 characters", 100)
    126 def re_test_slow_match_100_characters(STR):
    127     m = STR("ABC"*33)
    128     d = STR("D")
    129     e = STR("E")
    130     s1 = (m+d)*300 + m+e
    131     s2 = m+e
    132     pat = re.compile(s2)
    133     search = pat.search
    134     for x in _RANGE_100:
    135         search(s1)
    136 
    137 
    138 #### same tests as 'in' but use 'find'
    139 
    140 @bench('("A"*1000).find("A")', "early match, single character", 1000)
    141 def find_test_quick_match_single_character(STR):
    142     s1 = STR("A" * 1000)
    143     s2 = STR("A")
    144     s1_find = s1.find
    145     for x in _RANGE_1000:
    146         s1_find(s2)
    147 
    148 @bench('("A"*1000).find("B")', "no match, single character", 1000)
    149 def find_test_no_match_single_character(STR):
    150     s1 = STR("A" * 1000)
    151     s2 = STR("B")
    152     s1_find = s1.find
    153     for x in _RANGE_1000:
    154         s1_find(s2)
    155 
    156 
    157 @bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
    158 def find_test_quick_match_two_characters(STR):
    159     s1 = STR("AB" * 1000)
    160     s2 = STR("AB")
    161     s1_find = s1.find
    162     for x in _RANGE_1000:
    163         s1_find(s2)
    164 
    165 @bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
    166 def find_test_no_match_two_character(STR):
    167     s1 = STR("AB" * 1000)
    168     s2 = STR("BC")
    169     s1_find = s1.find
    170     for x in _RANGE_1000:
    171         s1_find(s2)
    172 
    173 @bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
    174 def find_test_no_match_two_character_bis(STR):
    175     s1 = STR("AB" * 1000)
    176     s2 = STR("CA")
    177     s1_find = s1.find
    178     for x in _RANGE_1000:
    179         s1_find(s2)
    180 
    181 @bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
    182 def find_test_slow_match_two_characters(STR):
    183     s1 = STR("AB" * 300+"C")
    184     s2 = STR("BC")
    185     s1_find = s1.find
    186     for x in _RANGE_1000:
    187         s1_find(s2)
    188 
    189 @bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
    190 def find_test_slow_match_two_characters_bis(STR):
    191     s1 = STR("AB" * 300+"CA")
    192     s2 = STR("CA")
    193     s1_find = s1.find
    194     for x in _RANGE_1000:
    195         s1_find(s2)
    196 
    197 @bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
    198        "late match, 100 characters", 100)
    199 def find_test_slow_match_100_characters(STR):
    200     m = STR("ABC"*33)
    201     d = STR("D")
    202     e = STR("E")
    203     s1 = (m+d)*500 + m+e
    204     s2 = m+e
    205     s1_find = s1.find
    206     for x in _RANGE_100:
    207         s1_find(s2)
    208 
    209 @bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
    210        "late match, 100 characters", 100)
    211 def find_test_slow_match_100_characters_bis(STR):
    212     m = STR("ABC"*33)
    213     d = STR("D")
    214     e = STR("E")
    215     s1 = (m+d)*500 + e+m
    216     s2 = e+m
    217     s1_find = s1.find
    218     for x in _RANGE_100:
    219         s1_find(s2)
    220 
    221 
    222 #### Same tests for 'rfind'
    223 
    224 @bench('("A"*1000).rfind("A")', "early match, single character", 1000)
    225 def rfind_test_quick_match_single_character(STR):
    226     s1 = STR("A" * 1000)
    227     s2 = STR("A")
    228     s1_rfind = s1.rfind
    229     for x in _RANGE_1000:
    230         s1_rfind(s2)
    231 
    232 @bench('("A"*1000).rfind("B")', "no match, single character", 1000)
    233 def rfind_test_no_match_single_character(STR):
    234     s1 = STR("A" * 1000)
    235     s2 = STR("B")
    236     s1_rfind = s1.rfind
    237     for x in _RANGE_1000:
    238         s1_rfind(s2)
    239 
    240 
    241 @bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
    242 def rfind_test_quick_match_two_characters(STR):
    243     s1 = STR("AB" * 1000)
    244     s2 = STR("AB")
    245     s1_rfind = s1.rfind
    246     for x in _RANGE_1000:
    247         s1_rfind(s2)
    248 
    249 @bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
    250 def rfind_test_no_match_two_character(STR):
    251     s1 = STR("AB" * 1000)
    252     s2 = STR("BC")
    253     s1_rfind = s1.rfind
    254     for x in _RANGE_1000:
    255         s1_rfind(s2)
    256 
    257 @bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
    258 def rfind_test_no_match_two_character_bis(STR):
    259     s1 = STR("AB" * 1000)
    260     s2 = STR("CA")
    261     s1_rfind = s1.rfind
    262     for x in _RANGE_1000:
    263         s1_rfind(s2)
    264 
    265 @bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
    266 def rfind_test_slow_match_two_characters(STR):
    267     s1 = STR("C" + "AB" * 300)
    268     s2 = STR("CA")
    269     s1_rfind = s1.rfind
    270     for x in _RANGE_1000:
    271         s1_rfind(s2)
    272 
    273 @bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
    274 def rfind_test_slow_match_two_characters_bis(STR):
    275     s1 = STR("BC" + "AB" * 300)
    276     s2 = STR("BC")
    277     s1_rfind = s1.rfind
    278     for x in _RANGE_1000:
    279         s1_rfind(s2)
    280 
    281 @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
    282        "late match, 100 characters", 100)
    283 def rfind_test_slow_match_100_characters(STR):
    284     m = STR("ABC"*33)
    285     d = STR("D")
    286     e = STR("E")
    287     s1 = e+m + (d+m)*500
    288     s2 = e+m
    289     s1_rfind = s1.rfind
    290     for x in _RANGE_100:
    291         s1_rfind(s2)
    292 
    293 @bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
    294        "late match, 100 characters", 100)
    295 def rfind_test_slow_match_100_characters_bis(STR):
    296     m = STR("ABC"*33)
    297     d = STR("D")
    298     e = STR("E")
    299     s1 = m+e + (d+m)*500
    300     s2 = m+e
    301     s1_rfind = s1.rfind
    302     for x in _RANGE_100:
    303         s1_rfind(s2)
    304 
    305 
#### Now with index.
# The no-match cases are skipped here: index() raises ValueError when the
# substring is absent, so timing them would include exception overhead.
    308 
    309 @bench('("A"*1000).index("A")', "early match, single character", 1000)
    310 def index_test_quick_match_single_character(STR):
    311     s1 = STR("A" * 1000)
    312     s2 = STR("A")
    313     s1_index = s1.index
    314     for x in _RANGE_1000:
    315         s1_index(s2)
    316 
    317 @bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
    318 def index_test_quick_match_two_characters(STR):
    319     s1 = STR("AB" * 1000)
    320     s2 = STR("AB")
    321     s1_index = s1.index
    322     for x in _RANGE_1000:
    323         s1_index(s2)
    324 
    325 @bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
    326 def index_test_slow_match_two_characters(STR):
    327     s1 = STR("AB" * 300+"C")
    328     s2 = STR("BC")
    329     s1_index = s1.index
    330     for x in _RANGE_1000:
    331         s1_index(s2)
    332 
    333 @bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
    334        "late match, 100 characters", 100)
    335 def index_test_slow_match_100_characters(STR):
    336     m = STR("ABC"*33)
    337     d = STR("D")
    338     e = STR("E")
    339     s1 = (m+d)*500 + m+e
    340     s2 = m+e
    341     s1_index = s1.index
    342     for x in _RANGE_100:
    343         s1_index(s2)
    344 
    345 
    346 #### Same for rindex
    347 
    348 @bench('("A"*1000).rindex("A")', "early match, single character", 1000)
    349 def rindex_test_quick_match_single_character(STR):
    350     s1 = STR("A" * 1000)
    351     s2 = STR("A")
    352     s1_rindex = s1.rindex
    353     for x in _RANGE_1000:
    354         s1_rindex(s2)
    355 
    356 @bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
    357 def rindex_test_quick_match_two_characters(STR):
    358     s1 = STR("AB" * 1000)
    359     s2 = STR("AB")
    360     s1_rindex = s1.rindex
    361     for x in _RANGE_1000:
    362         s1_rindex(s2)
    363 
    364 @bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
    365 def rindex_test_slow_match_two_characters(STR):
    366     s1 = STR("C" + "AB" * 300)
    367     s2 = STR("CA")
    368     s1_rindex = s1.rindex
    369     for x in _RANGE_1000:
    370         s1_rindex(s2)
    371 
    372 @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)',
    373        "late match, 100 characters", 100)
    374 def rindex_test_slow_match_100_characters(STR):
    375     m = STR("ABC"*33)
    376     d = STR("D")
    377     e = STR("E")
    378     s1 = e + m + (d+m)*500
    379     s2 = e + m
    380     s1_rindex = s1.rindex
    381     for x in _RANGE_100:
    382         s1_rindex(s2)
    383 
    384 
    385 #### Same for partition
    386 
    387 @bench('("A"*1000).partition("A")', "early match, single character", 1000)
    388 def partition_test_quick_match_single_character(STR):
    389     s1 = STR("A" * 1000)
    390     s2 = STR("A")
    391     s1_partition = s1.partition
    392     for x in _RANGE_1000:
    393         s1_partition(s2)
    394 
    395 @bench('("A"*1000).partition("B")', "no match, single character", 1000)
    396 def partition_test_no_match_single_character(STR):
    397     s1 = STR("A" * 1000)
    398     s2 = STR("B")
    399     s1_partition = s1.partition
    400     for x in _RANGE_1000:
    401         s1_partition(s2)
    402 
    403 
    404 @bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
    405 def partition_test_quick_match_two_characters(STR):
    406     s1 = STR("AB" * 1000)
    407     s2 = STR("AB")
    408     s1_partition = s1.partition
    409     for x in _RANGE_1000:
    410         s1_partition(s2)
    411 
    412 @bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
    413 def partition_test_no_match_two_character(STR):
    414     s1 = STR("AB" * 1000)
    415     s2 = STR("BC")
    416     s1_partition = s1.partition
    417     for x in _RANGE_1000:
    418         s1_partition(s2)
    419 
    420 @bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
    421 def partition_test_slow_match_two_characters(STR):
    422     s1 = STR("AB" * 300+"C")
    423     s2 = STR("BC")
    424     s1_partition = s1.partition
    425     for x in _RANGE_1000:
    426         s1_partition(s2)
    427 
    428 @bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")',
    429        "late match, 100 characters", 100)
    430 def partition_test_slow_match_100_characters(STR):
    431     m = STR("ABC"*33)
    432     d = STR("D")
    433     e = STR("E")
    434     s1 = (m+d)*500 + m+e
    435     s2 = m+e
    436     s1_partition = s1.partition
    437     for x in _RANGE_100:
    438         s1_partition(s2)
    439 
    440 
    441 #### Same for rpartition
    442 
    443 @bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
    444 def rpartition_test_quick_match_single_character(STR):
    445     s1 = STR("A" * 1000)
    446     s2 = STR("A")
    447     s1_rpartition = s1.rpartition
    448     for x in _RANGE_1000:
    449         s1_rpartition(s2)
    450 
    451 @bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
    452 def rpartition_test_no_match_single_character(STR):
    453     s1 = STR("A" * 1000)
    454     s2 = STR("B")
    455     s1_rpartition = s1.rpartition
    456     for x in _RANGE_1000:
    457         s1_rpartition(s2)
    458 
    459 
    460 @bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
    461 def rpartition_test_quick_match_two_characters(STR):
    462     s1 = STR("AB" * 1000)
    463     s2 = STR("AB")
    464     s1_rpartition = s1.rpartition
    465     for x in _RANGE_1000:
    466         s1_rpartition(s2)
    467 
    468 @bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
    469 def rpartition_test_no_match_two_character(STR):
    470     s1 = STR("AB" * 1000)
    471     s2 = STR("BC")
    472     s1_rpartition = s1.rpartition
    473     for x in _RANGE_1000:
    474         s1_rpartition(s2)
    475 
    476 @bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
    477 def rpartition_test_slow_match_two_characters(STR):
    478     s1 = STR("C" + "AB" * 300)
    479     s2 = STR("CA")
    480     s1_rpartition = s1.rpartition
    481     for x in _RANGE_1000:
    482         s1_rpartition(s2)
    483 
    484 @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)',
    485        "late match, 100 characters", 100)
    486 def rpartition_test_slow_match_100_characters(STR):
    487     m = STR("ABC"*33)
    488     d = STR("D")
    489     e = STR("E")
    490     s1 = e + m + (d+m)*500
    491     s2 = e + m
    492     s1_rpartition = s1.rpartition
    493     for x in _RANGE_100:
    494         s1_rpartition(s2)
    495 
    496 
    497 #### Same for split(s, 1)
    498 
    499 @bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
    500 def split_test_quick_match_single_character(STR):
    501     s1 = STR("A" * 1000)
    502     s2 = STR("A")
    503     s1_split = s1.split
    504     for x in _RANGE_1000:
    505         s1_split(s2, 1)
    506 
    507 @bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
    508 def split_test_no_match_single_character(STR):
    509     s1 = STR("A" * 1000)
    510     s2 = STR("B")
    511     s1_split = s1.split
    512     for x in _RANGE_1000:
    513         s1_split(s2, 1)
    514 
    515 
    516 @bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
    517 def split_test_quick_match_two_characters(STR):
    518     s1 = STR("AB" * 1000)
    519     s2 = STR("AB")
    520     s1_split = s1.split
    521     for x in _RANGE_1000:
    522         s1_split(s2, 1)
    523 
    524 @bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
    525 def split_test_no_match_two_character(STR):
    526     s1 = STR("AB" * 1000)
    527     s2 = STR("BC")
    528     s1_split = s1.split
    529     for x in _RANGE_1000:
    530         s1_split(s2, 1)
    531 
    532 @bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
    533 def split_test_slow_match_two_characters(STR):
    534     s1 = STR("AB" * 300+"C")
    535     s2 = STR("BC")
    536     s1_split = s1.split
    537     for x in _RANGE_1000:
    538         s1_split(s2, 1)
    539 
    540 @bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)',
    541        "late match, 100 characters", 100)
    542 def split_test_slow_match_100_characters(STR):
    543     m = STR("ABC"*33)
    544     d = STR("D")
    545     e = STR("E")
    546     s1 = (m+d)*500 + m+e
    547     s2 = m+e
    548     s1_split = s1.split
    549     for x in _RANGE_100:
    550         s1_split(s2, 1)
    551 
    552 
    553 #### Same for rsplit(s, 1)
    554 
    555 @bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
    556 def rsplit_test_quick_match_single_character(STR):
    557     s1 = STR("A" * 1000)
    558     s2 = STR("A")
    559     s1_rsplit = s1.rsplit
    560     for x in _RANGE_1000:
    561         s1_rsplit(s2, 1)
    562 
    563 @bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
    564 def rsplit_test_no_match_single_character(STR):
    565     s1 = STR("A" * 1000)
    566     s2 = STR("B")
    567     s1_rsplit = s1.rsplit
    568     for x in _RANGE_1000:
    569         s1_rsplit(s2, 1)
    570 
    571 
    572 @bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
    573 def rsplit_test_quick_match_two_characters(STR):
    574     s1 = STR("AB" * 1000)
    575     s2 = STR("AB")
    576     s1_rsplit = s1.rsplit
    577     for x in _RANGE_1000:
    578         s1_rsplit(s2, 1)
    579 
    580 @bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
    581 def rsplit_test_no_match_two_character(STR):
    582     s1 = STR("AB" * 1000)
    583     s2 = STR("BC")
    584     s1_rsplit = s1.rsplit
    585     for x in _RANGE_1000:
    586         s1_rsplit(s2, 1)
    587 
    588 @bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
    589 def rsplit_test_slow_match_two_characters(STR):
    590     s1 = STR("C" + "AB" * 300)
    591     s2 = STR("CA")
    592     s1_rsplit = s1.rsplit
    593     for x in _RANGE_1000:
    594         s1_rsplit(s2, 1)
    595 
    596 @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)',
    597        "late match, 100 characters", 100)
    598 def rsplit_test_slow_match_100_characters(STR):
    599     m = STR("ABC"*33)
    600     d = STR("D")
    601     e = STR("E")
    602     s1 = e + m + (d+m)*500
    603     s2 = e + m
    604     s1_rsplit = s1.rsplit
    605     for x in _RANGE_100:
    606         s1_rsplit(s2, 1)
    607 
    608 
    609 #### Benchmark the operator-based methods
    610 
    611 @bench('"A"*10', "repeat 1 character 10 times", 1000)
    612 def repeat_single_10_times(STR):
    613     s = STR("A")
    614     for x in _RANGE_1000:
    615         s * 10
    616 
    617 @bench('"A"*1000', "repeat 1 character 1000 times", 1000)
    618 def repeat_single_1000_times(STR):
    619     s = STR("A")
    620     for x in _RANGE_1000:
    621         s * 1000
    622 
    623 @bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000)
    624 def repeat_5_10_times(STR):
    625     s = STR("ABCDE")
    626     for x in _RANGE_1000:
    627         s * 10
    628 
    629 @bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000)
    630 def repeat_5_1000_times(STR):
    631     s = STR("ABCDE")
    632     for x in _RANGE_1000:
    633         s * 1000
    634 
    635 # + for concat
    636 
    637 @bench('"Andrew"+"Dalke"', "concat two strings", 1000)
    638 def concat_two_strings(STR):
    639     s1 = STR("Andrew")
    640     s2 = STR("Dalke")
    641     for x in _RANGE_1000:
    642         s1+s2
    643 
    644 @bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15",
    645        1000)
    646 def concat_many_strings(STR):
    647     s1=STR('TIXSGYNREDCVBHJ')
    648     s2=STR('PUMTLXBZVDO')
    649     s3=STR('FVZNJ')
    650     s4=STR('OGDXUW')
    651     s5=STR('WEIMRNCOYVGHKB')
    652     s6=STR('FCQTNMXPUZH')
    653     s7=STR('TICZJYRLBNVUEAK')
    654     s8=STR('REYB')
    655     s9=STR('PWUOQ')
    656     s10=STR('EQHCMKBS')
    657     s11=STR('AEVDFOH')
    658     s12=STR('IFHVD')
    659     s13=STR('JGTCNLXWOHQ')
    660     s14=STR('ITSKEPYLROZAWXF')
    661     s15=STR('THEK')
    662     s16=STR('GHPZFBUYCKMNJIT')
    663     s17=STR('JMUZ')
    664     s18=STR('WLZQMTB')
    665     s19=STR('KPADCBW')
    666     s20=STR('TNJHZQAGBU')
    667     for x in _RANGE_1000:
    668         (s1 + s2+ s3+ s4+ s5+ s6+ s7+ s8+ s9+s10+
    669          s11+s12+s13+s14+s15+s16+s17+s18+s19+s20)
    670 
    671 
    672 #### Benchmark join
    673 
    674 def get_bytes_yielding_seq(STR, arg):
    675     if STR is BYTES and sys.version_info >= (3,):
    676         raise UnsupportedType
    677     return STR(arg)
    678 
    679 @bench('"A".join("")',
    680        "join empty string, with 1 character sep", 100)
    681 def join_empty_single(STR):
    682     sep = STR("A")
    683     s2 = get_bytes_yielding_seq(STR, "")
    684     sep_join = sep.join
    685     for x in _RANGE_100:
    686         sep_join(s2)
    687 
    688 @bench('"ABCDE".join("")',
    689        "join empty string, with 5 character sep", 100)
    690 def join_empty_5(STR):
    691     sep = STR("ABCDE")
    692     s2 = get_bytes_yielding_seq(STR, "")
    693     sep_join = sep.join
    694     for x in _RANGE_100:
    695         sep_join(s2)
    696 
    697 @bench('"A".join("ABC..Z")',
    698        "join string with 26 characters, with 1 character sep", 1000)
    699 def join_alphabet_single(STR):
    700     sep = STR("A")
    701     s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
    702     sep_join = sep.join
    703     for x in _RANGE_1000:
    704         sep_join(s2)
    705 
    706 @bench('"ABCDE".join("ABC..Z")',
    707        "join string with 26 characters, with 5 character sep", 1000)
    708 def join_alphabet_5(STR):
    709     sep = STR("ABCDE")
    710     s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
    711     sep_join = sep.join
    712     for x in _RANGE_1000:
    713         sep_join(s2)
    714 
    715 @bench('"A".join(list("ABC..Z"))',
    716        "join list of 26 characters, with 1 character sep", 1000)
    717 def join_alphabet_list_single(STR):
    718     sep = STR("A")
    719     s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
    720     sep_join = sep.join
    721     for x in _RANGE_1000:
    722         sep_join(s2)
    723 
    724 @bench('"ABCDE".join(list("ABC..Z"))',
    725        "join list of 26 characters, with 5 character sep", 1000)
    726 def join_alphabet_list_five(STR):
    727     sep = STR("ABCDE")
    728     s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
    729     sep_join = sep.join
    730     for x in _RANGE_1000:
    731         sep_join(s2)
    732 
    733 @bench('"A".join(["Bob"]*100))',
    734        "join list of 100 words, with 1 character sep", 1000)
    735 def join_100_words_single(STR):
    736     sep = STR("A")
    737     s2 = [STR("Bob")]*100
    738     sep_join = sep.join
    739     for x in _RANGE_1000:
    740         sep_join(s2)
    741 
    742 @bench('"ABCDE".join(["Bob"]*100))',
    743        "join list of 100 words, with 5 character sep", 1000)
    744 def join_100_words_5(STR):
    745     sep = STR("ABCDE")
    746     s2 = [STR("Bob")]*100
    747     sep_join = sep.join
    748     for x in _RANGE_1000:
    749         sep_join(s2)
    750 
    751 #### split tests
    752 
    753 @bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000)
    754 def whitespace_split(STR):
    755     s = STR("Here are some words. "*2)
    756     s_split = s.split
    757     for x in _RANGE_1000:
    758         s_split()
    759 
    760 @bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000)
    761 def whitespace_rsplit(STR):
    762     s = STR("Here are some words. "*2)
    763     s_rsplit = s.rsplit
    764     for x in _RANGE_1000:
    765         s_rsplit()
    766 
    767 @bench('("Here are some words. "*2).split(None, 1)',
    768        "split 1 whitespace", 1000)
    769 def whitespace_split_1(STR):
    770     s = STR("Here are some words. "*2)
    771     s_split = s.split
    772     N = None
    773     for x in _RANGE_1000:
    774         s_split(N, 1)
    775 
    776 @bench('("Here are some words. "*2).rsplit(None, 1)',
    777        "split 1 whitespace", 1000)
    778 def whitespace_rsplit_1(STR):
    779     s = STR("Here are some words. "*2)
    780     s_rsplit = s.rsplit
    781     N = None
    782     for x in _RANGE_1000:
    783         s_rsplit(N, 1)
    784 
    785 @bench('("Here are some words. "*2).partition(" ")',
    786        "split 1 whitespace", 1000)
    787 def whitespace_partition(STR):
    788     sep = STR(" ")
    789     s = STR("Here are some words. "*2)
    790     s_partition = s.partition
    791     for x in _RANGE_1000:
    792         s_partition(sep)
    793 
    794 @bench('("Here are some words. "*2).rpartition(" ")',
    795        "split 1 whitespace", 1000)
    796 def whitespace_rpartition(STR):
    797     sep = STR(" ")
    798     s = STR("Here are some words. "*2)
    799     s_rpartition = s.rpartition
    800     for x in _RANGE_1000:
    801         s_rpartition(sep)
    802 
# Sample English prose, repeated 25 times, used as the input for the "huge"
# whitespace-splitting benchmarks.
human_text = """\
Python is a dynamic object-oriented programming language that can be
used for many kinds of software development. It offers strong support
for integration with other languages and tools, comes with extensive
standard libraries, and can be learned in a few days. Many Python
programmers report substantial productivity gains and feel the language
encourages the development of higher quality, more maintainable code.

Python runs on Windows, Linux/Unix, Mac OS X, Amiga, Palm
Handhelds, and Nokia mobile phones. Python has also been ported to the
Java and .NET virtual machines.

Python is distributed under an OSI-approved open source license that
makes it free to use, even for commercial products.
"""*25
# Both representations are converted once here, at import time, rather than
# inside the benchmark functions.
human_text_bytes = bytes_from_str(human_text)
human_text_unicode = unicode_from_str(human_text)
    820 def _get_human_text(STR):
    821     if STR is UNICODE:
    822         return human_text_unicode
    823     if STR is BYTES:
    824         return human_text_bytes
    825     raise AssertionError
    826 
    827 @bench('human_text.split()', "split whitespace (huge)", 10)
    828 def whitespace_split_huge(STR):
    829     s = _get_human_text(STR)
    830     s_split = s.split
    831     for x in _RANGE_10:
    832         s_split()
    833 
    834 @bench('human_text.rsplit()', "split whitespace (huge)", 10)
    835 def whitespace_rsplit_huge(STR):
    836     s = _get_human_text(STR)
    837     s_rsplit = s.rsplit
    838     for x in _RANGE_10:
    839         s_rsplit()
    840 
    841 
    842 
    843 @bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000)
    844 def newlines_split(STR):
    845     s = STR("this\nis\na\ntest\n")
    846     s_split = s.split
    847     nl = STR("\n")
    848     for x in _RANGE_1000:
    849         s_split(nl)
    850 
    851 
    852 @bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000)
    853 def newlines_rsplit(STR):
    854     s = STR("this\nis\na\ntest\n")
    855     s_rsplit = s.rsplit
    856     nl = STR("\n")
    857     for x in _RANGE_1000:
    858         s_rsplit(nl)
    859 
    860 @bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000)
    861 def newlines_splitlines(STR):
    862     s = STR("this\nis\na\ntest\n")
    863     s_splitlines = s.splitlines
    864     for x in _RANGE_1000:
    865         s_splitlines()
    866 
    867 ## split text with 2000 newlines
    868 
    869 def _make_2000_lines():
    870     import random
    871     r = random.Random(100)
    872     chars = list(map(chr, range(32, 128)))
    873     i = 0
    874     while i < len(chars):
    875         chars[i] = " "
    876         i += r.randrange(9)
    877     s = "".join(chars)
    878     s = s*4
    879     words = []
    880     for i in range(2000):
    881         start = r.randrange(96)
    882         n = r.randint(5, 65)
    883         words.append(s[start:start+n])
    884     return "\n".join(words)+"\n"
    885 
# The 2000-line sample text, generated once at import time and converted to
# both string types up front.
_text_with_2000_lines = _make_2000_lines()
_text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines)
_text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines)
    889 def _get_2000_lines(STR):
    890     if STR is UNICODE:
    891         return _text_with_2000_lines_unicode
    892     if STR is BYTES:
    893         return _text_with_2000_lines_bytes
    894     raise AssertionError
    895 
    896 
    897 @bench('"...text...".split("\\n")', "split 2000 newlines", 10)
    898 def newlines_split_2000(STR):
    899     s = _get_2000_lines(STR)
    900     s_split = s.split
    901     nl = STR("\n")
    902     for x in _RANGE_10:
    903         s_split(nl)
    904 
    905 @bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10)
    906 def newlines_rsplit_2000(STR):
    907     s = _get_2000_lines(STR)
    908     s_rsplit = s.rsplit
    909     nl = STR("\n")
    910     for x in _RANGE_10:
    911         s_rsplit(nl)
    912 
    913 @bench('"...text...".splitlines()', "split 2000 newlines", 10)
    914 def newlines_splitlines_2000(STR):
    915     s = _get_2000_lines(STR)
    916     s_splitlines = s.splitlines
    917     for x in _RANGE_10:
    918         s_splitlines()
    919 
    920 
    921 ## split text on "--" characters
    922 @bench(
    923     '"this--is--a--test--of--the--emergency--broadcast--system".split("--")',
    924     "split on multicharacter separator (small)", 1000)
    925 def split_multichar_sep_small(STR):
    926     s = STR("this--is--a--test--of--the--emergency--broadcast--system")
    927     s_split = s.split
    928     pat = STR("--")
    929     for x in _RANGE_1000:
    930         s_split(pat)
    931 @bench(
    932     '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")',
    933     "split on multicharacter separator (small)", 1000)
    934 def rsplit_multichar_sep_small(STR):
    935     s = STR("this--is--a--test--of--the--emergency--broadcast--system")
    936     s_rsplit = s.rsplit
    937     pat = STR("--")
    938     for x in _RANGE_1000:
    939         s_rsplit(pat)
    940 
    941 ## split dna text on "ACTAT" characters
    942 @bench('dna.split("ACTAT")',
    943        "split on multicharacter separator (dna)", 10)
    944 def split_multichar_sep_dna(STR):
    945     s = _get_dna(STR)
    946     s_split = s.split
    947     pat = STR("ACTAT")
    948     for x in _RANGE_10:
    949         s_split(pat)
    950 
    951 @bench('dna.rsplit("ACTAT")',
    952        "split on multicharacter separator (dna)", 10)
    953 def rsplit_multichar_sep_dna(STR):
    954     s = _get_dna(STR)
    955     s_rsplit = s.rsplit
    956     pat = STR("ACTAT")
    957     for x in _RANGE_10:
    958         s_rsplit(pat)
    959 
    960 
    961 
    962 ## split with limits
    963 
    964 GFF3_example = "\t".join([
    965     "I", "Genomic_canonical", "region", "357208", "396183", ".", "+", ".",
    966     "ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119"])
    967 
    968 @bench('GFF3_example.split("\\t")', "tab split", 1000)
    969 def tab_split_no_limit(STR):
    970     sep = STR("\t")
    971     s = STR(GFF3_example)
    972     s_split = s.split
    973     for x in _RANGE_1000:
    974         s_split(sep)
    975 
    976 @bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
    977 def tab_split_limit(STR):
    978     sep = STR("\t")
    979     s = STR(GFF3_example)
    980     s_split = s.split
    981     for x in _RANGE_1000:
    982         s_split(sep, 8)
    983 
    984 @bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
    985 def tab_rsplit_no_limit(STR):
    986     sep = STR("\t")
    987     s = STR(GFF3_example)
    988     s_rsplit = s.rsplit
    989     for x in _RANGE_1000:
    990         s_rsplit(sep)
    991 
    992 @bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
    993 def tab_rsplit_limit(STR):
    994     sep = STR("\t")
    995     s = STR(GFF3_example)
    996     s_rsplit = s.rsplit
    997     for x in _RANGE_1000:
    998         s_rsplit(sep, 8)
    999 
   1000 #### Count characters
   1001 
   1002 @bench('...text.with.2000.newlines.count("\\n")',
   1003        "count newlines", 10)
   1004 def count_newlines(STR):
   1005     s = _get_2000_lines(STR)
   1006     s_count = s.count
   1007     nl = STR("\n")
   1008     for x in _RANGE_10:
   1009         s_count(nl)
   1010 
   1011 # Orchid sequences concatenated, from Biopython
   1012 _dna = """
   1013 CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT
   1014 AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG
   1015 AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT
   1016 TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC
   1017 AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG
   1018 TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT
   1019 CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT
   1020 TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT
   1021 GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC
   1022 TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG
   1023 GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA
   1024 ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC
   1025 CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA
   1026 ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA
   1027 ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA
   1028 TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG
   1029 CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG
   1030 GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA
   1031 ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG
   1032 ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC
   1033 ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA
   1034 GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA
   1035 TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG
   1036 TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT
   1037 TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG
   1038 GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG
   1039 GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT
   1040 AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC
   1041 GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG
   1042 TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT
   1043 CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA
   1044 TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC
   1045 TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC
   1046 AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT
   1047 GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT
   1048 GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA
   1049 CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG
   1050 GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA
   1051 TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG
   1052 ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT
   1053 GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA
   1054 AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC
   1055 AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA
   1056 ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC
   1057 GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC
   1058 GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC
   1059 AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA
   1060 GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG
   1061 ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC
   1062 GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC
   1063 """
   1064 _dna = "".join(_dna.splitlines())
   1065 _dna = _dna * 25
   1066 _dna_bytes = bytes_from_str(_dna)
   1067 _dna_unicode = unicode_from_str(_dna)
   1068 
   1069 def _get_dna(STR):
   1070     if STR is UNICODE:
   1071         return _dna_unicode
   1072     if STR is BYTES:
   1073         return _dna_bytes
   1074     raise AssertionError
   1075 
   1076 @bench('dna.count("AACT")', "count AACT substrings in DNA example", 10)
   1077 def count_aact(STR):
   1078     seq = _get_dna(STR)
   1079     seq_count = seq.count
   1080     needle = STR("AACT")
   1081     for x in _RANGE_10:
   1082         seq_count(needle)
   1083 
   1084 ##### startswith and endswith
   1085 
   1086 @bench('"Andrew".startswith("A")', 'startswith single character', 1000)
   1087 def startswith_single(STR):
   1088     s1 = STR("Andrew")
   1089     s2 = STR("A")
   1090     s1_startswith = s1.startswith
   1091     for x in _RANGE_1000:
   1092         s1_startswith(s2)
   1093 
   1094 @bench('"Andrew".startswith("Andrew")', 'startswith multiple characters',
   1095        1000)
   1096 def startswith_multiple(STR):
   1097     s1 = STR("Andrew")
   1098     s2 = STR("Andrew")
   1099     s1_startswith = s1.startswith
   1100     for x in _RANGE_1000:
   1101         s1_startswith(s2)
   1102 
   1103 @bench('"Andrew".startswith("Anders")',
   1104        'startswith multiple characters - not!', 1000)
   1105 def startswith_multiple_not(STR):
   1106     s1 = STR("Andrew")
   1107     s2 = STR("Anders")
   1108     s1_startswith = s1.startswith
   1109     for x in _RANGE_1000:
   1110         s1_startswith(s2)
   1111 
   1112 
   1113 # endswith
   1114 
   1115 @bench('"Andrew".endswith("w")', 'endswith single character', 1000)
   1116 def endswith_single(STR):
   1117     s1 = STR("Andrew")
   1118     s2 = STR("w")
   1119     s1_endswith = s1.endswith
   1120     for x in _RANGE_1000:
   1121         s1_endswith(s2)
   1122 
   1123 @bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000)
   1124 def endswith_multiple(STR):
   1125     s1 = STR("Andrew")
   1126     s2 = STR("Andrew")
   1127     s1_endswith = s1.endswith
   1128     for x in _RANGE_1000:
   1129         s1_endswith(s2)
   1130 
   1131 @bench('"Andrew".endswith("Anders")',
   1132        'endswith multiple characters - not!', 1000)
   1133 def endswith_multiple_not(STR):
   1134     s1 = STR("Andrew")
   1135     s2 = STR("Anders")
   1136     s1_endswith = s1.endswith
   1137     for x in _RANGE_1000:
   1138         s1_endswith(s2)
   1139 
   1140 #### Strip
   1141 
   1142 @bench('"Hello!\\n".strip()', 'strip terminal newline', 1000)
   1143 def terminal_newline_strip_right(STR):
   1144     s = STR("Hello!\n")
   1145     s_strip = s.strip
   1146     for x in _RANGE_1000:
   1147         s_strip()
   1148 
   1149 @bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000)
   1150 def terminal_newline_rstrip(STR):
   1151     s = STR("Hello!\n")
   1152     s_rstrip = s.rstrip
   1153     for x in _RANGE_1000:
   1154         s_rstrip()
   1155 
   1156 @bench('"\\nHello!".strip()', 'strip terminal newline', 1000)
   1157 def terminal_newline_strip_left(STR):
   1158     s = STR("\nHello!")
   1159     s_strip = s.strip
   1160     for x in _RANGE_1000:
   1161         s_strip()
   1162 
   1163 @bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000)
   1164 def terminal_newline_strip_both(STR):
   1165     s = STR("\nHello!\n")
   1166     s_strip = s.strip
   1167     for x in _RANGE_1000:
   1168         s_strip()
   1169 
   1170 @bench('"\\nHello!".rstrip()', 'strip terminal newline', 1000)
   1171 def terminal_newline_lstrip(STR):
   1172     s = STR("\nHello!")
   1173     s_lstrip = s.lstrip
   1174     for x in _RANGE_1000:
   1175         s_lstrip()
   1176 
   1177 @bench('s="Hello!\\n"; s[:-1] if s[-1]=="\\n" else s',
   1178        'strip terminal newline', 1000)
   1179 def terminal_newline_if_else(STR):
   1180     s = STR("Hello!\n")
   1181     NL = STR("\n")
   1182     for x in _RANGE_1000:
   1183         s[:-1] if (s[-1] == NL) else s
   1184 
   1185 
   1186 # Strip multiple spaces or tabs
   1187 
   1188 @bench('"Hello\\t   \\t".strip()', 'strip terminal spaces and tabs', 1000)
   1189 def terminal_space_strip(STR):
   1190     s = STR("Hello\t   \t!")
   1191     s_strip = s.strip
   1192     for x in _RANGE_1000:
   1193         s_strip()
   1194 
   1195 @bench('"Hello\\t   \\t".rstrip()', 'strip terminal spaces and tabs', 1000)
   1196 def terminal_space_rstrip(STR):
   1197     s = STR("Hello!\t   \t")
   1198     s_rstrip = s.rstrip
   1199     for x in _RANGE_1000:
   1200         s_rstrip()
   1201 
   1202 @bench('"\\t   \\tHello".rstrip()', 'strip terminal spaces and tabs', 1000)
   1203 def terminal_space_lstrip(STR):
   1204     s = STR("\t   \tHello!")
   1205     s_lstrip = s.lstrip
   1206     for x in _RANGE_1000:
   1207         s_lstrip()
   1208 
   1209 
   1210 #### replace
   1211 @bench('"This is a test".replace(" ", "\\t")', 'replace single character',
   1212        1000)
   1213 def replace_single_character(STR):
   1214     s = STR("This is a test!")
   1215     from_str = STR(" ")
   1216     to_str = STR("\t")
   1217     s_replace = s.replace
   1218     for x in _RANGE_1000:
   1219         s_replace(from_str, to_str)
   1220 
   1221 @uses_re
   1222 @bench('re.sub(" ", "\\t", "This is a test"', 'replace single character',
   1223        1000)
   1224 def replace_single_character_re(STR):
   1225     s = STR("This is a test!")
   1226     pat = re.compile(STR(" "))
   1227     to_str = STR("\t")
   1228     pat_sub = pat.sub
   1229     for x in _RANGE_1000:
   1230         pat_sub(to_str, s)
   1231 
   1232 @bench('"...text.with.2000.lines...replace("\\n", " ")',
   1233        'replace single character, big string', 10)
   1234 def replace_single_character_big(STR):
   1235     s = _get_2000_lines(STR)
   1236     from_str = STR("\n")
   1237     to_str = STR(" ")
   1238     s_replace = s.replace
   1239     for x in _RANGE_10:
   1240         s_replace(from_str, to_str)
   1241 
   1242 @uses_re
   1243 @bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
   1244        'replace single character, big string', 10)
   1245 def replace_single_character_big_re(STR):
   1246     s = _get_2000_lines(STR)
   1247     pat = re.compile(STR("\n"))
   1248     to_str = STR(" ")
   1249     pat_sub = pat.sub
   1250     for x in _RANGE_10:
   1251         pat_sub(to_str, s)
   1252 
   1253 
   1254 @bench('dna.replace("ATC", "ATT")',
   1255        'replace multiple characters, dna', 10)
   1256 def replace_multiple_characters_dna(STR):
   1257     seq = _get_dna(STR)
   1258     from_str = STR("ATC")
   1259     to_str = STR("ATT")
   1260     seq_replace = seq.replace
   1261     for x in _RANGE_10:
   1262         seq_replace(from_str, to_str)
   1263 
   1264 # This increases the character count
   1265 @bench('"...text.with.2000.newlines...replace("\\n", "\\r\\n")',
   1266        'replace and expand multiple characters, big string', 10)
   1267 def replace_multiple_character_big(STR):
   1268     s = _get_2000_lines(STR)
   1269     from_str = STR("\n")
   1270     to_str = STR("\r\n")
   1271     s_replace = s.replace
   1272     for x in _RANGE_10:
   1273         s_replace(from_str, to_str)
   1274 
   1275 
   1276 # This decreases the character count
   1277 @bench('"When shall we three meet again?".replace("ee", "")',
   1278        'replace/remove multiple characters', 1000)
   1279 def replace_multiple_character_remove(STR):
   1280     s = STR("When shall we three meet again?")
   1281     from_str = STR("ee")
   1282     to_str = STR("")
   1283     s_replace = s.replace
   1284     for x in _RANGE_1000:
   1285         s_replace(from_str, to_str)
   1286 
   1287 
   1288 big_s = "A" + ("Z"*128*1024)
   1289 big_s_bytes = bytes_from_str(big_s)
   1290 big_s_unicode = unicode_from_str(big_s)
   1291 def _get_big_s(STR):
   1292     if STR is UNICODE: return big_s_unicode
   1293     if STR is BYTES: return big_s_bytes
   1294     raise AssertionError
   1295 
   1296 # The older replace implementation counted all matches in
   1297 # the string even when it only needed to make one replacement.
   1298 @bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)',
   1299        'quick replace single character match', 10)
   1300 def quick_replace_single_match(STR):
   1301     s = _get_big_s(STR)
   1302     from_str = STR("A")
   1303     to_str = STR("BB")
   1304     s_replace = s.replace
   1305     for x in _RANGE_10:
   1306         s_replace(from_str, to_str, 1)
   1307 
   1308 @bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)',
   1309        'quick replace multiple character match', 10)
   1310 def quick_replace_multiple_match(STR):
   1311     s = _get_big_s(STR)
   1312     from_str = STR("AZZ")
   1313     to_str = STR("BBZZ")
   1314     s_replace = s.replace
   1315     for x in _RANGE_10:
   1316         s_replace(from_str, to_str, 1)
   1317 
   1318 
   1319 ####
   1320 
   1321 # CCP does a lot of this, for internationalisation of ingame messages.
   1322 _format = "The %(thing)s is %(place)s the %(location)s."
   1323 _format_dict = { "thing":"THING", "place":"PLACE", "location":"LOCATION", }
   1324 _format_bytes = bytes_from_str(_format)
   1325 _format_unicode = unicode_from_str(_format)
   1326 _format_dict_bytes = dict((bytes_from_str(k), bytes_from_str(v)) for (k,v) in _format_dict.items())
   1327 _format_dict_unicode = dict((unicode_from_str(k), unicode_from_str(v)) for (k,v) in _format_dict.items())
   1328 
   1329 def _get_format(STR):
   1330     if STR is UNICODE:
   1331         return _format_unicode
   1332     if STR is BYTES:
   1333         if sys.version_info >= (3,):
   1334             raise UnsupportedType
   1335         return _format_bytes
   1336     raise AssertionError
   1337 
   1338 def _get_format_dict(STR):
   1339     if STR is UNICODE:
   1340         return _format_dict_unicode
   1341     if STR is BYTES:
   1342         if sys.version_info >= (3,):
   1343             raise UnsupportedType
   1344         return _format_dict_bytes
   1345     raise AssertionError
   1346 
   1347 # Formatting.
   1348 @bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}',
   1349        'formatting a string type with a dict', 1000)
   1350 def format_with_dict(STR):
   1351     s = _get_format(STR)
   1352     d = _get_format_dict(STR)
   1353     for x in _RANGE_1000:
   1354         s % d
   1355 
   1356 
   1357 #### Upper- and lower- case conversion
   1358 
   1359 @bench('("Where in the world is Carmen San Deigo?"*10).lower()',
   1360        "case conversion -- rare", 1000)
   1361 def lower_conversion_rare(STR):
   1362     s = STR("Where in the world is Carmen San Deigo?"*10)
   1363     s_lower = s.lower
   1364     for x in _RANGE_1000:
   1365         s_lower()
   1366 
   1367 @bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()',
   1368        "case conversion -- dense", 1000)
   1369 def lower_conversion_dense(STR):
   1370     s = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10)
   1371     s_lower = s.lower
   1372     for x in _RANGE_1000:
   1373         s_lower()
   1374 
   1375 
   1376 @bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()',
   1377        "case conversion -- rare", 1000)
   1378 def upper_conversion_rare(STR):
   1379     s = STR("Where in the world is Carmen San Deigo?"*10)
   1380     s_upper = s.upper
   1381     for x in _RANGE_1000:
   1382         s_upper()
   1383 
   1384 @bench('("where in the world is carmen san deigo?"*10).upper()',
   1385        "case conversion -- dense", 1000)
   1386 def upper_conversion_dense(STR):
   1387     s = STR("where in the world is carmen san deigo?"*10)
   1388     s_upper = s.upper
   1389     for x in _RANGE_1000:
   1390         s_upper()
   1391 
   1392 
   1393 # end of benchmarks
   1394 
   1395 #################
   1396 
   1397 class BenchTimer(timeit.Timer):
   1398     def best(self, repeat=1):
   1399         for i in range(1, 10):
   1400             number = 10**i
   1401             x = self.timeit(number)
   1402             if x > 0.02:
   1403                 break
   1404         times = [x]
   1405         for i in range(1, repeat):
   1406             times.append(self.timeit(number))
   1407         return min(times) / number
   1408 
   1409 def main():
   1410     (options, test_names) = parser.parse_args()
   1411     if options.bytes_only and options.unicode_only:
   1412         raise SystemExit("Only one of --8-bit and --unicode are allowed")
   1413 
   1414     bench_functions = []
   1415     for (k,v) in globals().items():
   1416         if hasattr(v, "is_bench"):
   1417             if test_names:
   1418                 for name in test_names:
   1419                     if name in v.group:
   1420                         break
   1421                 else:
   1422                     # Not selected, ignore
   1423                     continue
   1424             if options.skip_re and hasattr(v, "uses_re"):
   1425                 continue
   1426 
   1427             bench_functions.append( (v.group, k, v) )
   1428     bench_functions.sort()
   1429 
   1430     p("bytes\tunicode")
   1431     p("(in ms)\t(in ms)\t%\tcomment")
   1432 
   1433     bytes_total = uni_total = 0.0
   1434 
   1435     for title, group in itertools.groupby(bench_functions,
   1436                                       operator.itemgetter(0)):
   1437         # Flush buffer before each group
   1438         sys.stdout.flush()
   1439         p("="*10, title)
   1440         for (_, k, v) in group:
   1441             if hasattr(v, "is_bench"):
   1442                 bytes_time = 0.0
   1443                 bytes_time_s = " - "
   1444                 if not options.unicode_only:
   1445                     try:
   1446                         bytes_time = BenchTimer("__main__.%s(__main__.BYTES)" % (k,),
   1447                                                 "import __main__").best(REPEAT)
   1448                         bytes_time_s = "%.2f" % (1000 * bytes_time)
   1449                         bytes_total += bytes_time
   1450                     except UnsupportedType:
   1451                         bytes_time_s = "N/A"
   1452                 uni_time = 0.0
   1453                 uni_time_s = " - "
   1454                 if not options.bytes_only:
   1455                     try:
   1456                         uni_time = BenchTimer("__main__.%s(__main__.UNICODE)" % (k,),
   1457                                               "import __main__").best(REPEAT)
   1458                         uni_time_s = "%.2f" % (1000 * uni_time)
   1459                         uni_total += uni_time
   1460                     except UnsupportedType:
   1461                         uni_time_s = "N/A"
   1462                 try:
   1463                     average = bytes_time/uni_time
   1464                 except (TypeError, ZeroDivisionError):
   1465                     average = 0.0
   1466                 p("%s\t%s\t%.1f\t%s (*%d)" % (
   1467                     bytes_time_s, uni_time_s, 100.*average,
   1468                     v.comment, v.repeat_count))
   1469 
   1470     if bytes_total == uni_total == 0.0:
   1471         p("That was zippy!")
   1472     else:
   1473         try:
   1474             ratio = bytes_total/uni_total
   1475         except ZeroDivisionError:
   1476             ratio = 0.0
   1477         p("%.2f\t%.2f\t%.1f\t%s" % (
   1478             1000*bytes_total, 1000*uni_total, 100.*ratio,
   1479             "TOTAL"))
   1480 
   1481 if __name__ == "__main__":
   1482     main()
   1483