# (code-viewer navigation chrome removed; not part of the module)
      1 """Compare the speed of downloading URLs sequentially vs. using futures."""
      2 
      3 import functools
      4 import time
      5 import timeit
      6 import sys
      7 
      8 try:
      9     from urllib2 import urlopen
     10 except ImportError:
     11     from urllib.request import urlopen
     12 
     13 from concurrent.futures import (as_completed, ThreadPoolExecutor,
     14                                 ProcessPoolExecutor)
     15 
# Benchmark targets: a mix of large, well-known sites plus one host that
# (intentionally) does not resolve, so the error-handling path is exercised.
URLS = ['http://www.google.com/',
        'http://www.apple.com/',
        'http://www.ibm.com',
        'http://www.thisurlprobablydoesnotexist.com',
        'http://www.slashdot.org/',
        'http://www.python.org/',
        'http://www.bing.com/',
        'http://www.facebook.com/',
        'http://www.yahoo.com/',
        'http://www.youtube.com/',
        'http://www.blogger.com/']
     27 
     28 def load_url(url, timeout):
     29     kwargs = {'timeout': timeout} if sys.version_info >= (2, 6) else {}
     30     return urlopen(url, **kwargs).read()
     31 
     32 def download_urls_sequential(urls, timeout=60):
     33     url_to_content = {}
     34     for url in urls:
     35         try:
     36             url_to_content[url] = load_url(url, timeout=timeout)
     37         except:
     38             pass
     39     return url_to_content
     40 
     41 def download_urls_with_executor(urls, executor, timeout=60):
     42     try:
     43         url_to_content = {}
     44         future_to_url = dict((executor.submit(load_url, url, timeout), url)
     45                              for url in urls)
     46 
     47         for future in as_completed(future_to_url):
     48             try:
     49                 url_to_content[future_to_url[future]] = future.result()
     50             except:
     51                 pass
     52         return url_to_content
     53     finally:
     54         executor.shutdown()
     55 
     56 def main():
     57     for name, fn in [('sequential',
     58                       functools.partial(download_urls_sequential, URLS)),
     59                      ('processes',
     60                       functools.partial(download_urls_with_executor,
     61                                         URLS,
     62                                         ProcessPoolExecutor(10))),
     63                      ('threads',
     64                       functools.partial(download_urls_with_executor,
     65                                         URLS,
     66                                         ThreadPoolExecutor(10)))]:
     67         sys.stdout.write('%s: ' % name.ljust(12))
     68         start = time.time()
     69         url_map = fn()
     70         sys.stdout.write('%.2f seconds (%d of %d downloaded)\n' %
     71                          (time.time() - start, len(url_map), len(URLS)))
     72 
     73 if __name__ == '__main__':
     74     main()
     75