1 # Copyright (C) 2011 Google Inc. All rights reserved. 2 # 3 # Redistribution and use in source and binary forms, with or without 4 # modification, are permitted provided that the following conditions are 5 # met: 6 # 7 # * Redistributions of source code must retain the above copyright 8 # notice, this list of conditions and the following disclaimer. 9 # * Redistributions in binary form must reproduce the above 10 # copyright notice, this list of conditions and the following disclaimer 11 # in the documentation and/or other materials provided with the 12 # distribution. 13 # * Neither the name of Google Inc. nor the names of its 14 # contributors may be used to endorse or promote products derived from 15 # this software without specific prior written permission. 16 # 17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 """Wrapper module for fetching URLs.""" 30 31 import urllib 32 33 34 class UrlFetcher(object): 35 """Class with restricted interface to fetch URLs (makes testing easier)""" 36 def __init__(self, filesystem): 37 self._filesystem = filesystem 38 39 def fetch(self, url): 40 """Fetches the contents of the URL as a string.""" 41 file_object = urllib.urlopen(url) 42 content = file_object.read() 43 file_object.close() 44 return content 45 46 def fetch_into_file(self, url): 47 """Fetches the contents of the URL into a temporary file and return the filename. 48 49 This is the equivalent of urllib.retrieve() except that we don't return any headers. 50 """ 51 file_object, filename = self._filesystem.open_binary_tempfile('-fetched') 52 contents = self.fetch(url) 53 file_object.write(contents) 54 file_object.close() 55 return filename 56