      1 """Open an arbitrary URL.
      2 
      3 See the following document for more info on URLs:
      4 "Names and Addresses, URIs, URLs, URNs, URCs", at
      5 http://www.w3.org/pub/WWW/Addressing/Overview.html
      6 
      7 See also the HTTP spec (from which the error codes are derived):
      8 "HTTP - Hypertext Transfer Protocol", at
      9 http://www.w3.org/pub/WWW/Protocols/
     10 
     11 Related standards and specs:
     12 - RFC1808: the "relative URL" spec. (authoritative status)
     13 - RFC1738: the "URL standard". (authoritative status)
     14 - RFC1630: the "URI spec". (informational status)
     15 
     16 The object returned by URLopener().open(file) will differ per
     17 protocol.  All you know is that it has methods read(), readline(),
     18 readlines(), fileno(), close() and info().  The read*(), fileno()
     19 and close() methods work like those of open files.
     20 The info() method returns a mimetools.Message object which can be
     21 used to query various info about the object, if available.
     22 (mimetools.Message objects are queried with the getheader() method.)
     23 """
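
# A minimal usage sketch (the URL and the headers queried below are
# hypothetical):
#
#     f = urlopen('http://www.example.com/')
#     headers = f.info()                    # mimetools.Message instance
#     print headers.getheader('Content-Type')
#     data = f.read()
#     f.close()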
     24 
     25 import string
     26 import socket
     27 import os
     28 import time
     29 import sys
     30 import base64
     31 import re
     32 
     33 from urlparse import urljoin as basejoin
     34 
     35 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
     36            "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
     37            "urlencode", "url2pathname", "pathname2url", "splittag",
     38            "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
     39            "splittype", "splithost", "splituser", "splitpasswd", "splitport",
     40            "splitnport", "splitquery", "splitattr", "splitvalue",
     41            "getproxies"]
     42 
     43 __version__ = '1.17'    # XXX This version is not always updated :-(
     44 
     45 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
     46 
     47 # Helper for non-unix systems
     48 if os.name == 'nt':
     49     from nturl2path import url2pathname, pathname2url
     50 elif os.name == 'riscos':
     51     from rourl2path import url2pathname, pathname2url
     52 else:
     53     def url2pathname(pathname):
     54         """OS-specific conversion from a relative URL of the 'file' scheme
     55         to a file system path; not recommended for general use."""
     56         return unquote(pathname)
     57 
     58     def pathname2url(pathname):
     59         """OS-specific conversion from a file system path to a relative URL
     60         of the 'file' scheme; not recommended for general use."""
     61         return quote(pathname)
     62 
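# A small sketch of the POSIX fallbacks above (the path is made up):
#
#     >>> pathname2url('/tmp/a file.txt')
#     '/tmp/a%20file.txt'
#     >>> url2pathname('/tmp/a%20file.txt')
#     '/tmp/a file.txt'
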
     63 # This really consists of two pieces:
     64 # (1) a class which handles opening of all sorts of URLs
     65 #     (plus assorted utilities etc.)
     66 # (2) a set of functions for parsing URLs
     67 # XXX Should these be separated out into different modules?
     68 
     69 
     70 # Shortcut for basic usage
     71 _urlopener = None
     72 def urlopen(url, data=None, proxies=None):
     73     """Create a file-like object for the specified URL to read from."""
     74     from warnings import warnpy3k
     75     warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
     76              "favor of urllib2.urlopen()", stacklevel=2)
     77 
     78     global _urlopener
     79     if proxies is not None:
     80         opener = FancyURLopener(proxies=proxies)
     81     elif not _urlopener:
     82         opener = FancyURLopener()
     83         _urlopener = opener
     84     else:
     85         opener = _urlopener
     86     if data is None:
     87         return opener.open(url)
     88     else:
     89         return opener.open(url, data)
     90 def urlretrieve(url, filename=None, reporthook=None, data=None):
     91     global _urlopener
     92     if not _urlopener:
     93         _urlopener = FancyURLopener()
     94     return _urlopener.retrieve(url, filename, reporthook, data)
     95 def urlcleanup():
     96     if _urlopener:
     97         _urlopener.cleanup()
     98     _safe_quoters.clear()
     99     ftpcache.clear()
    100 
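# A sketch of the module-level helpers above (URL and callback are
# hypothetical); reporthook receives (block number, block size, total size):
#
#     def report(blocknum, blocksize, totalsize):
#         print "~%d of %d bytes" % (blocknum * blocksize, totalsize)
#
#     filename, headers = urlretrieve('http://www.example.com/pkg.tar.gz',
#                                     reporthook=report)
#     urlcleanup()    # remove temporary files and clear the caches
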
    101 # check for SSL
    102 try:
    103     import ssl
    104 except ImportError:
    105     _have_ssl = False
    106 else:
    107     _have_ssl = True
    108 
    109 # exception raised when downloaded size does not match content-length
    110 class ContentTooShortError(IOError):
    111     def __init__(self, message, content):
    112         IOError.__init__(self, message)
    113         self.content = content
    114 
    115 ftpcache = {}
    116 class URLopener:
    117     """Class to open URLs.
    118     This is a class rather than just a subroutine because we may need
    119     more than one set of global protocol-specific options.
    120     Note -- this is a base class for those who don't want the
    121     automatic handling of errors type 302 (relocated) and 401
    122     (authorization needed)."""
    123 
    124     __tempfiles = None
    125 
    126     version = "Python-urllib/%s" % __version__
    127 
    128     # Constructor
    129     def __init__(self, proxies=None, **x509):
    130         if proxies is None:
    131             proxies = getproxies()
    132         assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
    133         self.proxies = proxies
    134         self.key_file = x509.get('key_file')
    135         self.cert_file = x509.get('cert_file')
    136         self.addheaders = [('User-Agent', self.version)]
    137         self.__tempfiles = []
    138         self.__unlink = os.unlink # See cleanup()
    139         self.tempcache = None
    140         # Undocumented feature: if you assign {} to tempcache,
    141         # it is used to cache files retrieved with
    142         # self.retrieve().  This is not enabled by default
    143         # since it does not work for changing documents (and I
    144         # haven't got the logic to check expiration headers
    145         # yet).
    146         self.ftpcache = ftpcache
    147         # Undocumented feature: you can use a different
    148         # ftp cache by assigning to the .ftpcache member;
    149         # in case you want logically independent URL openers
    150         # XXX This is not threadsafe.  Bah.
    151 
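    # Construction sketch: an explicit proxy mapping overrides the
    # environment-derived default (host names and file paths are hypothetical):
    #
    #     opener = URLopener(proxies={'http': 'http://proxy.example.com:3128/'},
    #                        key_file='client.key', cert_file='client.crt')
    #     opener.addheader('Accept', 'text/html')
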
    152     def __del__(self):
    153         self.close()
    154 
    155     def close(self):
    156         self.cleanup()
    157 
    158     def cleanup(self):
    159         # This code sometimes runs when the rest of this module
    160         # has already been deleted, so it can't use any globals
    161         # or import anything.
    162         if self.__tempfiles:
    163             for file in self.__tempfiles:
    164                 try:
    165                     self.__unlink(file)
    166                 except OSError:
    167                     pass
    168             del self.__tempfiles[:]
    169         if self.tempcache:
    170             self.tempcache.clear()
    171 
    172     def addheader(self, *args):
    173         """Add a header to be used by the HTTP interface only
    174         e.g. u.addheader('Accept', 'sound/basic')"""
    175         self.addheaders.append(args)
    176 
    177     # External interface
    178     def open(self, fullurl, data=None):
    179         """Use URLopener().open(file) instead of open(file, 'r')."""
    180         fullurl = unwrap(toBytes(fullurl))
    181         # percent-encode the url, working around server errors caused by
    182         # unencoded characters such as spaces within url paths.
    183         fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
    184         if self.tempcache and fullurl in self.tempcache:
    185             filename, headers = self.tempcache[fullurl]
    186             fp = open(filename, 'rb')
    187             return addinfourl(fp, headers, fullurl)
    188         urltype, url = splittype(fullurl)
    189         if not urltype:
    190             urltype = 'file'
    191         if urltype in self.proxies:
    192             proxy = self.proxies[urltype]
    193             urltype, proxyhost = splittype(proxy)
    194             host, selector = splithost(proxyhost)
    195             url = (host, fullurl) # Signal special case to open_*()
    196         else:
    197             proxy = None
    198         name = 'open_' + urltype
    199         self.type = urltype
    200         name = name.replace('-', '_')
    201         if not hasattr(self, name):
    202             if proxy:
    203                 return self.open_unknown_proxy(proxy, fullurl, data)
    204             else:
    205                 return self.open_unknown(fullurl, data)
    206         try:
    207             if data is None:
    208                 return getattr(self, name)(url)
    209             else:
    210                 return getattr(self, name)(url, data)
    211         except socket.error, msg:
    212             raise IOError, ('socket error', msg), sys.exc_info()[2]
    213 
    214     def open_unknown(self, fullurl, data=None):
    215         """Overridable interface to open unknown URL type."""
    216         type, url = splittype(fullurl)
    217         raise IOError, ('url error', 'unknown url type', type)
    218 
    219     def open_unknown_proxy(self, proxy, fullurl, data=None):
    220         """Overridable interface to open unknown URL type."""
    221         type, url = splittype(fullurl)
    222         raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
    223 
    224     # External interface
    225     def retrieve(self, url, filename=None, reporthook=None, data=None):
    226         """retrieve(url) returns (filename, headers) for a local object
    227         or (tempfilename, headers) for a remote object."""
    228         url = unwrap(toBytes(url))
    229         if self.tempcache and url in self.tempcache:
    230             return self.tempcache[url]
    231         type, url1 = splittype(url)
    232         if filename is None and (not type or type == 'file'):
    233             try:
    234                 fp = self.open_local_file(url1)
    235                 hdrs = fp.info()
    236                 fp.close()
    237                 return url2pathname(splithost(url1)[1]), hdrs
    238             except IOError:
    239                 pass
    240         fp = self.open(url, data)
    241         try:
    242             headers = fp.info()
    243             if filename:
    244                 tfp = open(filename, 'wb')
    245             else:
    246                 import tempfile
    247                 garbage, path = splittype(url)
    248                 garbage, path = splithost(path or "")
    249                 path, garbage = splitquery(path or "")
    250                 path, garbage = splitattr(path or "")
    251                 suffix = os.path.splitext(path)[1]
    252                 (fd, filename) = tempfile.mkstemp(suffix)
    253                 self.__tempfiles.append(filename)
    254                 tfp = os.fdopen(fd, 'wb')
    255             try:
    256                 result = filename, headers
    257                 if self.tempcache is not None:
    258                     self.tempcache[url] = result
    259                 bs = 1024*8
    260                 size = -1
    261                 read = 0
    262                 blocknum = 0
    263                 if "content-length" in headers:
    264                     size = int(headers["Content-Length"])
    265                 if reporthook:
    266                     reporthook(blocknum, bs, size)
    267                 while 1:
    268                     block = fp.read(bs)
    269                     if block == "":
    270                         break
    271                     read += len(block)
    272                     tfp.write(block)
    273                     blocknum += 1
    274                     if reporthook:
    275                         reporthook(blocknum, bs, size)
    276             finally:
    277                 tfp.close()
    278         finally:
    279             fp.close()
    280 
    281         # raise exception if actual size does not match content-length header
    282         if size >= 0 and read < size:
    283             raise ContentTooShortError("retrieval incomplete: got only %i out "
    284                                        "of %i bytes" % (read, size), result)
    285 
    286         return result
    287 
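    # retrieve() usage sketch (the URL is hypothetical); without a filename
    # argument the data goes to a temporary file that cleanup() removes later:
    #
    #     fn, hdrs = URLopener().retrieve('http://www.example.com/logo.png')
    #     print fn, hdrs.gettype()
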
    288     # Each method named open_<type> knows how to open that type of URL
    289 
    290     def open_http(self, url, data=None):
    291         """Use HTTP protocol."""
    292         import httplib
    293         user_passwd = None
    294         proxy_passwd = None
    295         if isinstance(url, str):
    296             host, selector = splithost(url)
    297             if host:
    298                 user_passwd, host = splituser(host)
    299                 host = unquote(host)
    300             realhost = host
    301         else:
    302             host, selector = url
    303             # check whether the proxy contains authorization information
    304             proxy_passwd, host = splituser(host)
    305             # now we proceed with the url we want to obtain
    306             urltype, rest = splittype(selector)
    307             url = rest
    308             user_passwd = None
    309             if urltype.lower() != 'http':
    310                 realhost = None
    311             else:
    312                 realhost, rest = splithost(rest)
    313                 if realhost:
    314                     user_passwd, realhost = splituser(realhost)
    315                 if user_passwd:
    316                     selector = "%s://%s%s" % (urltype, realhost, rest)
    317                 if proxy_bypass(realhost):
    318                     host = realhost
    319 
    320             #print "proxy via http:", host, selector
    321         if not host: raise IOError, ('http error', 'no host given')
    322 
    323         if proxy_passwd:
    324             proxy_passwd = unquote(proxy_passwd)
    325             proxy_auth = base64.b64encode(proxy_passwd).strip()
    326         else:
    327             proxy_auth = None
    328 
    329         if user_passwd:
    330             user_passwd = unquote(user_passwd)
    331             auth = base64.b64encode(user_passwd).strip()
    332         else:
    333             auth = None
    334         h = httplib.HTTP(host)
    335         if data is not None:
    336             h.putrequest('POST', selector)
    337             h.putheader('Content-Type', 'application/x-www-form-urlencoded')
    338             h.putheader('Content-Length', '%d' % len(data))
    339         else:
    340             h.putrequest('GET', selector)
    341         if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    342         if auth: h.putheader('Authorization', 'Basic %s' % auth)
    343         if realhost: h.putheader('Host', realhost)
    344         for args in self.addheaders: h.putheader(*args)
    345         h.endheaders(data)
    346         errcode, errmsg, headers = h.getreply()
    347         fp = h.getfile()
    348         if errcode == -1:
    349             if fp: fp.close()
    350             # something went wrong with the HTTP status line
    351             raise IOError, ('http protocol error', 0,
    352                             'got a bad status line', None)
    353         # According to RFC 2616, "2xx" code indicates that the client's
    354         # request was successfully received, understood, and accepted.
    355         if (200 <= errcode < 300):
    356             return addinfourl(fp, headers, "http:" + url, errcode)
    357         else:
    358             if data is None:
    359                 return self.http_error(url, fp, errcode, errmsg, headers)
    360             else:
    361                 return self.http_error(url, fp, errcode, errmsg, headers, data)
    362 
    363     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
    364         """Handle http errors.
    365         Derived class can override this, or provide specific handlers
    366         named http_error_DDD where DDD is the 3-digit error code."""
    367         # First check if there's a specific handler for this error
    368         name = 'http_error_%d' % errcode
    369         if hasattr(self, name):
    370             method = getattr(self, name)
    371             if data is None:
    372                 result = method(url, fp, errcode, errmsg, headers)
    373             else:
    374                 result = method(url, fp, errcode, errmsg, headers, data)
    375             if result: return result
    376         return self.http_error_default(url, fp, errcode, errmsg, headers)
    377 
    378     def http_error_default(self, url, fp, errcode, errmsg, headers):
    379         """Default error handler: close the connection and raise IOError."""
    380         fp.close()
    381         raise IOError, ('http error', errcode, errmsg, headers)
    382 
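    # Subclassing sketch for the http_error_DDD hook described above (the
    # handler body is only an illustration):
    #
    #     class MyURLopener(URLopener):
    #         def http_error_404(self, url, fp, errcode, errmsg, headers):
    #             fp.close()
    #             return None    # fall through to http_error_default()
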
    383     if _have_ssl:
    384         def open_https(self, url, data=None):
    385             """Use HTTPS protocol."""
    386 
    387             import httplib
    388             user_passwd = None
    389             proxy_passwd = None
    390             if isinstance(url, str):
    391                 host, selector = splithost(url)
    392                 if host:
    393                     user_passwd, host = splituser(host)
    394                     host = unquote(host)
    395                 realhost = host
    396             else:
    397                 host, selector = url
    398                 # check whether the proxy contains authorization information
    399                 proxy_passwd, host = splituser(host)
    400                 urltype, rest = splittype(selector)
    401                 url = rest
    402                 user_passwd = None
    403                 if urltype.lower() != 'https':
    404                     realhost = None
    405                 else:
    406                     realhost, rest = splithost(rest)
    407                     if realhost:
    408                         user_passwd, realhost = splituser(realhost)
    409                     if user_passwd:
    410                         selector = "%s://%s%s" % (urltype, realhost, rest)
    411                 #print "proxy via https:", host, selector
    412             if not host: raise IOError, ('https error', 'no host given')
    413             if proxy_passwd:
    414                 proxy_passwd = unquote(proxy_passwd)
    415                 proxy_auth = base64.b64encode(proxy_passwd).strip()
    416             else:
    417                 proxy_auth = None
    418             if user_passwd:
    419                 user_passwd = unquote(user_passwd)
    420                 auth = base64.b64encode(user_passwd).strip()
    421             else:
    422                 auth = None
    423             h = httplib.HTTPS(host, 0,
    424                               key_file=self.key_file,
    425                               cert_file=self.cert_file)
    426             if data is not None:
    427                 h.putrequest('POST', selector)
    428                 h.putheader('Content-Type',
    429                             'application/x-www-form-urlencoded')
    430                 h.putheader('Content-Length', '%d' % len(data))
    431             else:
    432                 h.putrequest('GET', selector)
    433             if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    434             if auth: h.putheader('Authorization', 'Basic %s' % auth)
    435             if realhost: h.putheader('Host', realhost)
    436             for args in self.addheaders: h.putheader(*args)
    437             h.endheaders(data)
    438             errcode, errmsg, headers = h.getreply()
    439             fp = h.getfile()
    440             if errcode == -1:
    441                 if fp: fp.close()
    442                 # something went wrong with the HTTP status line
    443                 raise IOError, ('http protocol error', 0,
    444                                 'got a bad status line', None)
    445             # According to RFC 2616, "2xx" code indicates that the client's
    446             # request was successfully received, understood, and accepted.
    447             if (200 <= errcode < 300):
    448                 return addinfourl(fp, headers, "https:" + url, errcode)
    449             else:
    450                 if data is None:
    451                     return self.http_error(url, fp, errcode, errmsg, headers)
    452                 else:
    453                     return self.http_error(url, fp, errcode, errmsg, headers,
    454                                            data)
    455 
    456     def open_file(self, url):
    457         """Use local file or FTP depending on form of URL."""
    458         if not isinstance(url, str):
    459             raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
    460         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
    461             return self.open_ftp(url)
    462         else:
    463             return self.open_local_file(url)
    464 
    465     def open_local_file(self, url):
    466         """Use local file."""
    467         import mimetypes, mimetools, email.utils
    468         try:
    469             from cStringIO import StringIO
    470         except ImportError:
    471             from StringIO import StringIO
    472         host, file = splithost(url)
    473         localname = url2pathname(file)
    474         try:
    475             stats = os.stat(localname)
    476         except OSError, e:
    477             raise IOError(e.errno, e.strerror, e.filename)
    478         size = stats.st_size
    479         modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
    480         mtype = mimetypes.guess_type(url)[0]
    481         headers = mimetools.Message(StringIO(
    482             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
    483             (mtype or 'text/plain', size, modified)))
    484         if not host:
    485             urlfile = file
    486             if file[:1] == '/':
    487                 urlfile = 'file://' + file
    488             elif file[:2] == './':
    489                 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
    490             return addinfourl(open(localname, 'rb'),
    491                               headers, urlfile)
    492         host, port = splitport(host)
    493         if not port \
    494            and socket.gethostbyname(host) in (localhost(), thishost()):
    495             urlfile = file
    496             if file[:1] == '/':
    497                 urlfile = 'file://' + file
    498             return addinfourl(open(localname, 'rb'),
    499                               headers, urlfile)
    500         raise IOError, ('local file error', 'not on local host')
    501 
    502     def open_ftp(self, url):
    503         """Use FTP protocol."""
    504         if not isinstance(url, str):
    505             raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
    506         import mimetypes, mimetools
    507         try:
    508             from cStringIO import StringIO
    509         except ImportError:
    510             from StringIO import StringIO
    511         host, path = splithost(url)
    512         if not host: raise IOError, ('ftp error', 'no host given')
    513         host, port = splitport(host)
    514         user, host = splituser(host)
    515         if user: user, passwd = splitpasswd(user)
    516         else: passwd = None
    517         host = unquote(host)
    518         user = user or ''
    519         passwd = passwd or ''
    520         host = socket.gethostbyname(host)
    521         if not port:
    522             import ftplib
    523             port = ftplib.FTP_PORT
    524         else:
    525             port = int(port)
    526         path, attrs = splitattr(path)
    527         path = unquote(path)
    528         dirs = path.split('/')
    529         dirs, file = dirs[:-1], dirs[-1]
    530         if dirs and not dirs[0]: dirs = dirs[1:]
    531         if dirs and not dirs[0]: dirs[0] = '/'
    532         key = user, host, port, '/'.join(dirs)
    533         # XXX thread unsafe!
    534         if len(self.ftpcache) > MAXFTPCACHE:
    535             # Prune the cache, rather arbitrarily
    536             for k in self.ftpcache.keys():
    537                 if k != key:
    538                     v = self.ftpcache[k]
    539                     del self.ftpcache[k]
    540                     v.close()
    541         try:
    542             if key not in self.ftpcache:
    543                 self.ftpcache[key] = \
    544                     ftpwrapper(user, passwd, host, port, dirs)
    545             if not file: type = 'D'
    546             else: type = 'I'
    547             for attr in attrs:
    548                 attr, value = splitvalue(attr)
    549                 if attr.lower() == 'type' and \
    550                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
    551                     type = value.upper()
    552             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
    553             mtype = mimetypes.guess_type("ftp:" + url)[0]
    554             headers = ""
    555             if mtype:
    556                 headers += "Content-Type: %s\n" % mtype
    557             if retrlen is not None and retrlen >= 0:
    558                 headers += "Content-Length: %d\n" % retrlen
    559             headers = mimetools.Message(StringIO(headers))
    560             return addinfourl(fp, headers, "ftp:" + url)
    561         except ftperrors(), msg:
    562             raise IOError, ('ftp error', msg), sys.exc_info()[2]
    563 
    564     def open_data(self, url, data=None):
    565         """Use "data" URL."""
    566         if not isinstance(url, str):
    567             raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
    568         # ignore POSTed data
    569         #
    570         # syntax of data URLs:
    571         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    572         # mediatype := [ type "/" subtype ] *( ";" parameter )
    573         # data      := *urlchar
    574         # parameter := attribute "=" value
    575         import mimetools
    576         try:
    577             from cStringIO import StringIO
    578         except ImportError:
    579             from StringIO import StringIO
    580         try:
    581             [type, data] = url.split(',', 1)
    582         except ValueError:
    583             raise IOError, ('data error', 'bad data URL')
    584         if not type:
    585             type = 'text/plain;charset=US-ASCII'
    586         semi = type.rfind(';')
    587         if semi >= 0 and '=' not in type[semi:]:
    588             encoding = type[semi+1:]
    589             type = type[:semi]
    590         else:
    591             encoding = ''
    592         msg = []
    593         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
    594                                             time.gmtime(time.time())))
    595         msg.append('Content-type: %s' % type)
    596         if encoding == 'base64':
    597             data = base64.decodestring(data)
    598         else:
    599             data = unquote(data)
    600         msg.append('Content-Length: %d' % len(data))
    601         msg.append('')
    602         msg.append(data)
    603         msg = '\n'.join(msg)
    604         f = StringIO(msg)
    605         headers = mimetools.Message(f, 0)
    606         #f.fileno = None     # needed for addinfourl
    607         return addinfourl(f, headers, url)
    608 
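# A data-URL sketch for open_data() above; the payload decodes to a short
# ASCII string:
#
#     >>> f = URLopener().open('data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==')
#     >>> f.read()
#     'Hello, World!'
#     >>> f.info().gettype()
#     'text/plain'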
    609 
    610 class FancyURLopener(URLopener):
    611     """Derived class with handlers for errors we can handle (perhaps)."""
    612 
    613     def __init__(self, *args, **kwargs):
    614         URLopener.__init__(self, *args, **kwargs)
    615         self.auth_cache = {}
    616         self.tries = 0
    617         self.maxtries = 10
    618 
    619     def http_error_default(self, url, fp, errcode, errmsg, headers):
    620         """Default error handling -- don't raise an exception."""
    621         return addinfourl(fp, headers, "http:" + url, errcode)
    622 
    623     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
    624         """Error 302 -- relocated (temporarily)."""
    625         self.tries += 1
    626         if self.maxtries and self.tries >= self.maxtries:
    627             if hasattr(self, "http_error_500"):
    628                 meth = self.http_error_500
    629             else:
    630                 meth = self.http_error_default
    631             self.tries = 0
    632             return meth(url, fp, 500,
    633                         "Internal Server Error: Redirect Recursion", headers)
    634         result = self.redirect_internal(url, fp, errcode, errmsg, headers,
    635                                         data)
    636         self.tries = 0
    637         return result
    638 
    639     def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
    640         if 'location' in headers:
    641             newurl = headers['location']
    642         elif 'uri' in headers:
    643             newurl = headers['uri']
    644         else:
    645             return
    646         fp.close()
    647         # In case the server sent a relative URL, join with original:
    648         newurl = basejoin(self.type + ":" + url, newurl)
    649 
    650         # For security reasons we do not allow redirects to protocols
    651         # other than HTTP, HTTPS or FTP.
    652         newurl_lower = newurl.lower()
    653         if not (newurl_lower.startswith('http://') or
    654                 newurl_lower.startswith('https://') or
    655                 newurl_lower.startswith('ftp://')):
    656             raise IOError('redirect error', errcode,
    657                           errmsg + " - Redirection to url '%s' is not allowed" %
    658                           newurl,
    659                           headers)
    660 
    661         return self.open(newurl)
    662 
    663     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    664         """Error 301 -- also relocated (permanently)."""
    665         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
    666 
    667     def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    668         """Error 303 -- also relocated (essentially identical to 302)."""
    669         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
    670 
    671     def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
    672         """Error 307 -- relocated, but turn POST into error."""
    673         if data is None:
    674             return self.http_error_302(url, fp, errcode, errmsg, headers, data)
    675         else:
    676             return self.http_error_default(url, fp, errcode, errmsg, headers)
    677 
    678     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
    679         """Error 401 -- authentication required.
    680         This function supports Basic authentication only."""
    681         if 'www-authenticate' not in headers:
    682             URLopener.http_error_default(self, url, fp,
    683                                          errcode, errmsg, headers)
    684         stuff = headers['www-authenticate']
    685         import re
    686         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
    687         if not match:
    688             URLopener.http_error_default(self, url, fp,
    689                                          errcode, errmsg, headers)
    690         scheme, realm = match.groups()
    691         if scheme.lower() != 'basic':
    692             URLopener.http_error_default(self, url, fp,
    693                                          errcode, errmsg, headers)
    694         name = 'retry_' + self.type + '_basic_auth'
    695         if data is None:
    696             return getattr(self,name)(url, realm)
    697         else:
    698             return getattr(self,name)(url, realm, data)
    699 
    700     def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
    701         """Error 407 -- proxy authentication required.
    702         This function supports Basic authentication only."""
    703         if 'proxy-authenticate' not in headers:
    704             URLopener.http_error_default(self, url, fp,
    705                                          errcode, errmsg, headers)
    706         stuff = headers['proxy-authenticate']
    707         import re
    708         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
    709         if not match:
    710             URLopener.http_error_default(self, url, fp,
    711                                          errcode, errmsg, headers)
    712         scheme, realm = match.groups()
    713         if scheme.lower() != 'basic':
    714             URLopener.http_error_default(self, url, fp,
    715                                          errcode, errmsg, headers)
    716         name = 'retry_proxy_' + self.type + '_basic_auth'
    717         if data is None:
    718             return getattr(self,name)(url, realm)
    719         else:
    720             return getattr(self,name)(url, realm, data)
    721 
    722     def retry_proxy_http_basic_auth(self, url, realm, data=None):
    723         host, selector = splithost(url)
    724         newurl = 'http://' + host + selector
    725         proxy = self.proxies['http']
    726         urltype, proxyhost = splittype(proxy)
    727         proxyhost, proxyselector = splithost(proxyhost)
    728         i = proxyhost.find('@') + 1
    729         proxyhost = proxyhost[i:]
    730         user, passwd = self.get_user_passwd(proxyhost, realm, i)
    731         if not (user or passwd): return None
    732         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
    733         self.proxies['http'] = 'http://' + proxyhost + proxyselector
    734         if data is None:
    735             return self.open(newurl)
    736         else:
    737             return self.open(newurl, data)
    738 
    739     def retry_proxy_https_basic_auth(self, url, realm, data=None):
    740         host, selector = splithost(url)
    741         newurl = 'https://' + host + selector
    742         proxy = self.proxies['https']
    743         urltype, proxyhost = splittype(proxy)
    744         proxyhost, proxyselector = splithost(proxyhost)
    745         i = proxyhost.find('@') + 1
    746         proxyhost = proxyhost[i:]
    747         user, passwd = self.get_user_passwd(proxyhost, realm, i)
    748         if not (user or passwd): return None
    749         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
    750         self.proxies['https'] = 'https://' + proxyhost + proxyselector
    751         if data is None:
    752             return self.open(newurl)
    753         else:
    754             return self.open(newurl, data)
    755 
    756     def retry_http_basic_auth(self, url, realm, data=None):
    757         host, selector = splithost(url)
    758         i = host.find('@') + 1
    759         host = host[i:]
    760         user, passwd = self.get_user_passwd(host, realm, i)
    761         if not (user or passwd): return None
    762         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
    763         newurl = 'http://' + host + selector
    764         if data is None:
    765             return self.open(newurl)
    766         else:
    767             return self.open(newurl, data)
    768 
    769     def retry_https_basic_auth(self, url, realm, data=None):
    770         host, selector = splithost(url)
    771         i = host.find('@') + 1
    772         host = host[i:]
    773         user, passwd = self.get_user_passwd(host, realm, i)
    774         if not (user or passwd): return None
    775         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
    776         newurl = 'https://' + host + selector
    777         if data is None:
    778             return self.open(newurl)
    779         else:
    780             return self.open(newurl, data)
    781 
    782     def get_user_passwd(self, host, realm, clear_cache=0):
    783         key = realm + '@' + host.lower()
    784         if key in self.auth_cache:
    785             if clear_cache:
    786                 del self.auth_cache[key]
    787             else:
    788                 return self.auth_cache[key]
    789         user, passwd = self.prompt_user_passwd(host, realm)
    790         if user or passwd: self.auth_cache[key] = (user, passwd)
    791         return user, passwd
    792 
    793     def prompt_user_passwd(self, host, realm):
    794         """Override this in a GUI environment!"""
    795         import getpass
    796         try:
    797             user = raw_input("Enter username for %s at %s: " % (realm,
    798                                                                 host))
    799             passwd = getpass.getpass("Enter password for %s in %s at %s: " %
    800                 (user, realm, host))
    801             return user, passwd
    802         except KeyboardInterrupt:
    803             print
    804             return None, None
    805 
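# Sketch of overriding prompt_user_passwd() for non-interactive use (the
# credentials are placeholders):
#
#     class NonInteractiveOpener(FancyURLopener):
#         def prompt_user_passwd(self, host, realm):
#             return 'user', 'secret'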
    806 
    807 # Utility functions
    808 
    809 _localhost = None
    810 def localhost():
    811     """Return the IP address of the magic hostname 'localhost'."""
    812     global _localhost
    813     if _localhost is None:
    814         _localhost = socket.gethostbyname('localhost')
    815     return _localhost
    816 
    817 _thishost = None
    818 def thishost():
    819     """Return the IP address of the current host."""
    820     global _thishost
    821     if _thishost is None:
    822         _thishost = socket.gethostbyname(socket.gethostname())
    823     return _thishost
    824 
    825 _ftperrors = None
    826 def ftperrors():
    827     """Return the set of errors raised by the FTP class."""
    828     global _ftperrors
    829     if _ftperrors is None:
    830         import ftplib
    831         _ftperrors = ftplib.all_errors
    832     return _ftperrors
    833 
    834 _noheaders = None
    835 def noheaders():
    836     """Return an empty mimetools.Message object."""
    837     global _noheaders
    838     if _noheaders is None:
    839         import mimetools
    840         try:
    841             from cStringIO import StringIO
    842         except ImportError:
    843             from StringIO import StringIO
    844         _noheaders = mimetools.Message(StringIO(), 0)
    845         _noheaders.fp.close()   # Recycle file descriptor
    846     return _noheaders
    847 
    848 
    849 # Utility classes
    850 
    851 class ftpwrapper:
    852     """Class used by open_ftp() for cache of open FTP connections."""
    853 
    854     def __init__(self, user, passwd, host, port, dirs,
    855                  timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
    856                  persistent=True):
    857         self.user = user
    858         self.passwd = passwd
    859         self.host = host
    860         self.port = port
    861         self.dirs = dirs
    862         self.timeout = timeout
    863         self.refcount = 0
    864         self.keepalive = persistent
    865         self.init()
    866 
    867     def init(self):
    868         import ftplib
    869         self.busy = 0
    870         self.ftp = ftplib.FTP()
    871         self.ftp.connect(self.host, self.port, self.timeout)
    872         self.ftp.login(self.user, self.passwd)
    873         for dir in self.dirs:
    874             self.ftp.cwd(dir)
    875 
    876     def retrfile(self, file, type):
    877         import ftplib
    878         self.endtransfer()
    879         if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
    880         else: cmd = 'TYPE ' + type; isdir = 0
    881         try:
    882             self.ftp.voidcmd(cmd)
    883         except ftplib.all_errors:
    884             self.init()
    885             self.ftp.voidcmd(cmd)
    886         conn = None
    887         if file and not isdir:
    888             # Try to retrieve as a file
    889             try:
    890                 cmd = 'RETR ' + file
    891                 conn, retrlen = self.ftp.ntransfercmd(cmd)
    892             except ftplib.error_perm, reason:
    893                 if str(reason)[:3] != '550':
    894                     raise IOError, ('ftp error', reason), sys.exc_info()[2]
    895         if not conn:
    896             # Set transfer mode to ASCII!
    897             self.ftp.voidcmd('TYPE A')
    898             # Try a directory listing. Verify that directory exists.
    899             if file:
    900                 pwd = self.ftp.pwd()
    901                 try:
    902                     try:
    903                         self.ftp.cwd(file)
    904                     except ftplib.error_perm, reason:
    905                         raise IOError, ('ftp error', reason), sys.exc_info()[2]
    906                 finally:
    907                     self.ftp.cwd(pwd)
    908                 cmd = 'LIST ' + file
    909             else:
    910                 cmd = 'LIST'
    911             conn, retrlen = self.ftp.ntransfercmd(cmd)
    912         self.busy = 1
    913         ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
    914         self.refcount += 1
    915         conn.close()
    916         # Pass back both a suitably decorated object and a retrieval length
    917         return (ftpobj, retrlen)
    918 
    919     def endtransfer(self):
    920         if not self.busy:
    921             return
    922         self.busy = 0
    923         try:
    924             self.ftp.voidresp()
    925         except ftperrors():
    926             pass
    927 
    928     def close(self):
    929         self.keepalive = False
    930         if self.refcount <= 0:
    931             self.real_close()
    932 
    933     def file_close(self):
    934         self.endtransfer()
    935         self.refcount -= 1
    936         if self.refcount <= 0 and not self.keepalive:
    937             self.real_close()
    938 
    939     def real_close(self):
    940         self.endtransfer()
    941         try:
    942             self.ftp.close()
    943         except ftperrors():
    944             pass
    945 
    946 class addbase:
    947     """Base class for addinfo and addclosehook."""
    948 
    949     def __init__(self, fp):
    950         self.fp = fp
    951         self.read = self.fp.read
    952         self.readline = self.fp.readline
    953         if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
    954         if hasattr(self.fp, "fileno"):
    955             self.fileno = self.fp.fileno
    956         else:
    957             self.fileno = lambda: None
    958         if hasattr(self.fp, "__iter__"):
    959             self.__iter__ = self.fp.__iter__
    960             if hasattr(self.fp, "next"):
    961                 self.next = self.fp.next
    962 
    963     def __repr__(self):
    964         return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
    965                                              id(self), self.fp)
    966 
    967     def close(self):
    968         self.read = None
    969         self.readline = None
    970         self.readlines = None
    971         self.fileno = None
    972         if self.fp: self.fp.close()
    973         self.fp = None
    974 
    975 class addclosehook(addbase):
    976     """Class to add a close hook to an open file."""
    977 
    978     def __init__(self, fp, closehook, *hookargs):
    979         addbase.__init__(self, fp)
    980         self.closehook = closehook
    981         self.hookargs = hookargs
    982 
    983     def close(self):
    984         if self.closehook:
    985             self.closehook(*self.hookargs)
    986             self.closehook = None
    987             self.hookargs = None
    988         addbase.close(self)
    989 
    990 class addinfo(addbase):
    991     """class to add an info() method to an open file."""
    992 
    993     def __init__(self, fp, headers):
    994         addbase.__init__(self, fp)
    995         self.headers = headers
    996 
    997     def info(self):
    998         return self.headers
    999 
   1000 class addinfourl(addbase):
   1001     """class to add info() and geturl() methods to an open file."""
   1002 
   1003     def __init__(self, fp, headers, url, code=None):
   1004         addbase.__init__(self, fp)
   1005         self.headers = headers
   1006         self.url = url
   1007         self.code = code
   1008 
   1009     def info(self):
   1010         return self.headers
   1011 
   1012     def getcode(self):
   1013         return self.code
   1014 
   1015     def geturl(self):
   1016         return self.url
   1017 
   1018 
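# A small sketch of the wrapper classes above:
#
#     >>> from StringIO import StringIO
#     >>> f = addinfourl(StringIO('body'), noheaders(), 'http://www.example.com/', 200)
#     >>> f.geturl(), f.getcode()
#     ('http://www.example.com/', 200)
#     >>> f.read()
#     'body'
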
   1019 # Utilities to parse URLs (most of these return None for missing parts):
   1020 # unwrap('<URL:type://host/path>') --> 'type://host/path'
   1021 # splittype('type:opaquestring') --> 'type', 'opaquestring'
   1022 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
   1023 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
   1024 # splitpasswd('user:passwd') -> 'user', 'passwd'
   1025 # splitport('host:port') --> 'host', 'port'
   1026 # splitquery('/path?query') --> '/path', 'query'
   1027 # splittag('/path#tag') --> '/path', 'tag'
   1028 # splitattr('/path;attr1=value1;attr2=value2;...') ->
   1029 #   '/path', ['attr1=value1', 'attr2=value2', ...]
   1030 # splitvalue('attr=value') --> 'attr', 'value'
   1031 # unquote('abc%20def') -> 'abc def'
   1032 # quote('abc def') -> 'abc%20def'
   1033 
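# Composing the helpers below to pick a URL apart (the URL is made up):
#
#     >>> scheme, rest = splittype('http://jane:pw@www.example.com:8080/idx?q=1')
#     >>> hostport, path = splithost(rest)
#     >>> userpass, hostport = splituser(hostport)
#     >>> splitpasswd(userpass), splitport(hostport), splitquery(path)
#     (('jane', 'pw'), ('www.example.com', '8080'), ('/idx', 'q=1'))
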
   1034 try:
   1035     unicode
   1036 except NameError:
   1037     def _is_unicode(x):
   1038         return 0
   1039 else:
   1040     def _is_unicode(x):
   1041         return isinstance(x, unicode)
   1042 
   1043 def toBytes(url):
   1044     """toBytes(u"URL") --> 'URL'."""
   1045     # Most URL schemes require ASCII. If that changes, the conversion
   1046     # can be relaxed
   1047     if _is_unicode(url):
   1048         try:
   1049             url = url.encode("ASCII")
   1050         except UnicodeError:
   1051             raise UnicodeError("URL " + repr(url) +
   1052                                " contains non-ASCII characters")
   1053     return url
   1054 
   1055 def unwrap(url):
   1056     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
   1057     url = url.strip()
   1058     if url[:1] == '<' and url[-1:] == '>':
   1059         url = url[1:-1].strip()
   1060     if url[:4] == 'URL:': url = url[4:].strip()
   1061     return url
   1062 
   1063 _typeprog = None
   1064 def splittype(url):
   1065     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
   1066     global _typeprog
   1067     if _typeprog is None:
   1068         import re
   1069         _typeprog = re.compile('^([^/:]+):')
   1070 
   1071     match = _typeprog.match(url)
   1072     if match:
   1073         scheme = match.group(1)
   1074         return scheme.lower(), url[len(scheme) + 1:]
   1075     return None, url
   1076 
   1077 _hostprog = None
   1078 def splithost(url):
   1079     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
   1080     global _hostprog
   1081     if _hostprog is None:
   1082         import re
   1083         _hostprog = re.compile('^//([^/?]*)(.*)$')
   1084 
   1085     match = _hostprog.match(url)
   1086     if match:
   1087         host_port = match.group(1)
   1088         path = match.group(2)
   1089         if path and not path.startswith('/'):
   1090             path = '/' + path
   1091         return host_port, path
   1092     return None, url
   1093 
   1094 _userprog = None
   1095 def splituser(host):
   1096     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
   1097     global _userprog
   1098     if _userprog is None:
   1099         import re
   1100         _userprog = re.compile('^(.*)@(.*)$')
   1101 
   1102     match = _userprog.match(host)
   1103     if match: return match.group(1, 2)
   1104     return None, host
   1105 
   1106 _passwdprog = None
   1107 def splitpasswd(user):
   1108     """splitpasswd('user:passwd') -> 'user', 'passwd'."""
   1109     global _passwdprog
   1110     if _passwdprog is None:
   1111         import re
   1112         _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
   1113 
   1114     match = _passwdprog.match(user)
   1115     if match: return match.group(1, 2)
   1116     return user, None
   1117 
   1118 # splittag('/path#tag') --> '/path', 'tag'
   1119 _portprog = None
   1120 def splitport(host):
   1121     """splitport('host:port') --> 'host', 'port'."""
   1122     global _portprog
   1123     if _portprog is None:
   1124         import re
   1125         _portprog = re.compile('^(.*):([0-9]+)$')
   1126 
   1127     match = _portprog.match(host)
   1128     if match: return match.group(1, 2)
   1129     return host, None
   1130 
   1131 _nportprog = None
   1132 def splitnport(host, defport=-1):
   1133     """Split host and port, returning numeric port.
   1134     Return given default port if no ':' found; defaults to -1.
   1135     Return numerical port if a valid number is found after ':'.
   1136     Return None if ':' is present but is not followed by a valid number."""
   1137     global _nportprog
   1138     if _nportprog is None:
   1139         import re
   1140         _nportprog = re.compile('^(.*):(.*)$')
   1141 
   1142     match = _nportprog.match(host)
   1143     if match:
   1144         host, port = match.group(1, 2)
   1145         try:
   1146             if not port: raise ValueError, "no digits"
   1147             nport = int(port)
   1148         except ValueError:
   1149             nport = None
   1150         return host, nport
   1151     return host, defport
   1152 
   1153 _queryprog = None
   1154 def splitquery(url):
   1155     """splitquery('/path?query') --> '/path', 'query'."""
   1156     global _queryprog
   1157     if _queryprog is None:
   1158         import re
   1159         _queryprog = re.compile('^(.*)\?([^?]*)$')
   1160 
   1161     match = _queryprog.match(url)
   1162     if match: return match.group(1, 2)
   1163     return url, None
   1164 
   1165 _tagprog = None
   1166 def splittag(url):
   1167     """splittag('/path#tag') --> '/path', 'tag'."""
   1168     global _tagprog
   1169     if _tagprog is None:
   1170         import re
   1171         _tagprog = re.compile('^(.*)#([^#]*)$')
   1172 
   1173     match = _tagprog.match(url)
   1174     if match: return match.group(1, 2)
   1175     return url, None
   1176 
   1177 def splitattr(url):
   1178     """splitattr('/path;attr1=value1;attr2=value2;...') ->
   1179         '/path', ['attr1=value1', 'attr2=value2', ...]."""
   1180     words = url.split(';')
   1181     return words[0], words[1:]
   1182 
   1183 _valueprog = None
   1184 def splitvalue(attr):
   1185     """splitvalue('attr=value') --> 'attr', 'value'."""
   1186     global _valueprog
   1187     if _valueprog is None:
   1188         import re
   1189         _valueprog = re.compile('^([^=]*)=(.*)$')
   1190 
   1191     match = _valueprog.match(attr)
   1192     if match: return match.group(1, 2)
   1193     return attr, None
   1194 
   1195 # urlparse contains a duplicate of this method to avoid a circular import.  If
   1196 # you update this method, also update the copy in urlparse.  This code
   1197 # duplication does not exist in Python3.
   1198 
   1199 _hexdig = '0123456789ABCDEFabcdef'
   1200 _hextochr = dict((a + b, chr(int(a + b, 16)))
   1201                  for a in _hexdig for b in _hexdig)
   1202 _asciire = re.compile('([\x00-\x7f]+)')
   1203 
   1204 def unquote(s):
   1205     """unquote('abc%20def') -> 'abc def'."""
   1206     if _is_unicode(s):
   1207         if '%' not in s:
   1208             return s
   1209         bits = _asciire.split(s)
   1210         res = [bits[0]]
   1211         append = res.append
   1212         for i in range(1, len(bits), 2):
   1213             append(unquote(str(bits[i])).decode('latin1'))
   1214             append(bits[i + 1])
   1215         return ''.join(res)
   1216 
   1217     bits = s.split('%')
   1218     # fastpath
   1219     if len(bits) == 1:
   1220         return s
   1221     res = [bits[0]]
   1222     append = res.append
   1223     for item in bits[1:]:
   1224         try:
   1225             append(_hextochr[item[:2]])
   1226             append(item[2:])
   1227         except KeyError:
   1228             append('%')
   1229             append(item)
   1230     return ''.join(res)
   1231 
   1232 def unquote_plus(s):
   1233     """unquote_plus('%7e/abc+def') -> '~/abc def'."""
   1234     s = s.replace('+', ' ')
   1235     return unquote(s)
   1236 
   1237 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
   1238                'abcdefghijklmnopqrstuvwxyz'
   1239                '0123456789' '_.-')
   1240 _safe_map = {}
   1241 for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
   1242     _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
   1243 _safe_quoters = {}
   1244 
   1245 def quote(s, safe='/'):
   1246     """quote('abc def') -> 'abc%20def'
   1247 
   1248     Each part of a URL, e.g. the path info, the query, etc., has a
   1249     different set of reserved characters that must be quoted.
   1250 
   1251     RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
   1252     the following reserved characters.
   1253 
   1254     reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
   1255                   "$" | ","
   1256 
   1257     Each of these characters is reserved in some component of a URL,
   1258     but not necessarily in all of them.
   1259 
   1260     By default, the quote function is intended for quoting the path
   1261     section of a URL.  Thus, it will not encode '/'.  This character
   1262     is reserved, but in typical usage the quote function is being
   1263     called on a path where the existing slash characters are used as
   1264     reserved characters.
   1265     """
   1266     # fastpath
   1267     if not s:
   1268         if s is None:
   1269             raise TypeError('None object cannot be quoted')
   1270         return s
   1271     cachekey = (safe, always_safe)
   1272     try:
   1273         (quoter, safe) = _safe_quoters[cachekey]
   1274     except KeyError:
   1275         safe_map = _safe_map.copy()
   1276         safe_map.update([(c, c) for c in safe])
   1277         quoter = safe_map.__getitem__
   1278         safe = always_safe + safe
   1279         _safe_quoters[cachekey] = (quoter, safe)
   1280     if not s.rstrip(safe):
   1281         return s
   1282     return ''.join(map(quoter, s))
   1283 
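# quote() sketch: '/' is left alone by default but is encoded when an empty
# 'safe' string is passed:
#
#     >>> quote('/~user/file name.txt')
#     '/%7Euser/file%20name.txt'
#     >>> quote('/~user/file name.txt', safe='')
#     '%2F%7Euser%2Ffile%20name.txt'
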
   1284 def quote_plus(s, safe=''):
   1285     """Quote the query fragment of a URL, replacing ' ' with '+'."""
   1286     if ' ' in s:
   1287         s = quote(s, safe + ' ')
   1288         return s.replace(' ', '+')
   1289     return quote(s, safe)
   1290 
   1291 def urlencode(query, doseq=0):
   1292     """Encode a sequence of two-element tuples or a dictionary into a URL query string.
   1293 
   1294     If any values in the query arg are sequences and doseq is true, each
   1295     sequence element is converted to a separate parameter.
   1296 
   1297     If the query arg is a sequence of two-element tuples, the order of the
   1298     parameters in the output will match the order of parameters in the
   1299     input.
   1300     """
   1301 
   1302     if hasattr(query,"items"):
   1303         # mapping objects
   1304         query = query.items()
   1305     else:
   1306         # it's a bother at times that strings and string-like objects are
   1307         # sequences...
   1308         try:
   1309             # non-sequence items should not work with len()
   1310             # a non-empty string passes len() but fails the tuple check on query[0]
   1311             if len(query) and not isinstance(query[0], tuple):
   1312                 raise TypeError
   1313             # zero-length sequences of all types will get here and succeed,
   1314             # but that's a minor nit - since the original implementation
   1315             # allowed empty dicts that type of behavior probably should be
   1316             # preserved for consistency
   1317         except TypeError:
   1318             ty,va,tb = sys.exc_info()
   1319             raise TypeError, "not a valid non-string sequence or mapping object", tb
   1320 
   1321     l = []
   1322     if not doseq:
   1323         # preserve old behavior
   1324         for k, v in query:
   1325             k = quote_plus(str(k))
   1326             v = quote_plus(str(v))
   1327             l.append(k + '=' + v)
   1328     else:
   1329         for k, v in query:
   1330             k = quote_plus(str(k))
   1331             if isinstance(v, str):
   1332                 v = quote_plus(v)
   1333                 l.append(k + '=' + v)
   1334             elif _is_unicode(v):
   1335                 # is there a reasonable way to convert to ASCII?
   1336                 # encode generates a string, but "replace" or "ignore"
   1337                 # lose information and "strict" can raise UnicodeError
   1338                 v = quote_plus(v.encode("ASCII","replace"))
   1339                 l.append(k + '=' + v)
   1340             else:
   1341                 try:
   1342                     # is this a sufficient test for sequence-ness?
   1343                     len(v)
   1344                 except TypeError:
   1345                     # not a sequence
   1346                     v = quote_plus(str(v))
   1347                     l.append(k + '=' + v)
   1348                 else:
   1349                     # loop over the sequence
   1350                     for elt in v:
   1351                         l.append(k + '=' + quote_plus(str(elt)))
   1352     return '&'.join(l)
   1353 
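        # Illustrative usage (added for clarity; not part of the original
        # module).  A list of pairs keeps its order; doseq controls how
        # sequence values are expanded:
        #
        #     >>> urlencode([('name', 'A B'), ('id', 7)])
        #     'name=A+B&id=7'
        #     >>> urlencode([('tag', ['a', 'b'])], doseq=1)
        #     'tag=a&tag=b'
        #     >>> urlencode([('tag', ['a', 'b'])])    # str() of the list is quoted
        #     'tag=%5B%27a%27%2C+%27b%27%5D'
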
   1354 # Proxy handling
   1355 def getproxies_environment():
   1356     """Return a dictionary of scheme -> proxy server URL mappings.
   1357 
   1358     Scan the environment for variables named <scheme>_proxy;
   1359     this seems to be the standard convention.  If you need a
   1360     different way, you can pass a proxies dictionary to the
   1361     [Fancy]URLopener constructor.
   1362 
   1363     """
   1364     proxies = {}
   1365     for name, value in os.environ.items():
   1366         name = name.lower()
   1367         if value and name[-6:] == '_proxy':
   1368             proxies[name[:-6]] = value
   1369     return proxies
   1370 
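        # Illustrative usage (added for clarity; not part of the original
        # module).  'proxy.example.com' is a placeholder host, and the output
        # assumes no other *_proxy variables are set:
        #
        #     >>> import os
        #     >>> os.environ['http_proxy'] = 'http://proxy.example.com:3128'
        #     >>> getproxies_environment()
        #     {'http': 'http://proxy.example.com:3128'}
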
   1371 def proxy_bypass_environment(host):
   1372     """Test if proxies should not be used for a particular host.
   1373 
   1374     Checks the environment for a variable named no_proxy, which should
   1375     be a list of DNS suffixes separated by commas, or '*' for all hosts.
   1376     """
   1377     no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
   1378     # '*' is a special case meaning always bypass
   1379     if no_proxy == '*':
   1380         return 1
   1381     # strip port off host
   1382     hostonly, port = splitport(host)
   1383     # check if the host ends with any of the DNS suffixes
   1384     no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
   1385     for name in no_proxy_list:
   1386         if name and (hostonly.endswith(name) or host.endswith(name)):
   1387             return 1
   1388     # otherwise, don't bypass
   1389     return 0
   1390 
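        # Illustrative usage (added for clarity; not part of the original
        # module); '.internal.example.com' is a placeholder suffix:
        #
        #     >>> import os
        #     >>> os.environ['no_proxy'] = 'localhost,.internal.example.com'
        #     >>> proxy_bypass_environment('db.internal.example.com:5432')
        #     1
        #     >>> proxy_bypass_environment('www.python.org')
        #     0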
   1391 
   1392 if sys.platform == 'darwin':
   1393     from _scproxy import _get_proxy_settings, _get_proxies
   1394 
   1395     def proxy_bypass_macosx_sysconf(host):
   1396         """
   1397         Return True iff this host shouldn't be accessed using a proxy
   1398 
   1399         This function uses the MacOSX framework SystemConfiguration
   1400         to fetch the proxy information.
   1401         """
   1402         import re
   1403         import socket
   1404         from fnmatch import fnmatch
   1405 
   1406         hostonly, port = splitport(host)
   1407 
   1408         def ip2num(ipAddr):
   1409             parts = ipAddr.split('.')
   1410             parts = map(int, parts)
   1411             if len(parts) != 4:
   1412                 parts = (parts + [0, 0, 0, 0])[:4]
   1413             return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
   1414 
   1415         proxy_settings = _get_proxy_settings()
   1416 
   1417         # Check for simple host names:
   1418         if '.' not in host:
   1419             if proxy_settings['exclude_simple']:
   1420                 return True
   1421 
   1422         hostIP = None
   1423 
   1424         for value in proxy_settings.get('exceptions', ()):
   1425             # Items in the list are strings like these: *.local, 169.254/16
   1426             if not value: continue
   1427 
   1428             m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
   1429             if m is not None:
   1430                 if hostIP is None:
   1431                     try:
   1432                         hostIP = socket.gethostbyname(hostonly)
   1433                         hostIP = ip2num(hostIP)
   1434                     except socket.error:
   1435                         continue
   1436 
   1437                 base = ip2num(m.group(1))
   1438                 mask = m.group(2)
   1439                 if mask is None:
   1440                     mask = 8 * (m.group(1).count('.') + 1)
   1441 
   1442                 else:
   1443                     mask = int(mask[1:])
   1444                 mask = 32 - mask
   1445 
   1446                 if (hostIP >> mask) == (base >> mask):
   1447                     return True
   1448 
   1449             elif fnmatch(host, value):
   1450                 return True
   1451 
   1452         return False
   1453 
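            # How an 'exceptions' entry such as '169.254/16' is matched above:
            # ip2num('169.254') pads the address to 169.254.0.0, the '/16'
            # prefix becomes a right shift of 32 - 16 = 16 bits, and only the
            # top 16 bits of the host address and the exception are compared.
            # Entries without a slash default to 8 bits per dotted component;
            # non-numeric entries such as '*.local' fall back to fnmatch().
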
   1454     def getproxies_macosx_sysconf():
   1455         """Return a dictionary of scheme -> proxy server URL mappings.
   1456 
   1457         This function uses the MacOSX framework SystemConfiguration
   1458         to fetch the proxy information.
   1459         """
   1460         return _get_proxies()
   1461 
   1462     def proxy_bypass(host):
   1463         if getproxies_environment():
   1464             return proxy_bypass_environment(host)
   1465         else:
   1466             return proxy_bypass_macosx_sysconf(host)
   1467 
   1468     def getproxies():
   1469         return getproxies_environment() or getproxies_macosx_sysconf()
   1470 
   1471 elif os.name == 'nt':
   1472     def getproxies_registry():
   1473         """Return a dictionary of scheme -> proxy server URL mappings.
   1474 
   1475         Win32 uses the registry to store proxies.
   1476 
   1477         """
   1478         proxies = {}
   1479         try:
   1480             import _winreg
   1481         except ImportError:
   1482             # Std module, so should be around - but you never know!
   1483             return proxies
   1484         try:
   1485             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
   1486                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
   1487             proxyEnable = _winreg.QueryValueEx(internetSettings,
   1488                                                'ProxyEnable')[0]
   1489             if proxyEnable:
   1490                 # Returned as Unicode; causes problems if not converted to ASCII
   1491                 proxyServer = str(_winreg.QueryValueEx(internetSettings,
   1492                                                        'ProxyServer')[0])
   1493                 if '=' in proxyServer:
   1494                     # Per-protocol settings
   1495                     for p in proxyServer.split(';'):
   1496                         protocol, address = p.split('=', 1)
   1497                         # See if address has a type:// prefix
   1498                         import re
   1499                         if not re.match('^([^/:]+)://', address):
   1500                             address = '%s://%s' % (protocol, address)
   1501                         proxies[protocol] = address
   1502                 else:
   1503                     # Use one setting for all protocols
   1504                     if proxyServer[:5] == 'http:':
   1505                         proxies['http'] = proxyServer
   1506                     else:
   1507                         proxies['http'] = 'http://%s' % proxyServer
   1508                         proxies['https'] = 'https://%s' % proxyServer
   1509                         proxies['ftp'] = 'ftp://%s' % proxyServer
   1510             internetSettings.Close()
   1511         except (WindowsError, ValueError, TypeError):
   1512             # Either the registry key was not found, or the value was in
   1513             # an unexpected format.
   1514             # proxies already set up to be empty so nothing to do
   1515             pass
   1516         return proxies
   1517 
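            # The registry 'ProxyServer' value parsed above takes two shapes:
            # per-protocol, e.g. 'http=proxy:80;https=proxy:443;ftp=proxy:21',
            # or a single entry such as 'proxy:80', which is applied to http,
            # https and ftp alike unless it already starts with 'http:'.
            # ('proxy' is just a placeholder name here.)
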
   1518     def getproxies():
   1519         """Return a dictionary of scheme -> proxy server URL mappings.
   1520 
   1521         Returns settings gathered from the environment, if specified,
   1522         or the registry.
   1523 
   1524         """
   1525         return getproxies_environment() or getproxies_registry()
   1526 
   1527     def proxy_bypass_registry(host):
   1528         try:
   1529             import _winreg
   1530             import re
   1531         except ImportError:
   1532             # Std modules, so should be around - but you never know!
   1533             return 0
   1534         try:
   1535             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
   1536                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
   1537             proxyEnable = _winreg.QueryValueEx(internetSettings,
   1538                                                'ProxyEnable')[0]
   1539             proxyOverride = str(_winreg.QueryValueEx(internetSettings,
   1540                                                      'ProxyOverride')[0])
   1541             # ^^^^ Returned as Unicode; causes problems if not converted to ASCII
   1542         except WindowsError:
   1543             return 0
   1544         if not proxyEnable or not proxyOverride:
   1545             return 0
   1546         # try to make a host list from name and IP address.
   1547         rawHost, port = splitport(host)
   1548         host = [rawHost]
   1549         try:
   1550             addr = socket.gethostbyname(rawHost)
   1551             if addr != rawHost:
   1552                 host.append(addr)
   1553         except socket.error:
   1554             pass
   1555         try:
   1556             fqdn = socket.getfqdn(rawHost)
   1557             if fqdn != rawHost:
   1558                 host.append(fqdn)
   1559         except socket.error:
   1560             pass
   1561         # make a check value list from the registry entry; the special
   1562         # '<local>' entry is handled in the loop below and matches any
   1563         # host name that contains no dot.
   1564         proxyOverride = proxyOverride.split(';')
   1565         # now check if we match one of the registry values.
   1566         for test in proxyOverride:
   1567             if test == '<local>':
   1568                 if '.' not in rawHost:
   1569                     return 1
   1570             test = test.replace(".", r"\.")     # mask dots
   1571             test = test.replace("*", r".*")     # change glob sequence
   1572             test = test.replace("?", r".")      # change glob char
   1573             for val in host:
   1574                 # print "%s <--> %s" %( test, val )
   1575                 if re.match(test, val, re.I):
   1576                     return 1
   1577         return 0
   1578 
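            # ProxyOverride entries are ';'-separated glob patterns, for
            # example '*.example.com;192.168.*;<local>'; '*' and '?' are
            # translated into a regular expression above, and '<local>'
            # matches any host name that contains no dot.  ('example.com'
            # is a placeholder.)
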
   1579     def proxy_bypass(host):
   1580         """Return whether the given host should bypass the proxy.
   1581 
   1582         Uses the no_proxy environment setting if any <scheme>_proxy
   1583         variables are set, and the registry's ProxyOverride list otherwise.
   1584 
   1585         """
   1586         if getproxies_environment():
   1587             return proxy_bypass_environment(host)
   1588         else:
   1589             return proxy_bypass_registry(host)
   1590 
   1591 else:
   1592     # By default use environment variables
   1593     getproxies = getproxies_environment
   1594     proxy_bypass = proxy_bypass_environment
   1595 
   1596 # Test and time quote() and unquote()
   1597 def test1():
   1598     s = ''
   1599     for i in range(256): s = s + chr(i)
   1600     s = s*4
   1601     t0 = time.time()
   1602     qs = quote(s)
   1603     uqs = unquote(qs)
   1604     t1 = time.time()
   1605     if uqs != s:
   1606         print 'Wrong!'
   1607     print repr(s)
   1608     print repr(qs)
   1609     print repr(uqs)
   1610     print round(t1 - t0, 3), 'sec'
   1611 
   1612 
   1613 def reporthook(blocknum, blocksize, totalsize):
   1614     # Report during remote transfers
   1615     print "Block number: %d, Block size: %d, Total size: %d" % (
   1616         blocknum, blocksize, totalsize)
   1617