# Mirrored from CPython's Lib/urllib/request.py (code-viewer header removed).
      1 """An extensible library for opening URLs using a variety of protocols
      2 
      3 The simplest way to use this module is to call the urlopen function,
      4 which accepts a string containing a URL or a Request object (described
      5 below).  It opens the URL and returns the results as file-like
      6 object; the returned object has some extra methods described below.
      7 
      8 The OpenerDirector manages a collection of Handler objects that do
      9 all the actual work.  Each Handler implements a particular protocol or
     10 option.  The OpenerDirector is a composite object that invokes the
     11 Handlers needed to open the requested URL.  For example, the
     12 HTTPHandler performs HTTP GET and POST requests and deals with
     13 non-error returns.  The HTTPRedirectHandler automatically deals with
     14 HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
     15 deals with digest authentication.
     16 
     17 urlopen(url, data=None) -- Basic usage is the same as original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
     19 get a file-like object back.  One difference is that you can also pass
     20 a Request instance instead of URL.  Raises a URLError (subclass of
     21 OSError); for HTTP errors, raises an HTTPError, which can also be
     22 treated as a valid response.
     23 
     24 build_opener -- Function that creates a new OpenerDirector instance.
     25 Will install the default handlers.  Accepts one or more Handlers as
     26 arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
     28 handler, the argument will be installed instead of the default.
     29 
     30 install_opener -- Installs a new opener as the default opener.
     31 
     32 objects of interest:
     33 
     34 OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
     35 the Handler classes, while dealing with requests and responses.
     36 
     37 Request -- An object that encapsulates the state of a request.  The
     38 state can be as simple as the URL.  It can also include extra HTTP
     39 headers, e.g. a User-Agent.
     40 
     41 BaseHandler --
     42 
     43 internals:
     44 BaseHandler and parent
     45 _call_chain conventions
     46 
     47 Example usage:
     48 
     49 import urllib.request
     50 
     51 # set up authentication info
     52 authinfo = urllib.request.HTTPBasicAuthHandler()
     53 authinfo.add_password(realm='PDQ Application',
     54                       uri='https://mahler:8092/site-updates.py',
     55                       user='klem',
     56                       passwd='geheim$parole')
     57 
     58 proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
     59 
     60 # build a new opener that adds authentication and caching FTP handlers
     61 opener = urllib.request.build_opener(proxy_support, authinfo,
     62                                      urllib.request.CacheFTPHandler)
     63 
     64 # install it
     65 urllib.request.install_opener(opener)
     66 
     67 f = urllib.request.urlopen('http://www.python.org/')
     68 """
     69 
     70 # XXX issues:
     71 # If an authentication error handler that tries to perform
     72 # authentication for some reason but fails, how should the error be
     73 # signalled?  The client needs to know the HTTP error code.  But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
# pass that information along to the client, too.
     77 # ftp errors aren't handled cleanly
     78 # check digest against correct (i.e. non-apache) implementation
     79 
     80 # Possible extensions:
     81 # complex proxies  XXX not sure what exactly was meant by this
     82 # abstract factory for opener
     83 
     84 import base64
     85 import bisect
     86 import email
     87 import hashlib
     88 import http.client
     89 import io
     90 import os
     91 import posixpath
     92 import re
     93 import socket
     94 import string
     95 import sys
     96 import time
     97 import tempfile
     98 import contextlib
     99 import warnings
    100 
    101 
    102 from urllib.error import URLError, HTTPError, ContentTooShortError
    103 from urllib.parse import (
    104     urlparse, urlsplit, urljoin, unwrap, quote, unquote,
    105     splittype, splithost, splitport, splituser, splitpasswd,
    106     splitattr, splitquery, splitvalue, splittag, to_bytes,
    107     unquote_to_bytes, urlunparse)
    108 from urllib.response import addinfourl, addclosehook
    109 
# check for SSL
try:
    import ssl
except ImportError:
    # ssl is an optional build dependency of CPython; without it HTTPS
    # support is unavailable and HTTPSHandler is never registered.
    _have_ssl = False
else:
    _have_ssl = True

__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]

# used in User-Agent header sent
__version__ = '%d.%d' % sys.version_info[:2]

# Process-wide default opener: lazily created by urlopen() and
# replaced by install_opener().
_opener = None
    139 def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
    140             *, cafile=None, capath=None, cadefault=False, context=None):
    141     '''Open the URL url, which can be either a string or a Request object.
    142 
    143     *data* must be an object specifying additional data to be sent to
    144     the server, or None if no such data is needed.  See Request for
    145     details.
    146 
    147     urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    148     header in its HTTP requests.
    149 
    150     The optional *timeout* parameter specifies a timeout in seconds for
    151     blocking operations like the connection attempt (if not specified, the
    152     global default timeout setting will be used). This only works for HTTP,
    153     HTTPS and FTP connections.
    154 
    155     If *context* is specified, it must be a ssl.SSLContext instance describing
    156     the various SSL options. See HTTPSConnection for more details.
    157 
    158     The optional *cafile* and *capath* parameters specify a set of trusted CA
    159     certificates for HTTPS requests. cafile should point to a single file
    160     containing a bundle of CA certificates, whereas capath should point to a
    161     directory of hashed certificate files. More information can be found in
    162     ssl.SSLContext.load_verify_locations().
    163 
    164     The *cadefault* parameter is ignored.
    165 
    166     This function always returns an object which can work as a context
    167     manager and has methods such as
    168 
    169     * geturl() - return the URL of the resource retrieved, commonly used to
    170       determine if a redirect was followed
    171 
    172     * info() - return the meta-information of the page, such as headers, in the
    173       form of an email.message_from_string() instance (see Quick Reference to
    174       HTTP Headers)
    175 
    176     * getcode() - return the HTTP status code of the response.  Raises URLError
    177       on errors.
    178 
    179     For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
    180     object slightly modified. In addition to the three new methods above, the
    181     msg attribute contains the same information as the reason attribute ---
    182     the reason phrase returned by the server --- instead of the response
    183     headers as it is specified in the documentation for HTTPResponse.
    184 
    185     For FTP, file, and data URLs and requests explicitly handled by legacy
    186     URLopener and FancyURLopener classes, this function returns a
    187     urllib.response.addinfourl object.
    188 
    189     Note that None may be returned if no handler handles the request (though
    190     the default installed global OpenerDirector uses UnknownHandler to ensure
    191     this never happens).
    192 
    193     In addition, if proxy settings are detected (for example, when a *_proxy
    194     environment variable like http_proxy is set), ProxyHandler is default
    195     installed and makes sure the requests are handled through the proxy.
    196 
    197     '''
    198     global _opener
    199     if cafile or capath or cadefault:
    200         import warnings
    201         warnings.warn("cafile, capath and cadefault are deprecated, use a "
    202                       "custom context instead.", DeprecationWarning, 2)
    203         if context is not None:
    204             raise ValueError(
    205                 "You can't pass both context and any of cafile, capath, and "
    206                 "cadefault"
    207             )
    208         if not _have_ssl:
    209             raise ValueError('SSL support not available')
    210         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
    211                                              cafile=cafile,
    212                                              capath=capath)
    213         https_handler = HTTPSHandler(context=context)
    214         opener = build_opener(https_handler)
    215     elif context:
    216         https_handler = HTTPSHandler(context=context)
    217         opener = build_opener(https_handler)
    218     elif _opener is None:
    219         _opener = opener = build_opener()
    220     else:
    221         opener = _opener
    222     return opener.open(url, data, timeout)
    223 
    224 def install_opener(opener):
    225     global _opener
    226     _opener = opener
    227 
    228 _url_tempfiles = []
    229 def urlretrieve(url, filename=None, reporthook=None, data=None):
    230     """
    231     Retrieve a URL into a temporary location on disk.
    232 
    233     Requires a URL argument. If a filename is passed, it is used as
    234     the temporary file location. The reporthook argument should be
    235     a callable that accepts a block number, a read size, and the
    236     total file size of the URL target. The data argument should be
    237     valid URL encoded data.
    238 
    239     If a filename is passed and the URL points to a local resource,
    240     the result is a copy from local file to new file.
    241 
    242     Returns a tuple containing the path to the newly created
    243     data file as well as the resulting HTTPMessage object.
    244     """
    245     url_type, path = splittype(url)
    246 
    247     with contextlib.closing(urlopen(url, data)) as fp:
    248         headers = fp.info()
    249 
    250         # Just return the local path and the "headers" for file://
    251         # URLs. No sense in performing a copy unless requested.
    252         if url_type == "file" and not filename:
    253             return os.path.normpath(path), headers
    254 
    255         # Handle temporary file setup.
    256         if filename:
    257             tfp = open(filename, 'wb')
    258         else:
    259             tfp = tempfile.NamedTemporaryFile(delete=False)
    260             filename = tfp.name
    261             _url_tempfiles.append(filename)
    262 
    263         with tfp:
    264             result = filename, headers
    265             bs = 1024*8
    266             size = -1
    267             read = 0
    268             blocknum = 0
    269             if "content-length" in headers:
    270                 size = int(headers["Content-Length"])
    271 
    272             if reporthook:
    273                 reporthook(blocknum, bs, size)
    274 
    275             while True:
    276                 block = fp.read(bs)
    277                 if not block:
    278                     break
    279                 read += len(block)
    280                 tfp.write(block)
    281                 blocknum += 1
    282                 if reporthook:
    283                     reporthook(blocknum, bs, size)
    284 
    285     if size >= 0 and read < size:
    286         raise ContentTooShortError(
    287             "retrieval incomplete: got only %i out of %i bytes"
    288             % (read, size), result)
    289 
    290     return result
    291 
    292 def urlcleanup():
    293     """Clean up temporary files from urlretrieve calls."""
    294     for temp_file in _url_tempfiles:
    295         try:
    296             os.unlink(temp_file)
    297         except OSError:
    298             pass
    299 
    300     del _url_tempfiles[:]
    301     global _opener
    302     if _opener:
    303         _opener = None
    304 
    305 # copied from cookielib.py
    306 _cut_port_re = re.compile(r":\d+$", re.ASCII)
    307 def request_host(request):
    308     """Return request-host, as defined by RFC 2965.
    309 
    310     Variation from RFC: returned value is lowercased, for convenient
    311     comparison.
    312 
    313     """
    314     url = request.full_url
    315     host = urlparse(url)[1]
    316     if host == "":
    317         host = request.get_header("Host", "")
    318 
    319     # remove port, if present
    320     host = _cut_port_re.sub("", host, 1)
    321     return host.lower()
    322 
class Request:
    """An extensible URL request.

    Holds the URL, the optional request body (*data*), normal and
    unredirected headers, plus the bookkeeping handlers need: proxy and
    CONNECT-tunnel state, the cookie origin host, and an optional
    explicit HTTP *method*.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # Assigning full_url runs the property setter below, which also
        # parses the URL into self.type/host/selector/fragment.
        self.full_url = url
        self.headers = {}
        # Headers that must not be copied onto a redirected request
        # (e.g. auth or cookie headers added by handlers).
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data
        # Host to CONNECT to when proxying an https request; set lazily
        # by set_proxy().
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        if origin_req_host is None:
            # Derive the cookie origin (RFC 2965 request-host) from the URL.
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        if method:
            # Only set when given so get_method() can fall back to its
            # GET/POST default via getattr().
            self.method = method

    @property
    def full_url(self):
        """The complete URL, re-attaching the fragment if one was given."""
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        # Split off the #fragment; it is not sent to the server.
        self._full_url, self.fragment = splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        """The request body, or None for a body-less request."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        """Split self._full_url into type (scheme), host and selector."""
        self.type, rest = splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        """Return the complete URL (method-style accessor for full_url)."""
        return self.full_url

    def set_proxy(self, host, type):
        """Route this request through the proxy at *host* using *type*.

        For https, the original host is remembered as the CONNECT tunnel
        target instead of rewriting the selector.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type= type
            # Proxied requests send the absolute URL as the selector.
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """True once set_proxy() has rewritten the selector."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """True if *header_name* is present in either header dict.

        NOTE: the lookup is exact; callers are expected to pass names in
        the same str.capitalize() form used by add_header().
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the header value, preferring normal over unredirected."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        """Remove *header_name* from both header dicts, if present."""
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        """Return all headers as a list of (name, value) pairs.

        Normal headers take precedence over unredirected ones on a
        name collision.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
    432 
class OpenerDirector:
    """Manage a chain of handlers and use them to open URLs.

    Handler capabilities are discovered by method name when a handler is
    added: '<protocol>_open', '<protocol>_request', '<protocol>_response'
    and '<protocol>_error_<code>' methods are registered in the
    corresponding lookup tables below.
    """
    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler*'s protocol methods with this director."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Split '<protocol>_<condition>' on the first underscore.
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # e.g. 'http_error_404' -> protocol 'http', kind 404;
                # a non-numeric suffix is kept as a string key.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # Keep each per-kind handler list sorted by handler_order
            # (BaseHandler.__lt__).
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or Request), returning the response.

        Runs the registered <protocol>_request pre-processors, opens the
        request via _open(), then runs the <protocol>_response
        post-processors on the result.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Try default_open, then <protocol>_open, then unknown_open."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error handlers.

        For http/https, *args* is (request, response, code, msg, hdrs) and
        dispatch goes to 'http_error_<code>' handlers first, falling back
        to 'http_error_default'.
        """
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
    570 
    571 # XXX probably also want an abstract factory that knows when it makes
    572 # sense to skip a superclass in favor of a subclass and when it might
    573 # make sense to include both
    574 
    575 def build_opener(*handlers):
    576     """Create an opener object from a list of handlers.
    577 
    578     The opener will use several default handlers, including support
    579     for HTTP, FTP and when applicable HTTPS.
    580 
    581     If any of the handlers passed as arguments are subclasses of the
    582     default handlers, the default handlers will not be used.
    583     """
    584     opener = OpenerDirector()
    585     default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
    586                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
    587                        FTPHandler, FileHandler, HTTPErrorProcessor,
    588                        DataHandler]
    589     if hasattr(http.client, "HTTPSConnection"):
    590         default_classes.append(HTTPSHandler)
    591     skip = set()
    592     for klass in default_classes:
    593         for check in handlers:
    594             if isinstance(check, type):
    595                 if issubclass(check, klass):
    596                     skip.add(klass)
    597             elif isinstance(check, klass):
    598                 skip.add(klass)
    599     for klass in skip:
    600         default_classes.remove(klass)
    601 
    602     for klass in default_classes:
    603         opener.add_handler(klass())
    604 
    605     for h in handlers:
    606         if isinstance(h, type):
    607             h = h()
    608         opener.add_handler(h)
    609     return opener
    610 
    611 class BaseHandler:
    612     handler_order = 500
    613 
    614     def add_parent(self, parent):
    615         self.parent = parent
    616 
    617     def close(self):
    618         # Only exists for backwards compatibility
    619         pass
    620 
    621     def __lt__(self, other):
    622         if not hasattr(other, "handler_order"):
    623             # Try to preserve the old behavior of having custom classes
    624             # inserted after default ones (works only for custom user
    625             # classes which are not aware of handler_order).
    626             return True
    627         return self.handler_order < other.handler_order
    628 
    629 
    630 class HTTPErrorProcessor(BaseHandler):
    631     """Process HTTP error responses."""
    632     handler_order = 1000  # after all other processing
    633 
    634     def http_response(self, request, response):
    635         code, msg, hdrs = response.code, response.msg, response.info()
    636 
    637         # According to RFC 2616, "2xx" code indicates that the client's
    638         # request was successfully received, understood, and accepted.
    639         if not (200 <= code < 300):
    640             response = self.parent.error(
    641                 'http', request, response, code, msg, hdrs)
    642 
    643         return response
    644 
    645     https_response = http_response
    646 
    647 class HTTPDefaultErrorHandler(BaseHandler):
    648     def http_error_default(self, req, fp, code, msg, hdrs):
    649         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    650 
class HTTPRedirectHandler(BaseHandler):
    """Follow 301/302/303/307 redirects, with loop detection and a
    scheme allow-list for the redirect target."""
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only GET/HEAD may follow any of the four codes; POST may follow
        # 301/302/303 (and becomes a GET in the new request below).
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Body-describing headers are dropped: the redirected request is
        # issued without the original body.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Handle a redirect response by re-issuing the request at the
        new location, or return None/raise HTTPError when that is not
        allowed."""
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        # An authority with an empty path gets '/' so the target is valid.
        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        # Resolve a relative Location against the original URL.
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    # All four redirect codes share the same handling.
    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
    762 
    763 
    764 def _parse_proxy(proxy):
    765     """Return (scheme, user, password, host/port) given a URL or an authority.
    766 
    767     If a URL is supplied, it must have an authority (host:port) component.
    768     According to RFC 3986, having an authority component means the URL must
    769     have two slashes after the scheme.
    770     """
    771     scheme, r_scheme = splittype(proxy)
    772     if not r_scheme.startswith("/"):
    773         # authority
    774         scheme = None
    775         authority = proxy
    776     else:
    777         # URL
    778         if not r_scheme.startswith("//"):
    779             raise ValueError("proxy URL with no authority: %r" % proxy)
    780         # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
    781         # and 3.3.), path is empty or starts with '/'
    782         end = r_scheme.find("/", 2)
    783         if end == -1:
    784             end = None
    785         authority = r_scheme[2:end]
    786     userinfo, hostport = splituser(authority)
    787     if userinfo is not None:
    788         user, password = splitpasswd(userinfo)
    789     else:
    790         user = password = None
    791     return scheme, user, password, hostport
    792 
class ProxyHandler(BaseHandler):
    """Route requests through the proxies in a {scheme: proxy_url} mapping.

    One ``<scheme>_open`` method is synthesized per configured scheme so
    that the OpenerDirector dispatches those schemes to proxy_open().
    """
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        # proxies: mapping of URL scheme -> proxy URL; defaults to the
        # environment/platform configuration from getproxies().
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            # Bind url/type/meth as lambda default arguments so that each
            # generated method captures this iteration's values (avoids
            # the late-binding closure pitfall).
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Rewrite req to go through *proxy*.

        Returns None to let the normal scheme handler continue, or a
        response when the request had to be re-opened under the proxy's
        own scheme.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        # Honor the platform's proxy bypass configuration (e.g. no_proxy).
        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
    834 
    835 class HTTPPasswordMgr:
    836 
    837     def __init__(self):
    838         self.passwd = {}
    839 
    840     def add_password(self, realm, uri, user, passwd):
    841         # uri could be a single URI or a sequence
    842         if isinstance(uri, str):
    843             uri = [uri]
    844         if realm not in self.passwd:
    845             self.passwd[realm] = {}
    846         for default_port in True, False:
    847             reduced_uri = tuple(
    848                 self.reduce_uri(u, default_port) for u in uri)
    849             self.passwd[realm][reduced_uri] = (user, passwd)
    850 
    851     def find_user_password(self, realm, authuri):
    852         domains = self.passwd.get(realm, {})
    853         for default_port in True, False:
    854             reduced_authuri = self.reduce_uri(authuri, default_port)
    855             for uris, authinfo in domains.items():
    856                 for uri in uris:
    857                     if self.is_suburi(uri, reduced_authuri):
    858                         return authinfo
    859         return None, None
    860 
    861     def reduce_uri(self, uri, default_port=True):
    862         """Accept authority or URI and extract only the authority and path."""
    863         # note HTTP URLs do not have a userinfo component
    864         parts = urlsplit(uri)
    865         if parts[1]:
    866             # URI
    867             scheme = parts[0]
    868             authority = parts[1]
    869             path = parts[2] or '/'
    870         else:
    871             # host or host:port
    872             scheme = None
    873             authority = uri
    874             path = '/'
    875         host, port = splitport(authority)
    876         if default_port and port is None and scheme is not None:
    877             dport = {"http": 80,
    878                      "https": 443,
    879                      }.get(scheme)
    880             if dport is not None:
    881                 authority = "%s:%d" % (host, dport)
    882         return authority, path
    883 
    884     def is_suburi(self, base, test):
    885         """Check if test is below base in a URI tree
    886 
    887         Both args must be URIs in reduced form.
    888         """
    889         if base == test:
    890             return True
    891         if base[0] != test[0]:
    892             return False
    893         common = posixpath.commonprefix((base[1], test[1]))
    894         if len(common) == len(base[1]):
    895             return True
    896         return False
    897 
    898 
    899 class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    900 
    901     def find_user_password(self, realm, authuri):
    902         user, password = HTTPPasswordMgr.find_user_password(self, realm,
    903                                                             authuri)
    904         if user is not None:
    905             return user, password
    906         return HTTPPasswordMgr.find_user_password(self, None, authuri)
    907 
    908 
    909 class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    910 
    911     def __init__(self, *args, **kwargs):
    912         self.authenticated = {}
    913         super().__init__(*args, **kwargs)
    914 
    915     def add_password(self, realm, uri, user, passwd, is_authenticated=False):
    916         self.update_authenticated(uri, is_authenticated)
    917         # Add a default for prior auth requests
    918         if realm is not None:
    919             super().add_password(None, uri, user, passwd)
    920         super().add_password(realm, uri, user, passwd)
    921 
    922     def update_authenticated(self, uri, is_authenticated=False):
    923         # uri could be a single URI or a sequence
    924         if isinstance(uri, str):
    925             uri = [uri]
    926 
    927         for default_port in True, False:
    928             for u in uri:
    929                 reduced_uri = self.reduce_uri(u, default_port)
    930                 self.authenticated[reduced_uri] = is_authenticated
    931 
    932     def is_authenticated(self, authuri):
    933         for default_port in True, False:
    934             reduced_authuri = self.reduce_uri(authuri, default_port)
    935             for uri in self.authenticated:
    936                 if self.is_suburi(uri, reduced_authuri):
    937                     return self.authenticated[uri]
    938 
    939 
    940 class AbstractBasicAuthHandler:
    941 
    942     # XXX this allows for multiple auth-schemes, but will stupidly pick
    943     # the last one with a realm specified.
    944 
    945     # allow for double- and single-quoted realm values
    946     # (single quotes are a violation of the RFC, but appear in the wild)
    947     rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
    948                     'realm=(["\']?)([^"\']*)\\2', re.I)
    949 
    950     # XXX could pre-emptively send auth info already accepted (RFC 2617,
    951     # end of section 2, and section 1.2 immediately after "credentials"
    952     # production).
    953 
    954     def __init__(self, password_mgr=None):
    955         if password_mgr is None:
    956             password_mgr = HTTPPasswordMgr()
    957         self.passwd = password_mgr
    958         self.add_password = self.passwd.add_password
    959 
    960     def http_error_auth_reqed(self, authreq, host, req, headers):
    961         # host may be an authority (without userinfo) or a URL with an
    962         # authority
    963         # XXX could be multiple headers
    964         authreq = headers.get(authreq, None)
    965 
    966         if authreq:
    967             scheme = authreq.split()[0]
    968             if scheme.lower() != 'basic':
    969                 raise ValueError("AbstractBasicAuthHandler does not"
    970                                  " support the following scheme: '%s'" %
    971                                  scheme)
    972             else:
    973                 mo = AbstractBasicAuthHandler.rx.search(authreq)
    974                 if mo:
    975                     scheme, quote, realm = mo.groups()
    976                     if quote not in ['"',"'"]:
    977                         warnings.warn("Basic Auth Realm was unquoted",
    978                                       UserWarning, 2)
    979                     if scheme.lower() == 'basic':
    980                         return self.retry_http_basic_auth(host, req, realm)
    981 
    982     def retry_http_basic_auth(self, host, req, realm):
    983         user, pw = self.passwd.find_user_password(realm, host)
    984         if pw is not None:
    985             raw = "%s:%s" % (user, pw)
    986             auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
    987             if req.get_header(self.auth_header, None) == auth:
    988                 return None
    989             req.add_unredirected_header(self.auth_header, auth)
    990             return self.parent.open(req, timeout=req.timeout)
    991         else:
    992             return None
    993 
    994     def http_request(self, req):
    995         if (not hasattr(self.passwd, 'is_authenticated') or
    996            not self.passwd.is_authenticated(req.full_url)):
    997             return req
    998 
    999         if not req.has_header('Authorization'):
   1000             user, passwd = self.passwd.find_user_password(None, req.full_url)
   1001             credentials = '{0}:{1}'.format(user, passwd).encode()
   1002             auth_str = base64.standard_b64encode(credentials).decode()
   1003             req.add_unredirected_header('Authorization',
   1004                                         'Basic {}'.format(auth_str.strip()))
   1005         return req
   1006 
   1007     def http_response(self, req, response):
   1008         if hasattr(self.passwd, 'is_authenticated'):
   1009             if 200 <= response.code < 300:
   1010                 self.passwd.update_authenticated(req.full_url, True)
   1011             else:
   1012                 self.passwd.update_authenticated(req.full_url, False)
   1013         return response
   1014 
   1015     https_request = http_request
   1016     https_response = http_response
   1017 
   1018 
   1019 
   1020 class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
   1021 
   1022     auth_header = 'Authorization'
   1023 
   1024     def http_error_401(self, req, fp, code, msg, headers):
   1025         url = req.full_url
   1026         response = self.http_error_auth_reqed('www-authenticate',
   1027                                           url, req, headers)
   1028         return response
   1029 
   1030 
   1031 class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
   1032 
   1033     auth_header = 'Proxy-authorization'
   1034 
   1035     def http_error_407(self, req, fp, code, msg, headers):
   1036         # http_error_auth_reqed requires that there is no userinfo component in
   1037         # authority.  Assume there isn't one, since urllib.request does not (and
   1038         # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
   1039         # userinfo.
   1040         authority = req.host
   1041         response = self.http_error_auth_reqed('proxy-authenticate',
   1042                                           authority, req, headers)
   1043         return response
   1044 
   1045 
# Return n random bytes.
# os.urandom is appropriate here: cnonce generation (below) needs
# unpredictable bytes, not reproducible ones.
_randombytes = os.urandom
   1048 
   1049 
class AbstractDigestAuthHandler:
    """Shared machinery for HTTP Digest authentication (RFC 2617)."""
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        # passwd: an HTTPPasswordMgr-compatible password manager.
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        # retried counts consecutive failed auth attempts; nonce_count and
        # last_nonce implement the RFC 2617 "nc" (nonce count) value.
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer a 401/407 challenge found in the *auth_header* header.

        Returns the response from retrying with Digest credentials, or
        None when no usable challenge is present; raises HTTPError after
        too many consecutive failures.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # A Basic challenge is silently ignored so a Basic handler
                # further down the chain can take it.
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-open *req* with an Authorization header built from *auth*."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                # The identical credentials already failed once; stop.
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization field value for challenge *chal*.

        Returns None when the challenge is incomplete, the algorithm is
        unusable, or no credentials are on file for the realm.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        # NOTE(review): a multi-valued challenge such as qop="auth,auth-int"
        # is not recognized here and falls through to the URLError below --
        # confirm whether servers in use send such values.
        if qop == 'auth':
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) digest callables for *algorithm*
        (RFC 2617 section 3.2.2.2)."""
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
   1192 
   1193 
   1194 class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
   1195     """An authentication protocol defined by RFC 2069
   1196 
   1197     Digest authentication improves on basic authentication because it
   1198     does not transmit passwords in the clear.
   1199     """
   1200 
   1201     auth_header = 'Authorization'
   1202     handler_order = 490  # before Basic auth
   1203 
   1204     def http_error_401(self, req, fp, code, msg, headers):
   1205         host = urlparse(req.full_url)[1]
   1206         retry = self.http_error_auth_reqed('www-authenticate',
   1207                                            host, req, headers)
   1208         self.reset_retry_count()
   1209         return retry
   1210 
   1211 
   1212 class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
   1213 
   1214     auth_header = 'Proxy-Authorization'
   1215     handler_order = 490  # before Basic auth
   1216 
   1217     def http_error_407(self, req, fp, code, msg, headers):
   1218         host = req.host
   1219         retry = self.http_error_auth_reqed('proxy-authenticate',
   1220                                            host, req, headers)
   1221         self.reset_retry_count()
   1222         return retry
   1223 
class AbstractHTTPHandler(BaseHandler):
    """Shared request preparation and connection handling for HTTP(S)."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def _get_content_length(self, request):
        # Delegate to http.client's logic for computing the body length;
        # it returns None when the length cannot be determined (e.g. a
        # file-like body).
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Fill in the headers required before sending *request*.

        Adds Content-type and Content-length (or Transfer-encoding) for
        requests with a body, the Host header, and the opener's default
        headers; returns the mutated request.  Raises URLError when no
        host is given and TypeError for str POST data.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if (not request.has_header('Content-length')
                    and not request.has_header('Transfer-encoding')):
                content_length = self._get_content_length(request)
                if content_length is not None:
                    request.add_unredirected_header(
                            'Content-length', str(content_length))
                else:
                    # Unknown body length: stream it with chunked
                    # transfer encoding instead.
                    request.add_unredirected_header(
                            'Transfer-encoding', 'chunked')

        sel_host = host
        if request.has_proxy():
            # When proxied, the selector is a full URL; Host must name
            # the origin server, not the proxy.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over normal ones.
        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            # CONNECT tunnelling (e.g. https through an http proxy).
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err: # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            # Ensure the connection is released on any failure.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
   1340 
   1341 
   1342 class HTTPHandler(AbstractHTTPHandler):
   1343 
   1344     def http_open(self, req):
   1345         return self.do_open(http.client.HTTPConnection, req)
   1346 
   1347     http_request = AbstractHTTPHandler.do_request_
   1348 
# HTTPSConnection only exists when the interpreter was built with SSL
# support, so the https handler is defined (and exported) conditionally.
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            # context: an ssl.SSLContext for the connection;
            # check_hostname: overrides the context's hostname checking
            # when not None.
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req,
                context=self._context, check_hostname=self._check_hostname)

        # Request preparation is shared with the plain HTTP handler.
        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')
   1365 
   1366 class HTTPCookieProcessor(BaseHandler):
   1367     def __init__(self, cookiejar=None):
   1368         import http.cookiejar
   1369         if cookiejar is None:
   1370             cookiejar = http.cookiejar.CookieJar()
   1371         self.cookiejar = cookiejar
   1372 
   1373     def http_request(self, request):
   1374         self.cookiejar.add_cookie_header(request)
   1375         return request
   1376 
   1377     def http_response(self, request, response):
   1378         self.cookiejar.extract_cookies(response, request)
   1379         return response
   1380 
   1381     https_request = http_request
   1382     https_response = http_response
   1383 
   1384 class UnknownHandler(BaseHandler):
   1385     def unknown_open(self, req):
   1386         type = req.type
   1387         raise URLError('unknown url type: %s' % type)
   1388 
   1389 def parse_keqv_list(l):
   1390     """Parse list of key=value strings where keys are not duplicated."""
   1391     parsed = {}
   1392     for elt in l:
   1393         k, v = elt.split('=', 1)
   1394         if v[0] == '"' and v[-1] == '"':
   1395             v = v[1:-1]
   1396         parsed[k] = v
   1397     return parsed
   1398 
   1399 def parse_http_list(s):
   1400     """Parse lists as described by RFC 2068 Section 2.
   1401 
   1402     In particular, parse comma-separated lists where the elements of
   1403     the list may include quoted-strings.  A quoted-string could
   1404     contain a comma.  A non-quoted string could have quotes in the
   1405     middle.  Neither commas nor quotes count if they are escaped.
   1406     Only double-quotes count, not single-quotes.
   1407     """
   1408     res = []
   1409     part = ''
   1410 
   1411     escape = quote = False
   1412     for cur in s:
   1413         if escape:
   1414             part += cur
   1415             escape = False
   1416             continue
   1417         if quote:
   1418             if cur == '\\':
   1419                 escape = True
   1420                 continue
   1421             elif cur == '"':
   1422                 quote = False
   1423             part += cur
   1424             continue
   1425 
   1426         if cur == ',':
   1427             res.append(part)
   1428             part = ''
   1429             continue
   1430 
   1431         if cur == '"':
   1432             quote = True
   1433 
   1434         part += cur
   1435 
   1436     # append last part
   1437     if part:
   1438         res.append(part)
   1439 
   1440     return [part.strip() for part in res]
   1441 
class FileHandler(BaseHandler):
    """Open file:// URLs that refer to files on the local host."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            if not req.host in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
            # NOTE(review): when req.host IS one of the local names this
            # falls through and returns None, declining the request --
            # confirm this dispatch behaviour is intended.
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return (and cache on the class) every IP address that counts
        as the local host."""
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Open the local file named by req.selector, returning an
        addinfourl with synthesized Content-type/length/Last-modified
        headers; raises URLError on OS errors or non-local hosts."""
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # Accept a missing host, or a host (without explicit port)
            # that resolves to one of the local addresses.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            raise URLError(exp)
        raise URLError('file not on local host')
   1492 
   1493 def _safe_gethostbyname(host):
   1494     try:
   1495         return socket.gethostbyname(host)
   1496     except socket.gaierror:
   1497         return None
   1498 
class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        """Open an ftp:// request and return an addinfourl response.

        Raises URLError when no host is given, when the host cannot be
        resolved, or (wrapping the original traceback) on any ftplib error.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)
        # Split ";attr=value" suffixes off the path, then decode each
        # path segment individually.
        path, attrs = splitattr(req.selector)
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Default transfer type: binary ('I') for a file, directory
            # listing ('D') when the path ends in '/'.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # One-shot (non-persistent) connection; CacheFTPHandler overrides
        # this to reuse connections.
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
   1556 
   1557 class CacheFTPHandler(FTPHandler):
   1558     # XXX would be nice to have pluggable cache strategies
   1559     # XXX this stuff is definitely not thread safe
   1560     def __init__(self):
   1561         self.cache = {}
   1562         self.timeout = {}
   1563         self.soonest = 0
   1564         self.delay = 60
   1565         self.max_conns = 16
   1566 
   1567     def setTimeout(self, t):
   1568         self.delay = t
   1569 
   1570     def setMaxConns(self, m):
   1571         self.max_conns = m
   1572 
   1573     def connect_ftp(self, user, passwd, host, port, dirs, timeout):
   1574         key = user, host, port, '/'.join(dirs), timeout
   1575         if key in self.cache:
   1576             self.timeout[key] = time.time() + self.delay
   1577         else:
   1578             self.cache[key] = ftpwrapper(user, passwd, host, port,
   1579                                          dirs, timeout)
   1580             self.timeout[key] = time.time() + self.delay
   1581         self.check_cache()
   1582         return self.cache[key]
   1583 
   1584     def check_cache(self):
   1585         # first check for old ones
   1586         t = time.time()
   1587         if self.soonest <= t:
   1588             for k, v in list(self.timeout.items()):
   1589                 if v < t:
   1590                     self.cache[k].close()
   1591                     del self.cache[k]
   1592                     del self.timeout[k]
   1593         self.soonest = min(list(self.timeout.values()))
   1594 
   1595         # then check the size
   1596         if len(self.cache) == self.max_conns:
   1597             for k, v in list(self.timeout.items()):
   1598                 if v == self.soonest:
   1599                     del self.cache[k]
   1600                     del self.timeout[k]
   1601                     break
   1602             self.soonest = min(list(self.timeout.values()))
   1603 
   1604     def clear_cache(self):
   1605         for conn in self.cache.values():
   1606             conn.close()
   1607         self.cache.clear()
   1608         self.timeout.clear()
   1609 
   1610 class DataHandler(BaseHandler):
   1611     def data_open(self, req):
   1612         # data URLs as specified in RFC 2397.
   1613         #
   1614         # ignores POSTed data
   1615         #
   1616         # syntax:
   1617         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
   1618         # mediatype := [ type "/" subtype ] *( ";" parameter )
   1619         # data      := *urlchar
   1620         # parameter := attribute "=" value
   1621         url = req.full_url
   1622 
   1623         scheme, data = url.split(":",1)
   1624         mediatype, data = data.split(",",1)
   1625 
   1626         # even base64 encoded data URLs might be quoted so unquote in any case:
   1627         data = unquote_to_bytes(data)
   1628         if mediatype.endswith(";base64"):
   1629             data = base64.decodebytes(data)
   1630             mediatype = mediatype[:-7]
   1631 
   1632         if not mediatype:
   1633             mediatype = "text/plain;charset=US-ASCII"
   1634 
   1635         headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" %
   1636             (mediatype, len(data)))
   1637 
   1638         return addinfourl(io.BytesIO(data), headers, url)
   1639 
   1640 
   1641 # Code move from the old urllib module
   1642 
   1643 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
   1644 
   1645 # Helper for non-unix systems
   1646 if os.name == 'nt':
   1647     from nturl2path import url2pathname, pathname2url
   1648 else:
   1649     def url2pathname(pathname):
   1650         """OS-specific conversion from a relative URL of the 'file' scheme
   1651         to a file system path; not recommended for general use."""
   1652         return unquote(pathname)
   1653 
   1654     def pathname2url(pathname):
   1655         """OS-specific conversion from a file system path to a relative URL
   1656         of the 'file' scheme; not recommended for general use."""
   1657         return quote(pathname)
   1658 
   1659 
   1660 ftpcache = {}
   1661 
   1662 
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Temp files created by retrieve(); set to a list in __init__ and
    # unlinked by cleanup().
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        # This class is deprecated; warn at construction time so the
        # warning points at the caller (stacklevel=3).
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # x509 may carry 'key_file'/'cert_file' used for HTTPS client auth.
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch on scheme: look for an open_<scheme> method on self
        # (dashes in the scheme become underscores).
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            raise
        except OSError as msg:
            # Wrap any other OS-level failure, preserving the traceback.
            raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise OSError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise OSError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        # Fast path: a local file needs no copy; return its own path.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except OSError as msg:
                # Not usable as a local file; fall through to the
                # generic open()+copy path below.
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No filename given: derive a suffix from the URL path and
                # download into a tracked temp file (removed by cleanup()).
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                # Copy in bs-sized blocks, invoking reporthook per block.
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_class.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: split credentials out of the host part.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy request: url is the (proxyhost, fullurl) pair set up
            # by open().
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] =  "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise OSError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            # Uses the key/cert files captured from **x509 in __init__.
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import email.utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        # A host is acceptable only without an explicit port and when it
        # resolves to this machine's own address.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Transfer type: directory listing when path ends in '/',
            # binary otherwise; a ';type=' attribute overrides.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise OSError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # A trailing ';encoding' (with no '=') names the transfer encoding.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
   2106 
   2107 
   2108 class FancyURLopener(URLopener):
   2109     """Derived class with handlers for errors we can handle (perhaps)."""
   2110 
   2111     def __init__(self, *args, **kwargs):
   2112         URLopener.__init__(self, *args, **kwargs)
   2113         self.auth_cache = {}
   2114         self.tries = 0
   2115         self.maxtries = 10
   2116 
   2117     def http_error_default(self, url, fp, errcode, errmsg, headers):
   2118         """Default error handling -- don't raise an exception."""
   2119         return addinfourl(fp, headers, "http:" + url, errcode)
   2120 
   2121     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
   2122         """Error 302 -- relocated (temporarily)."""
   2123         self.tries += 1
   2124         try:
   2125             if self.maxtries and self.tries >= self.maxtries:
   2126                 if hasattr(self, "http_error_500"):
   2127                     meth = self.http_error_500
   2128                 else:
   2129                     meth = self.http_error_default
   2130                 return meth(url, fp, 500,
   2131                             "Internal Server Error: Redirect Recursion",
   2132                             headers)
   2133             result = self.redirect_internal(url, fp, errcode, errmsg,
   2134                                             headers, data)
   2135             return result
   2136         finally:
   2137             self.tries = 0
   2138 
   2139     def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
   2140         if 'location' in headers:
   2141             newurl = headers['location']
   2142         elif 'uri' in headers:
   2143             newurl = headers['uri']
   2144         else:
   2145             return
   2146         fp.close()
   2147 
   2148         # In case the server sent a relative URL, join with original:
   2149         newurl = urljoin(self.type + ":" + url, newurl)
   2150 
   2151         urlparts = urlparse(newurl)
   2152 
   2153         # For security reasons, we don't allow redirection to anything other
   2154         # than http, https and ftp.
   2155 
   2156         # We are using newer HTTPError with older redirect_internal method
   2157         # This older method will get deprecated in 3.3
   2158 
   2159         if urlparts.scheme not in ('http', 'https', 'ftp', ''):
   2160             raise HTTPError(newurl, errcode,
   2161                             errmsg +
   2162                             " Redirection to url '%s' is not allowed." % newurl,
   2163                             headers, fp)
   2164 
   2165         return self.open(newurl)
   2166 
   2167     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
   2168         """Error 301 -- also relocated (permanently)."""
   2169         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
   2170 
   2171     def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
   2172         """Error 303 -- also relocated (essentially identical to 302)."""
   2173         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
   2174 
   2175     def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
   2176         """Error 307 -- relocated, but turn POST into error."""
   2177         if data is None:
   2178             return self.http_error_302(url, fp, errcode, errmsg, headers, data)
   2179         else:
   2180             return self.http_error_default(url, fp, errcode, errmsg, headers)
   2181 
   2182     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
   2183             retry=False):
   2184         """Error 401 -- authentication required.
   2185         This function supports Basic authentication only."""
   2186         if 'www-authenticate' not in headers:
   2187             URLopener.http_error_default(self, url, fp,
   2188                                          errcode, errmsg, headers)
   2189         stuff = headers['www-authenticate']
   2190         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
   2191         if not match:
   2192             URLopener.http_error_default(self, url, fp,
   2193                                          errcode, errmsg, headers)
   2194         scheme, realm = match.groups()
   2195         if scheme.lower() != 'basic':
   2196             URLopener.http_error_default(self, url, fp,
   2197                                          errcode, errmsg, headers)
   2198         if not retry:
   2199             URLopener.http_error_default(self, url, fp, errcode, errmsg,
   2200                     headers)
   2201         name = 'retry_' + self.type + '_basic_auth'
   2202         if data is None:
   2203             return getattr(self,name)(url, realm)
   2204         else:
   2205             return getattr(self,name)(url, realm, data)
   2206 
   2207     def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
   2208             retry=False):
   2209         """Error 407 -- proxy authentication required.
   2210         This function supports Basic authentication only."""
   2211         if 'proxy-authenticate' not in headers:
   2212             URLopener.http_error_default(self, url, fp,
   2213                                          errcode, errmsg, headers)
   2214         stuff = headers['proxy-authenticate']
   2215         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
   2216         if not match:
   2217             URLopener.http_error_default(self, url, fp,
   2218                                          errcode, errmsg, headers)
   2219         scheme, realm = match.groups()
   2220         if scheme.lower() != 'basic':
   2221             URLopener.http_error_default(self, url, fp,
   2222                                          errcode, errmsg, headers)
   2223         if not retry:
   2224             URLopener.http_error_default(self, url, fp, errcode, errmsg,
   2225                     headers)
   2226         name = 'retry_proxy_' + self.type + '_basic_auth'
   2227         if data is None:
   2228             return getattr(self,name)(url, realm)
   2229         else:
   2230             return getattr(self,name)(url, realm, data)
   2231 
   2232     def retry_proxy_http_basic_auth(self, url, realm, data=None):
   2233         host, selector = splithost(url)
   2234         newurl = 'http://' + host + selector
   2235         proxy = self.proxies['http']
   2236         urltype, proxyhost = splittype(proxy)
   2237         proxyhost, proxyselector = splithost(proxyhost)
   2238         i = proxyhost.find('@') + 1
   2239         proxyhost = proxyhost[i:]
   2240         user, passwd = self.get_user_passwd(proxyhost, realm, i)
   2241         if not (user or passwd): return None
   2242         proxyhost = "%s:%s@%s" % (quote(user, safe=''),
   2243                                   quote(passwd, safe=''), proxyhost)
   2244         self.proxies['http'] = 'http://' + proxyhost + proxyselector
   2245         if data is None:
   2246             return self.open(newurl)
   2247         else:
   2248             return self.open(newurl, data)
   2249 
   2250     def retry_proxy_https_basic_auth(self, url, realm, data=None):
   2251         host, selector = splithost(url)
   2252         newurl = 'https://' + host + selector
   2253         proxy = self.proxies['https']
   2254         urltype, proxyhost = splittype(proxy)
   2255         proxyhost, proxyselector = splithost(proxyhost)
   2256         i = proxyhost.find('@') + 1
   2257         proxyhost = proxyhost[i:]
   2258         user, passwd = self.get_user_passwd(proxyhost, realm, i)
   2259         if not (user or passwd): return None
   2260         proxyhost = "%s:%s@%s" % (quote(user, safe=''),
   2261                                   quote(passwd, safe=''), proxyhost)
   2262         self.proxies['https'] = 'https://' + proxyhost + proxyselector
   2263         if data is None:
   2264             return self.open(newurl)
   2265         else:
   2266             return self.open(newurl, data)
   2267 
   2268     def retry_http_basic_auth(self, url, realm, data=None):
   2269         host, selector = splithost(url)
   2270         i = host.find('@') + 1
   2271         host = host[i:]
   2272         user, passwd = self.get_user_passwd(host, realm, i)
   2273         if not (user or passwd): return None
   2274         host = "%s:%s@%s" % (quote(user, safe=''),
   2275                              quote(passwd, safe=''), host)
   2276         newurl = 'http://' + host + selector
   2277         if data is None:
   2278             return self.open(newurl)
   2279         else:
   2280             return self.open(newurl, data)
   2281 
   2282     def retry_https_basic_auth(self, url, realm, data=None):
   2283         host, selector = splithost(url)
   2284         i = host.find('@') + 1
   2285         host = host[i:]
   2286         user, passwd = self.get_user_passwd(host, realm, i)
   2287         if not (user or passwd): return None
   2288         host = "%s:%s@%s" % (quote(user, safe=''),
   2289                              quote(passwd, safe=''), host)
   2290         newurl = 'https://' + host + selector
   2291         if data is None:
   2292             return self.open(newurl)
   2293         else:
   2294             return self.open(newurl, data)
   2295 
   2296     def get_user_passwd(self, host, realm, clear_cache=0):
   2297         key = realm + '@' + host.lower()
   2298         if key in self.auth_cache:
   2299             if clear_cache:
   2300                 del self.auth_cache[key]
   2301             else:
   2302                 return self.auth_cache[key]
   2303         user, passwd = self.prompt_user_passwd(host, realm)
   2304         if user or passwd: self.auth_cache[key] = (user, passwd)
   2305         return user, passwd
   2306 
   2307     def prompt_user_passwd(self, host, realm):
   2308         """Override this in a GUI environment!"""
   2309         import getpass
   2310         try:
   2311             user = input("Enter username for %s at %s: " % (realm, host))
   2312             passwd = getpass.getpass("Enter password for %s in %s at %s: " %
   2313                 (user, realm, host))
   2314             return user, passwd
   2315         except KeyboardInterrupt:
   2316             print()
   2317             return None, None
   2318 
   2319 
   2320 # Utility functions
   2321 
   2322 _localhost = None
   2323 def localhost():
   2324     """Return the IP address of the magic hostname 'localhost'."""
   2325     global _localhost
   2326     if _localhost is None:
   2327         _localhost = socket.gethostbyname('localhost')
   2328     return _localhost
   2329 
   2330 _thishost = None
   2331 def thishost():
   2332     """Return the IP addresses of the current host."""
   2333     global _thishost
   2334     if _thishost is None:
   2335         try:
   2336             _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
   2337         except socket.gaierror:
   2338             _thishost = tuple(socket.gethostbyname_ex('localhost')[2])
   2339     return _thishost
   2340 
   2341 _ftperrors = None
   2342 def ftperrors():
   2343     """Return the set of errors raised by the FTP class."""
   2344     global _ftperrors
   2345     if _ftperrors is None:
   2346         import ftplib
   2347         _ftperrors = ftplib.all_errors
   2348     return _ftperrors
   2349 
   2350 _noheaders = None
   2351 def noheaders():
   2352     """Return an empty email Message object."""
   2353     global _noheaders
   2354     if _noheaders is None:
   2355         _noheaders = email.message_from_string("")
   2356     return _noheaders
   2357 
   2358 
   2359 # Utility classes
   2360 
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        # Connection parameters are kept so init() can reconnect later.
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out that are still open.
        self.refcount = 0
        # When True, the control connection outlives individual transfers.
        self.keepalive = persistent
        try:
            self.init()
        except:
            # Don't leak a half-open connection if connect/login/cwd fails.
            self.close()
            raise

    def init(self):
        """(Re)open the FTP control connection, log in, and cd to self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving *file*; return (file-like object, length or None).

        type 'd'/'D' requests a directory listing; any other value is used
        directly in the FTP TYPE command (e.g. 'I' binary, 'A' ASCII).
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection went stale; reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply means "not a plain file"; fall through to the
                # directory-listing attempt below.  Anything else is fatal.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always restore the original working directory.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # file_close() runs when the returned object is closed, keeping
        # the refcount in step with handed-out file objects.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        # Mark the connection as idle.
        self.busy = 0

    def close(self):
        # Stop keeping the connection alive; tear it down immediately only
        # if no handed-out file objects remain open.
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Hook invoked when a file object returned by retrfile() is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        # Actually close the control connection, ignoring FTP-level errors.
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
   2454 
   2455 # Proxy handling
   2456 def getproxies_environment():
   2457     """Return a dictionary of scheme -> proxy server URL mappings.
   2458 
   2459     Scan the environment for variables named <scheme>_proxy;
   2460     this seems to be the standard convention.  If you need a
   2461     different way, you can pass a proxies dictionary to the
   2462     [Fancy]URLopener constructor.
   2463 
   2464     """
   2465     proxies = {}
   2466     # in order to prefer lowercase variables, process environment in
   2467     # two passes: first matches any, second pass matches lowercase only
   2468     for name, value in os.environ.items():
   2469         name = name.lower()
   2470         if value and name[-6:] == '_proxy':
   2471             proxies[name[:-6]] = value
   2472     # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
   2473     # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
   2474     # header from the client
   2475     # If "proxy" is lowercase, it will still be used thanks to the next block
   2476     if 'REQUEST_METHOD' in os.environ:
   2477         proxies.pop('http', None)
   2478     for name, value in os.environ.items():
   2479         if name[-6:] == '_proxy':
   2480             name = name.lower()
   2481             if value:
   2482                 proxies[name[:-6]] = value
   2483             else:
   2484                 proxies.pop(name[:-6], None)
   2485     return proxies
   2486 
   2487 def proxy_bypass_environment(host, proxies=None):
   2488     """Test if proxies should not be used for a particular host.
   2489 
   2490     Checks the proxy dict for the value of no_proxy, which should
   2491     be a list of comma separated DNS suffixes, or '*' for all hosts.
   2492 
   2493     """
   2494     if proxies is None:
   2495         proxies = getproxies_environment()
   2496     # don't bypass, if no_proxy isn't specified
   2497     try:
   2498         no_proxy = proxies['no']
   2499     except KeyError:
   2500         return 0
   2501     # '*' is special case for always bypass
   2502     if no_proxy == '*':
   2503         return 1
   2504     # strip port off host
   2505     hostonly, port = splitport(host)
   2506     # check if the host ends with any of the DNS suffixes
   2507     no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
   2508     for name in no_proxy_list:
   2509         if name:
   2510             name = name.lstrip('.')  # ignore leading dots
   2511             name = re.escape(name)
   2512             pattern = r'(.+\.)?%s$' % name
   2513             if (re.match(pattern, hostonly, re.I)
   2514                     or re.match(pattern, host, re.I)):
   2515                 return 1
   2516     # otherwise, don't bypass
   2517     return 0
   2518 
   2519 
   2520 # This code tests an OSX specific data structure but is testable on all
   2521 # platforms
   2522 def _proxy_bypass_macosx_sysconf(host, proxy_settings):
   2523     """
   2524     Return True iff this host shouldn't be accessed using a proxy
   2525 
   2526     This function uses the MacOSX framework SystemConfiguration
   2527     to fetch the proxy information.
   2528 
   2529     proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
   2530     { 'exclude_simple': bool,
   2531       'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
   2532     }
   2533     """
   2534     from fnmatch import fnmatch
   2535 
   2536     hostonly, port = splitport(host)
   2537 
   2538     def ip2num(ipAddr):
   2539         parts = ipAddr.split('.')
   2540         parts = list(map(int, parts))
   2541         if len(parts) != 4:
   2542             parts = (parts + [0, 0, 0, 0])[:4]
   2543         return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
   2544 
   2545     # Check for simple host names:
   2546     if '.' not in host:
   2547         if proxy_settings['exclude_simple']:
   2548             return True
   2549 
   2550     hostIP = None
   2551 
   2552     for value in proxy_settings.get('exceptions', ()):
   2553         # Items in the list are strings like these: *.local, 169.254/16
   2554         if not value: continue
   2555 
   2556         m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
   2557         if m is not None:
   2558             if hostIP is None:
   2559                 try:
   2560                     hostIP = socket.gethostbyname(hostonly)
   2561                     hostIP = ip2num(hostIP)
   2562                 except OSError:
   2563                     continue
   2564 
   2565             base = ip2num(m.group(1))
   2566             mask = m.group(2)
   2567             if mask is None:
   2568                 mask = 8 * (m.group(1).count('.') + 1)
   2569             else:
   2570                 mask = int(mask[1:])
   2571             mask = 32 - mask
   2572 
   2573             if (hostIP >> mask) == (base >> mask):
   2574                 return True
   2575 
   2576         elif fnmatch(host, value):
   2577             return True
   2578 
   2579     return False
   2580 
   2581 
if sys.platform == 'darwin':
    # macOS: proxy configuration lives in the SystemConfiguration
    # framework; the _scproxy C extension exposes it to Python.
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        # Evaluate the system's bypass rules for *host*.
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        # Environment variables take precedence over system configuration.
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        # Consult the Internet Settings 'ProxyOverride' registry value to
        # decide whether *host* should bypass the proxy.  Returns 1 or 0.
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except OSError:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except OSError:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
   2738