# Home | History | Annotate | Download | only in urllib  (code-viewer header; kept as a comment so the file parses)
      1 """An extensible library for opening URLs using a variety of protocols
      2 
      3 The simplest way to use this module is to call the urlopen function,
      4 which accepts a string containing a URL or a Request object (described
      5 below).  It opens the URL and returns the results as file-like
      6 object; the returned object has some extra methods described below.
      7 
      8 The OpenerDirector manages a collection of Handler objects that do
      9 all the actual work.  Each Handler implements a particular protocol or
     10 option.  The OpenerDirector is a composite object that invokes the
     11 Handlers needed to open the requested URL.  For example, the
     12 HTTPHandler performs HTTP GET and POST requests and deals with
     13 non-error returns.  The HTTPRedirectHandler automatically deals with
     14 HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
     15 deals with digest authentication.
     16 
urlopen(url, data=None) -- Basic usage is the same as original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
get a file-like object back.  One difference is that you can also pass
a Request instance instead of URL.  Raises a URLError (subclass of
OSError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
     23 
     24 build_opener -- Function that creates a new OpenerDirector instance.
     25 Will install the default handlers.  Accepts one or more Handlers as
arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
handler, the argument will be installed instead of the default.
     29 
     30 install_opener -- Installs a new opener as the default opener.
     31 
     32 objects of interest:
     33 
     34 OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
     35 the Handler classes, while dealing with requests and responses.
     36 
     37 Request -- An object that encapsulates the state of a request.  The
     38 state can be as simple as the URL.  It can also include extra HTTP
     39 headers, e.g. a User-Agent.
     40 
     41 BaseHandler --
     42 
     43 internals:
     44 BaseHandler and parent
     45 _call_chain conventions
     46 
     47 Example usage:
     48 
     49 import urllib.request
     50 
     51 # set up authentication info
     52 authinfo = urllib.request.HTTPBasicAuthHandler()
     53 authinfo.add_password(realm='PDQ Application',
     54                       uri='https://mahler:8092/site-updates.py',
     55                       user='klem',
     56                       passwd='geheim$parole')
     57 
     58 proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
     59 
     60 # build a new opener that adds authentication and caching FTP handlers
     61 opener = urllib.request.build_opener(proxy_support, authinfo,
     62                                      urllib.request.CacheFTPHandler)
     63 
     64 # install it
     65 urllib.request.install_opener(opener)
     66 
     67 f = urllib.request.urlopen('http://www.python.org/')
     68 """
     69 
     70 # XXX issues:
     71 # If an authentication error handler that tries to perform
     72 # authentication for some reason but fails, how should the error be
# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
# pass that information along to the client, too.
     77 # ftp errors aren't handled cleanly
     78 # check digest against correct (i.e. non-apache) implementation
     79 
     80 # Possible extensions:
     81 # complex proxies  XXX not sure what exactly was meant by this
     82 # abstract factory for opener
     83 
     84 import base64
     85 import bisect
     86 import email
     87 import hashlib
     88 import http.client
     89 import io
     90 import os
     91 import posixpath
     92 import re
     93 import socket
     94 import string
     95 import sys
     96 import time
     97 import collections
     98 import tempfile
     99 import contextlib
    100 import warnings
    101 
    102 
    103 from urllib.error import URLError, HTTPError, ContentTooShortError
    104 from urllib.parse import (
    105     urlparse, urlsplit, urljoin, unwrap, quote, unquote,
    106     splittype, splithost, splitport, splituser, splitpasswd,
    107     splitattr, splitquery, splitvalue, splittag, to_bytes,
    108     unquote_to_bytes, urlunparse)
    109 from urllib.response import addinfourl, addclosehook
    110 
# check for SSL
# ssl is an optional module; when it is missing, HTTPS-specific
# machinery (e.g. HTTPSHandler) is disabled rather than failing import.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
    118 
# Names exported by "from urllib.request import *".
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]
    135 
# used in User-Agent header sent
__version__ = '%d.%d' % sys.version_info[:2]

# Module-global default opener: created lazily by urlopen() or installed
# explicitly via install_opener().
_opener = None
    140 def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
    141             *, cafile=None, capath=None, cadefault=False, context=None):
    142     '''Open the URL url, which can be either a string or a Request object.
    143 
    144     *data* must be an object specifying additional data to be sent to
    145     the server, or None if no such data is needed.  See Request for
    146     details.
    147 
    148     urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    149     header in its HTTP requests.
    150 
    151     The optional *timeout* parameter specifies a timeout in seconds for
    152     blocking operations like the connection attempt (if not specified, the
    153     global default timeout setting will be used). This only works for HTTP,
    154     HTTPS and FTP connections.
    155 
    156     If *context* is specified, it must be a ssl.SSLContext instance describing
    157     the various SSL options. See HTTPSConnection for more details.
    158 
    159     The optional *cafile* and *capath* parameters specify a set of trusted CA
    160     certificates for HTTPS requests. cafile should point to a single file
    161     containing a bundle of CA certificates, whereas capath should point to a
    162     directory of hashed certificate files. More information can be found in
    163     ssl.SSLContext.load_verify_locations().
    164 
    165     The *cadefault* parameter is ignored.
    166 
    167     This function always returns an object which can work as a context
    168     manager and has methods such as
    169 
    170     * geturl() - return the URL of the resource retrieved, commonly used to
    171       determine if a redirect was followed
    172 
    173     * info() - return the meta-information of the page, such as headers, in the
    174       form of an email.message_from_string() instance (see Quick Reference to
    175       HTTP Headers)
    176 
    177     * getcode() - return the HTTP status code of the response.  Raises URLError
    178       on errors.
    179 
    180     For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
    181     object slightly modified. In addition to the three new methods above, the
    182     msg attribute contains the same information as the reason attribute ---
    183     the reason phrase returned by the server --- instead of the response
    184     headers as it is specified in the documentation for HTTPResponse.
    185 
    186     For FTP, file, and data URLs and requests explicitly handled by legacy
    187     URLopener and FancyURLopener classes, this function returns a
    188     urllib.response.addinfourl object.
    189 
    190     Note that None may be returned if no handler handles the request (though
    191     the default installed global OpenerDirector uses UnknownHandler to ensure
    192     this never happens).
    193 
    194     In addition, if proxy settings are detected (for example, when a *_proxy
    195     environment variable like http_proxy is set), ProxyHandler is default
    196     installed and makes sure the requests are handled through the proxy.
    197 
    198     '''
    199     global _opener
    200     if cafile or capath or cadefault:
    201         import warnings
    202         warnings.warn("cafile, cpath and cadefault are deprecated, use a "
    203                       "custom context instead.", DeprecationWarning, 2)
    204         if context is not None:
    205             raise ValueError(
    206                 "You can't pass both context and any of cafile, capath, and "
    207                 "cadefault"
    208             )
    209         if not _have_ssl:
    210             raise ValueError('SSL support not available')
    211         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
    212                                              cafile=cafile,
    213                                              capath=capath)
    214         https_handler = HTTPSHandler(context=context)
    215         opener = build_opener(https_handler)
    216     elif context:
    217         https_handler = HTTPSHandler(context=context)
    218         opener = build_opener(https_handler)
    219     elif _opener is None:
    220         _opener = opener = build_opener()
    221     else:
    222         opener = _opener
    223     return opener.open(url, data, timeout)
    224 
def install_opener(opener):
    """Install *opener* as the module-global default used by urlopen()."""
    global _opener
    _opener = opener
    228 
# Temporary files created by urlretrieve(); removed by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target. The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.

    Raises ContentTooShortError when fewer bytes than advertised by
    Content-Length were received.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            # No filename given: download into a NamedTemporaryFile and
            # remember its path so urlcleanup() can delete it later.
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8
            size = -1
            read = 0
            blocknum = 0
            # headers behaves like email.message.Message, so the lookup is
            # case-insensitive despite the differing key spellings.
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            if reporthook:
                # Initial call with block 0, before any data is read.
                reporthook(blocknum, bs, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)

    if size >= 0 and read < size:
        # The server promised more bytes than it delivered.
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
    292 
    293 def urlcleanup():
    294     """Clean up temporary files from urlretrieve calls."""
    295     for temp_file in _url_tempfiles:
    296         try:
    297             os.unlink(temp_file)
    298         except OSError:
    299             pass
    300 
    301     del _url_tempfiles[:]
    302     global _opener
    303     if _opener:
    304         _opener = None
    305 
    306 # copied from cookielib.py
    307 _cut_port_re = re.compile(r":\d+$", re.ASCII)
    308 def request_host(request):
    309     """Return request-host, as defined by RFC 2965.
    310 
    311     Variation from RFC: returned value is lowercased, for convenient
    312     comparison.
    313 
    314     """
    315     url = request.full_url
    316     host = urlparse(url)[1]
    317     if host == "":
    318         host = request.get_header("Host", "")
    319 
    320     # remove port, if present
    321     host = _cut_port_re.sub("", host, 1)
    322     return host.lower()
    323 
class Request:
    """Encapsulate the state of a single request: URL, body data,
    headers, and the proxy/tunnel bookkeeping used by the handlers."""

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # NOTE: the mutable default for ``headers`` is safe here because
        # the dict is only read (items()), never mutated.
        self.full_url = url
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        if method:
            # Only set when explicitly given, so get_method() can derive
            # GET/POST from the presence of data otherwise.
            self.method = method

    @property
    def full_url(self):
        # Re-attach the fragment that the setter split off.
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        self._full_url, self.fragment = splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        # Split the URL into scheme (self.type), host and selector;
        # a URL without a scheme is rejected.
        self.type, rest = splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        return self.full_url

    def set_proxy(self, host, type):
        # For https the request is tunnelled (CONNECT) through the proxy,
        # so scheme and selector stay unchanged; only the host moves.
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type= type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        # After set_proxy() the selector holds the absolute URL.
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        # Remove from both the normal and the unredirected header stores.
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        # Normal headers take precedence over unredirected ones.
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
    433 
class OpenerDirector:
    """Manage a chain of handlers and dispatch URL opening to them."""

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* for every capability it implements.

        Capabilities are discovered by method-name convention:
        "<protocol>_open", "<protocol>_request", "<protocol>_response"
        and "<protocol>_error_<code>".
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # kind is the status code after "error_", kept as a
                # string when it is not numeric (e.g. "default").
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # Keep each chain sorted by handler_order (BaseHandler.__lt__).
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (URL string or Request) and return the response."""
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # Try default_open first, then the protocol-specific opener,
        # then unknown_open as a last resort.
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered error-handler chains."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # Nothing handled the specific code: fall back to the
            # http_error_default chain.
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
    571 
    572 # XXX probably also want an abstract factory that knows when it makes
    573 # sense to skip a superclass in favor of a subclass and when it might
    574 # make sense to include both
    575 
    576 def build_opener(*handlers):
    577     """Create an opener object from a list of handlers.
    578 
    579     The opener will use several default handlers, including support
    580     for HTTP, FTP and when applicable HTTPS.
    581 
    582     If any of the handlers passed as arguments are subclasses of the
    583     default handlers, the default handlers will not be used.
    584     """
    585     opener = OpenerDirector()
    586     default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
    587                        HTTPDefaultErrorHandler, HTTPRedirectHandler,
    588                        FTPHandler, FileHandler, HTTPErrorProcessor,
    589                        DataHandler]
    590     if hasattr(http.client, "HTTPSConnection"):
    591         default_classes.append(HTTPSHandler)
    592     skip = set()
    593     for klass in default_classes:
    594         for check in handlers:
    595             if isinstance(check, type):
    596                 if issubclass(check, klass):
    597                     skip.add(klass)
    598             elif isinstance(check, klass):
    599                 skip.add(klass)
    600     for klass in skip:
    601         default_classes.remove(klass)
    602 
    603     for klass in default_classes:
    604         opener.add_handler(klass())
    605 
    606     for h in handlers:
    607         if isinstance(h, type):
    608             h = h()
    609         opener.add_handler(h)
    610     return opener
    611 
    612 class BaseHandler:
    613     handler_order = 500
    614 
    615     def add_parent(self, parent):
    616         self.parent = parent
    617 
    618     def close(self):
    619         # Only exists for backwards compatibility
    620         pass
    621 
    622     def __lt__(self, other):
    623         if not hasattr(other, "handler_order"):
    624             # Try to preserve the old behavior of having custom classes
    625             # inserted after default ones (works only for custom user
    626             # classes which are not aware of handler_order).
    627             return True
    628         return self.handler_order < other.handler_order
    629 
    630 
    631 class HTTPErrorProcessor(BaseHandler):
    632     """Process HTTP error responses."""
    633     handler_order = 1000  # after all other processing
    634 
    635     def http_response(self, request, response):
    636         code, msg, hdrs = response.code, response.msg, response.info()
    637 
    638         # According to RFC 2616, "2xx" code indicates that the client's
    639         # request was successfully received, understood, and accepted.
    640         if not (200 <= code < 300):
    641             response = self.parent.error(
    642                 'http', request, response, code, msg, hdrs)
    643 
    644         return response
    645 
    646     https_response = http_response
    647 
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort handler: turn any unhandled HTTP error response into
    an HTTPError exception."""
    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
    651 
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirect responses."""
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # GET/HEAD may follow all four codes; POST only 301/302/303
        # (where it is conventionally converted to a GET).
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # The redirected request carries no entity body, so drop the
        # headers that describe one.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path and urlparts.netloc:
            # "http://host" (netloc, empty path) gets "/" so the joined
            # URL below is well-formed.
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    # All four redirect codes share one implementation.
    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
    763 
    764 
    765 def _parse_proxy(proxy):
    766     """Return (scheme, user, password, host/port) given a URL or an authority.
    767 
    768     If a URL is supplied, it must have an authority (host:port) component.
    769     According to RFC 3986, having an authority component means the URL must
    770     have two slashes after the scheme.
    771     """
    772     scheme, r_scheme = splittype(proxy)
    773     if not r_scheme.startswith("/"):
    774         # authority
    775         scheme = None
    776         authority = proxy
    777     else:
    778         # URL
    779         if not r_scheme.startswith("//"):
    780             raise ValueError("proxy URL with no authority: %r" % proxy)
    781         # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
    782         # and 3.3.), path is empty or starts with '/'
    783         end = r_scheme.find("/", 2)
    784         if end == -1:
    785             end = None
    786         authority = r_scheme[2:end]
    787     userinfo, hostport = splituser(authority)
    788     if userinfo is not None:
    789         user, password = splitpasswd(userinfo)
    790     else:
    791         user = password = None
    792     return scheme, user, password, hostport
    793 
    794 class ProxyHandler(BaseHandler):
    795     # Proxies must be in front
    796     handler_order = 100
    797 
    798     def __init__(self, proxies=None):
    799         if proxies is None:
    800             proxies = getproxies()
    801         assert hasattr(proxies, 'keys'), "proxies must be a mapping"
    802         self.proxies = proxies
    803         for type, url in proxies.items():
    804             setattr(self, '%s_open' % type,
    805                     lambda r, proxy=url, type=type, meth=self.proxy_open:
    806                         meth(r, proxy, type))
    807 
    808     def proxy_open(self, req, proxy, type):
    809         orig_type = req.type
    810         proxy_type, user, password, hostport = _parse_proxy(proxy)
    811         if proxy_type is None:
    812             proxy_type = orig_type
    813 
    814         if req.host and proxy_bypass(req.host):
    815             return None
    816 
    817         if user and password:
    818             user_pass = '%s:%s' % (unquote(user),
    819                                    unquote(password))
    820             creds = base64.b64encode(user_pass.encode()).decode("ascii")
    821             req.add_header('Proxy-authorization', 'Basic ' + creds)
    822         hostport = unquote(hostport)
    823         req.set_proxy(hostport, proxy_type)
    824         if orig_type == proxy_type or orig_type == 'https':
    825             # let other handlers take care of it
    826             return None
    827         else:
    828             # need to start over, because the other handlers don't
    829             # grok the proxy's URL type
    830             # e.g. if we have a constructor arg proxies like so:
    831             # {'http': 'ftp://proxy.example.com'}, we may end up turning
    832             # a request for http://acme.example.com/a into one for
    833             # ftp://proxy.example.com/a
    834             return self.parent.open(req, timeout=req.timeout)
    835 
    836 class HTTPPasswordMgr:
    837 
    838     def __init__(self):
    839         self.passwd = {}
    840 
    841     def add_password(self, realm, uri, user, passwd):
    842         # uri could be a single URI or a sequence
    843         if isinstance(uri, str):
    844             uri = [uri]
    845         if realm not in self.passwd:
    846             self.passwd[realm] = {}
    847         for default_port in True, False:
    848             reduced_uri = tuple(
    849                 [self.reduce_uri(u, default_port) for u in uri])
    850             self.passwd[realm][reduced_uri] = (user, passwd)
    851 
    852     def find_user_password(self, realm, authuri):
    853         domains = self.passwd.get(realm, {})
    854         for default_port in True, False:
    855             reduced_authuri = self.reduce_uri(authuri, default_port)
    856             for uris, authinfo in domains.items():
    857                 for uri in uris:
    858                     if self.is_suburi(uri, reduced_authuri):
    859                         return authinfo
    860         return None, None
    861 
    862     def reduce_uri(self, uri, default_port=True):
    863         """Accept authority or URI and extract only the authority and path."""
    864         # note HTTP URLs do not have a userinfo component
    865         parts = urlsplit(uri)
    866         if parts[1]:
    867             # URI
    868             scheme = parts[0]
    869             authority = parts[1]
    870             path = parts[2] or '/'
    871         else:
    872             # host or host:port
    873             scheme = None
    874             authority = uri
    875             path = '/'
    876         host, port = splitport(authority)
    877         if default_port and port is None and scheme is not None:
    878             dport = {"http": 80,
    879                      "https": 443,
    880                      }.get(scheme)
    881             if dport is not None:
    882                 authority = "%s:%d" % (host, dport)
    883         return authority, path
    884 
    885     def is_suburi(self, base, test):
    886         """Check if test is below base in a URI tree
    887 
    888         Both args must be URIs in reduced form.
    889         """
    890         if base == test:
    891             return True
    892         if base[0] != test[0]:
    893             return False
    894         common = posixpath.commonprefix((base[1], test[1]))
    895         if len(common) == len(base[1]):
    896             return True
    897         return False
    898 
    899 
    900 class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    901 
    902     def find_user_password(self, realm, authuri):
    903         user, password = HTTPPasswordMgr.find_user_password(self, realm,
    904                                                             authuri)
    905         if user is not None:
    906             return user, password
    907         return HTTPPasswordMgr.find_user_password(self, None, authuri)
    908 
    909 
    910 class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    911 
    912     def __init__(self, *args, **kwargs):
    913         self.authenticated = {}
    914         super().__init__(*args, **kwargs)
    915 
    916     def add_password(self, realm, uri, user, passwd, is_authenticated=False):
    917         self.update_authenticated(uri, is_authenticated)
    918         # Add a default for prior auth requests
    919         if realm is not None:
    920             super().add_password(None, uri, user, passwd)
    921         super().add_password(realm, uri, user, passwd)
    922 
    923     def update_authenticated(self, uri, is_authenticated=False):
    924         # uri could be a single URI or a sequence
    925         if isinstance(uri, str):
    926             uri = [uri]
    927 
    928         for default_port in True, False:
    929             for u in uri:
    930                 reduced_uri = self.reduce_uri(u, default_port)
    931                 self.authenticated[reduced_uri] = is_authenticated
    932 
    933     def is_authenticated(self, authuri):
    934         for default_port in True, False:
    935             reduced_authuri = self.reduce_uri(authuri, default_port)
    936             for uri in self.authenticated:
    937                 if self.is_suburi(uri, reduced_authuri):
    938                     return self.authenticated[uri]
    939 
    940 
    941 class AbstractBasicAuthHandler:
    942 
    943     # XXX this allows for multiple auth-schemes, but will stupidly pick
    944     # the last one with a realm specified.
    945 
    946     # allow for double- and single-quoted realm values
    947     # (single quotes are a violation of the RFC, but appear in the wild)
    948     rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
    949                     'realm=(["\']?)([^"\']*)\\2', re.I)
    950 
    951     # XXX could pre-emptively send auth info already accepted (RFC 2617,
    952     # end of section 2, and section 1.2 immediately after "credentials"
    953     # production).
    954 
    955     def __init__(self, password_mgr=None):
    956         if password_mgr is None:
    957             password_mgr = HTTPPasswordMgr()
    958         self.passwd = password_mgr
    959         self.add_password = self.passwd.add_password
    960 
    961     def http_error_auth_reqed(self, authreq, host, req, headers):
    962         # host may be an authority (without userinfo) or a URL with an
    963         # authority
    964         # XXX could be multiple headers
    965         authreq = headers.get(authreq, None)
    966 
    967         if authreq:
    968             scheme = authreq.split()[0]
    969             if scheme.lower() != 'basic':
    970                 raise ValueError("AbstractBasicAuthHandler does not"
    971                                  " support the following scheme: '%s'" %
    972                                  scheme)
    973             else:
    974                 mo = AbstractBasicAuthHandler.rx.search(authreq)
    975                 if mo:
    976                     scheme, quote, realm = mo.groups()
    977                     if quote not in ['"',"'"]:
    978                         warnings.warn("Basic Auth Realm was unquoted",
    979                                       UserWarning, 2)
    980                     if scheme.lower() == 'basic':
    981                         return self.retry_http_basic_auth(host, req, realm)
    982 
    983     def retry_http_basic_auth(self, host, req, realm):
    984         user, pw = self.passwd.find_user_password(realm, host)
    985         if pw is not None:
    986             raw = "%s:%s" % (user, pw)
    987             auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
    988             if req.get_header(self.auth_header, None) == auth:
    989                 return None
    990             req.add_unredirected_header(self.auth_header, auth)
    991             return self.parent.open(req, timeout=req.timeout)
    992         else:
    993             return None
    994 
    995     def http_request(self, req):
    996         if (not hasattr(self.passwd, 'is_authenticated') or
    997            not self.passwd.is_authenticated(req.full_url)):
    998             return req
    999 
   1000         if not req.has_header('Authorization'):
   1001             user, passwd = self.passwd.find_user_password(None, req.full_url)
   1002             credentials = '{0}:{1}'.format(user, passwd).encode()
   1003             auth_str = base64.standard_b64encode(credentials).decode()
   1004             req.add_unredirected_header('Authorization',
   1005                                         'Basic {}'.format(auth_str.strip()))
   1006         return req
   1007 
   1008     def http_response(self, req, response):
   1009         if hasattr(self.passwd, 'is_authenticated'):
   1010             if 200 <= response.code < 300:
   1011                 self.passwd.update_authenticated(req.full_url, True)
   1012             else:
   1013                 self.passwd.update_authenticated(req.full_url, False)
   1014         return response
   1015 
   1016     https_request = http_request
   1017     https_response = http_response
   1018 
   1019 
   1020 
   1021 class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
   1022 
   1023     auth_header = 'Authorization'
   1024 
   1025     def http_error_401(self, req, fp, code, msg, headers):
   1026         url = req.full_url
   1027         response = self.http_error_auth_reqed('www-authenticate',
   1028                                           url, req, headers)
   1029         return response
   1030 
   1031 
   1032 class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
   1033 
   1034     auth_header = 'Proxy-authorization'
   1035 
   1036     def http_error_407(self, req, fp, code, msg, headers):
   1037         # http_error_auth_reqed requires that there is no userinfo component in
   1038         # authority.  Assume there isn't one, since urllib.request does not (and
   1039         # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
   1040         # userinfo.
   1041         authority = req.host
   1042         response = self.http_error_auth_reqed('proxy-authenticate',
   1043                                           authority, req, headers)
   1044         return response
   1045 
   1046 
# Return n random bytes (entropy source for the digest-auth cnonce;
# see AbstractDigestAuthHandler.get_cnonce).
_randombytes = os.urandom
   1049 
   1050 
   1051 class AbstractDigestAuthHandler:
   1052     # Digest authentication is specified in RFC 2617.
   1053 
   1054     # XXX The client does not inspect the Authentication-Info header
   1055     # in a successful response.
   1056 
   1057     # XXX It should be possible to test this implementation against
   1058     # a mock server that just generates a static set of challenges.
   1059 
   1060     # XXX qop="auth-int" supports is shaky
   1061 
   1062     def __init__(self, passwd=None):
   1063         if passwd is None:
   1064             passwd = HTTPPasswordMgr()
   1065         self.passwd = passwd
   1066         self.add_password = self.passwd.add_password
   1067         self.retried = 0
   1068         self.nonce_count = 0
   1069         self.last_nonce = None
   1070 
   1071     def reset_retry_count(self):
   1072         self.retried = 0
   1073 
   1074     def http_error_auth_reqed(self, auth_header, host, req, headers):
   1075         authreq = headers.get(auth_header, None)
   1076         if self.retried > 5:
   1077             # Don't fail endlessly - if we failed once, we'll probably
   1078             # fail a second time. Hm. Unless the Password Manager is
   1079             # prompting for the information. Crap. This isn't great
   1080             # but it's better than the current 'repeat until recursion
   1081             # depth exceeded' approach <wink>
   1082             raise HTTPError(req.full_url, 401, "digest auth failed",
   1083                             headers, None)
   1084         else:
   1085             self.retried += 1
   1086         if authreq:
   1087             scheme = authreq.split()[0]
   1088             if scheme.lower() == 'digest':
   1089                 return self.retry_http_digest_auth(req, authreq)
   1090             elif scheme.lower() != 'basic':
   1091                 raise ValueError("AbstractDigestAuthHandler does not support"
   1092                                  " the following scheme: '%s'" % scheme)
   1093 
   1094     def retry_http_digest_auth(self, req, auth):
   1095         token, challenge = auth.split(' ', 1)
   1096         chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
   1097         auth = self.get_authorization(req, chal)
   1098         if auth:
   1099             auth_val = 'Digest %s' % auth
   1100             if req.headers.get(self.auth_header, None) == auth_val:
   1101                 return None
   1102             req.add_unredirected_header(self.auth_header, auth_val)
   1103             resp = self.parent.open(req, timeout=req.timeout)
   1104             return resp
   1105 
   1106     def get_cnonce(self, nonce):
   1107         # The cnonce-value is an opaque
   1108         # quoted string value provided by the client and used by both client
   1109         # and server to avoid chosen plaintext attacks, to provide mutual
   1110         # authentication, and to provide some message integrity protection.
   1111         # This isn't a fabulous effort, but it's probably Good Enough.
   1112         s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
   1113         b = s.encode("ascii") + _randombytes(8)
   1114         dig = hashlib.sha1(b).hexdigest()
   1115         return dig[:16]
   1116 
   1117     def get_authorization(self, req, chal):
   1118         try:
   1119             realm = chal['realm']
   1120             nonce = chal['nonce']
   1121             qop = chal.get('qop')
   1122             algorithm = chal.get('algorithm', 'MD5')
   1123             # mod_digest doesn't send an opaque, even though it isn't
   1124             # supposed to be optional
   1125             opaque = chal.get('opaque', None)
   1126         except KeyError:
   1127             return None
   1128 
   1129         H, KD = self.get_algorithm_impls(algorithm)
   1130         if H is None:
   1131             return None
   1132 
   1133         user, pw = self.passwd.find_user_password(realm, req.full_url)
   1134         if user is None:
   1135             return None
   1136 
   1137         # XXX not implemented yet
   1138         if req.data is not None:
   1139             entdig = self.get_entity_digest(req.data, chal)
   1140         else:
   1141             entdig = None
   1142 
   1143         A1 = "%s:%s:%s" % (user, realm, pw)
   1144         A2 = "%s:%s" % (req.get_method(),
   1145                         # XXX selector: what about proxies and full urls
   1146                         req.selector)
   1147         if qop == 'auth':
   1148             if nonce == self.last_nonce:
   1149                 self.nonce_count += 1
   1150             else:
   1151                 self.nonce_count = 1
   1152                 self.last_nonce = nonce
   1153             ncvalue = '%08x' % self.nonce_count
   1154             cnonce = self.get_cnonce(nonce)
   1155             noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
   1156             respdig = KD(H(A1), noncebit)
   1157         elif qop is None:
   1158             respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
   1159         else:
   1160             # XXX handle auth-int.
   1161             raise URLError("qop '%s' is not supported." % qop)
   1162 
   1163         # XXX should the partial digests be encoded too?
   1164 
   1165         base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
   1166                'response="%s"' % (user, realm, nonce, req.selector,
   1167                                   respdig)
   1168         if opaque:
   1169             base += ', opaque="%s"' % opaque
   1170         if entdig:
   1171             base += ', digest="%s"' % entdig
   1172         base += ', algorithm="%s"' % algorithm
   1173         if qop:
   1174             base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
   1175         return base
   1176 
   1177     def get_algorithm_impls(self, algorithm):
   1178         # lambdas assume digest modules are imported at the top level
   1179         if algorithm == 'MD5':
   1180             H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
   1181         elif algorithm == 'SHA':
   1182             H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
   1183         # XXX MD5-sess
   1184         else:
   1185             raise ValueError("Unsupported digest authentication "
   1186                              "algorithm %r" % algorithm)
   1187         KD = lambda s, d: H("%s:%s" % (s, d))
   1188         return H, KD
   1189 
   1190     def get_entity_digest(self, data, chal):
   1191         # XXX not implemented yet
   1192         return None
   1193 
   1194 
   1195 class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
   1196     """An authentication protocol defined by RFC 2069
   1197 
   1198     Digest authentication improves on basic authentication because it
   1199     does not transmit passwords in the clear.
   1200     """
   1201 
   1202     auth_header = 'Authorization'
   1203     handler_order = 490  # before Basic auth
   1204 
   1205     def http_error_401(self, req, fp, code, msg, headers):
   1206         host = urlparse(req.full_url)[1]
   1207         retry = self.http_error_auth_reqed('www-authenticate',
   1208                                            host, req, headers)
   1209         self.reset_retry_count()
   1210         return retry
   1211 
   1212 
   1213 class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
   1214 
   1215     auth_header = 'Proxy-Authorization'
   1216     handler_order = 490  # before Basic auth
   1217 
   1218     def http_error_407(self, req, fp, code, msg, headers):
   1219         host = req.host
   1220         retry = self.http_error_auth_reqed('proxy-authenticate',
   1221                                            host, req, headers)
   1222         self.reset_retry_count()
   1223         return retry
   1224 
class AbstractHTTPHandler(BaseHandler):
    """Common plumbing for HTTP-ish handlers: request fix-up
    (do_request_) and the connection round-trip (do_open)."""

    def __init__(self, debuglevel=0):
        # Forwarded to the connection's set_debuglevel() in do_open().
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def _get_content_length(self, request):
        # Delegate to http.client's length logic; may return None when
        # the body length cannot be determined up front.
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Fill in required headers before the request is sent.

        Adds Content-type, Content-length or Transfer-encoding (for a
        body), Host, and the parent opener's addheaders, without
        overriding headers already present.  Returns the mutated
        request.  Raises URLError when the request has no host and
        TypeError when POST data is a str.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if (not request.has_header('Content-length')
                    and not request.has_header('Transfer-encoding')):
                content_length = self._get_content_length(request)
                if content_length is not None:
                    request.add_unredirected_header(
                            'Content-length', str(content_length))
                else:
                    # Unknown body length: fall back to chunked encoding.
                    request.add_unredirected_header(
                            'Transfer-encoding', 'chunked')

        sel_host = host
        if request.has_proxy():
            # When proxied, the selector is a full URL; the Host header
            # must name the origin server, not the proxy.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over ordinary ones.
        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            # HTTPS through a proxy uses a CONNECT tunnel.
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err: # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
   1341 
   1342 
class HTTPHandler(AbstractHTTPHandler):
    """Concrete handler for http:// URLs."""

    def http_open(self, req):
        # do_open performs the actual network round-trip.
        return self.do_open(http.client.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_
   1348     http_request = AbstractHTTPHandler.do_request_
   1349 
if hasattr(http.client, 'HTTPSConnection'):
    # Only available when the interpreter was built with SSL support.

    class HTTPSHandler(AbstractHTTPHandler):
        """Concrete handler for https:// URLs."""

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            # Forwarded to HTTPSConnection in https_open().
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req,
                context=self._context, check_hostname=self._check_hostname)

        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')
   1366 
   1367 class HTTPCookieProcessor(BaseHandler):
   1368     def __init__(self, cookiejar=None):
   1369         import http.cookiejar
   1370         if cookiejar is None:
   1371             cookiejar = http.cookiejar.CookieJar()
   1372         self.cookiejar = cookiejar
   1373 
   1374     def http_request(self, request):
   1375         self.cookiejar.add_cookie_header(request)
   1376         return request
   1377 
   1378     def http_response(self, request, response):
   1379         self.cookiejar.extract_cookies(response, request)
   1380         return response
   1381 
   1382     https_request = http_request
   1383     https_response = http_response
   1384 
   1385 class UnknownHandler(BaseHandler):
   1386     def unknown_open(self, req):
   1387         type = req.type
   1388         raise URLError('unknown url type: %s' % type)
   1389 
   1390 def parse_keqv_list(l):
   1391     """Parse list of key=value strings where keys are not duplicated."""
   1392     parsed = {}
   1393     for elt in l:
   1394         k, v = elt.split('=', 1)
   1395         if v[0] == '"' and v[-1] == '"':
   1396             v = v[1:-1]
   1397         parsed[k] = v
   1398     return parsed
   1399 
   1400 def parse_http_list(s):
   1401     """Parse lists as described by RFC 2068 Section 2.
   1402 
   1403     In particular, parse comma-separated lists where the elements of
   1404     the list may include quoted-strings.  A quoted-string could
   1405     contain a comma.  A non-quoted string could have quotes in the
   1406     middle.  Neither commas nor quotes count if they are escaped.
   1407     Only double-quotes count, not single-quotes.
   1408     """
   1409     res = []
   1410     part = ''
   1411 
   1412     escape = quote = False
   1413     for cur in s:
   1414         if escape:
   1415             part += cur
   1416             escape = False
   1417             continue
   1418         if quote:
   1419             if cur == '\\':
   1420                 escape = True
   1421                 continue
   1422             elif cur == '"':
   1423                 quote = False
   1424             part += cur
   1425             continue
   1426 
   1427         if cur == ',':
   1428             res.append(part)
   1429             part = ''
   1430             continue
   1431 
   1432         if cur == '"':
   1433             quote = True
   1434 
   1435         part += cur
   1436 
   1437     # append last part
   1438     if part:
   1439         res.append(part)
   1440 
   1441     return [part.strip() for part in res]
   1442 
class FileHandler(BaseHandler):
    """Open file:// URLs from the local filesystem."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            if not req.host in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
            # NOTE(review): when req.host IS one of the local names,
            # control falls through and this method returns None (the
            # request is left unhandled) -- confirm this is intended.
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        # Cache every IP address that means "this machine" on the class,
        # so the DNS lookups happen at most once per process.
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl wrapping the local file for the request.

        Synthesizes Content-type/Content-length/Last-modified headers
        from the file's stat info.  Raises URLError for OS errors and
        when the host part does not refer to the local machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
   1494 
   1495 def _safe_gethostbyname(host):
   1496     try:
   1497         return socket.gethostbyname(host)
   1498     except socket.gaierror:
   1499         return None
   1500 
class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        """Retrieve an ftp:// URL and return an addinfourl response.

        Parses optional user:password@ credentials and :port from the
        host, resolves the host, honors a ';type=' attribute on the
        path (A/I/D), and synthesizes Content-type/Content-length
        headers.  Raises URLError on any FTP or resolution failure.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)
        path, attrs = splitattr(req.selector)
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            # Path began with '/'; drop the empty leading component.
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Default transfer type: binary ('I') for files, directory
            # listing ('D') when the path ends in '/'.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            # Re-raise as URLError but keep the original traceback.
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Create a one-shot (non-persistent) FTP connection wrapper."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
   1558 
   1559 class CacheFTPHandler(FTPHandler):
   1560     # XXX would be nice to have pluggable cache strategies
   1561     # XXX this stuff is definitely not thread safe
   1562     def __init__(self):
   1563         self.cache = {}
   1564         self.timeout = {}
   1565         self.soonest = 0
   1566         self.delay = 60
   1567         self.max_conns = 16
   1568 
   1569     def setTimeout(self, t):
   1570         self.delay = t
   1571 
   1572     def setMaxConns(self, m):
   1573         self.max_conns = m
   1574 
   1575     def connect_ftp(self, user, passwd, host, port, dirs, timeout):
   1576         key = user, host, port, '/'.join(dirs), timeout
   1577         if key in self.cache:
   1578             self.timeout[key] = time.time() + self.delay
   1579         else:
   1580             self.cache[key] = ftpwrapper(user, passwd, host, port,
   1581                                          dirs, timeout)
   1582             self.timeout[key] = time.time() + self.delay
   1583         self.check_cache()
   1584         return self.cache[key]
   1585 
   1586     def check_cache(self):
   1587         # first check for old ones
   1588         t = time.time()
   1589         if self.soonest <= t:
   1590             for k, v in list(self.timeout.items()):
   1591                 if v < t:
   1592                     self.cache[k].close()
   1593                     del self.cache[k]
   1594                     del self.timeout[k]
   1595         self.soonest = min(list(self.timeout.values()))
   1596 
   1597         # then check the size
   1598         if len(self.cache) == self.max_conns:
   1599             for k, v in list(self.timeout.items()):
   1600                 if v == self.soonest:
   1601                     del self.cache[k]
   1602                     del self.timeout[k]
   1603                     break
   1604             self.soonest = min(list(self.timeout.values()))
   1605 
   1606     def clear_cache(self):
   1607         for conn in self.cache.values():
   1608             conn.close()
   1609         self.cache.clear()
   1610         self.timeout.clear()
   1611 
   1612 class DataHandler(BaseHandler):
   1613     def data_open(self, req):
   1614         # data URLs as specified in RFC 2397.
   1615         #
   1616         # ignores POSTed data
   1617         #
   1618         # syntax:
   1619         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
   1620         # mediatype := [ type "/" subtype ] *( ";" parameter )
   1621         # data      := *urlchar
   1622         # parameter := attribute "=" value
   1623         url = req.full_url
   1624 
   1625         scheme, data = url.split(":",1)
   1626         mediatype, data = data.split(",",1)
   1627 
   1628         # even base64 encoded data URLs might be quoted so unquote in any case:
   1629         data = unquote_to_bytes(data)
   1630         if mediatype.endswith(";base64"):
   1631             data = base64.decodebytes(data)
   1632             mediatype = mediatype[:-7]
   1633 
   1634         if not mediatype:
   1635             mediatype = "text/plain;charset=US-ASCII"
   1636 
   1637         headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" %
   1638             (mediatype, len(data)))
   1639 
   1640         return addinfourl(io.BytesIO(data), headers, url)
   1641 
   1642 
# Code move from the old urllib module

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


# FTP connection cache shared by default among URLopener instances
# (see URLopener.__init__ and URLopener.open_ftp, which keys it by
# (user, host, port, joined-dirs)).  Not thread safe.
ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        # The whole class is deprecated in favor of urlopen()/OpenerDirector.
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # x509 may carry key_file/cert_file for HTTPS client authentication.
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Release resources: delete temp files and clear the temp cache."""
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to open_<scheme>() ('-' in scheme becomes '_').
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            raise
        except OSError as msg:
            raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise OSError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise OSError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, if given, is called as reporthook(blocknum,
        blocksize, totalsize) after each block is read.  Raises
        ContentTooShortError if fewer bytes arrive than the server's
        Content-Length header promised.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        # A local file needs no copy: return its path directly.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except OSError as msg:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target given: download into a temp file whose suffix
                # mirrors the URL path's extension.
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_class.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: split credentials out of the host part.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Tuple (proxyhost, full-url) set up by open() for proxied requests.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Proxy exclusion matched: talk to the real host directly.
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        # Encode any credentials for HTTP Basic authentication.
        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] =  "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise OSError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            # Passes the x509 client-auth files captured in __init__.
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import email.utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        # A named host is acceptable only without a port and only when it
        # resolves to this machine.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Transfer type: directory listing for trailing '/', else binary,
            # unless overridden by a ';type=' URL attribute.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise OSError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # A trailing ';encoding' (no '=') names a transfer encoding,
        # e.g. ';base64'; parameters with '=' stay in the mediatype.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
   2113 
   2114 
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect following (with a recursion guard), Basic-auth retry
    support, and non-raising default error handling on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" -> (user, password) so credentials are only
        # prompted for once per realm (see get_user_passwd).
        self.auth_cache = {}
        # Redirect bookkeeping: tries counts redirects in the current
        # chain; maxtries bounds it (0 disables the limit).
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        # Hand the error body back to the caller as an ordinary response
        # object that carries the HTTP status code.
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                # Redirect chain is too deep; report a synthetic 500
                # instead of looping forever.
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            result = self.redirect_internal(url, fp, errcode, errmsg,
                                            headers, data)
            return result
        finally:
            # Reset once the (possibly recursive) redirect chain has
            # fully unwound; nested calls re-increment before this runs.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        # Prefer the standard Location header; fall back to the obsolete
        # URI header.  With neither present there is nothing to follow.
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        # A 307 must not change the request method, so a POST (data given)
        # cannot be transparently replayed here; fall back to the default
        # (non-redirecting) handler instead.
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # Each unguarded URLopener.http_error_default call below acts as a
        # bail-out for a case we can't handle (no challenge, unparsable
        # challenge, non-Basic scheme, or retry not requested).
        # NOTE(review): this relies on the base-class handler raising --
        # confirm against URLopener.http_error_default.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth
        # depending on the scheme of the original request.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Same structure as http_error_401, but driven by the
        # Proxy-Authenticate header and the retry_proxy_* helpers.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request after embedding fresh credentials into the
        # configured http proxy URL.
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any user:password@ prefix already present on the proxy;
        # a nonzero i also tells get_user_passwd to drop cached values.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        # https variant of retry_proxy_http_basic_auth.
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request with user:password@ embedded in the URL.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        # https variant of retry_http_basic_auth.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, password) for realm@host, prompting on a miss.

        A truthy clear_cache drops any cached entry first, forcing a
        fresh prompt (used after credentials embedded in a URL failed).
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Ctrl-C during the prompt means "no credentials supplied".
            print()
            return None, None
   2325 
   2326 
   2327 # Utility functions
   2328 
   2329 _localhost = None
   2330 def localhost():
   2331     """Return the IP address of the magic hostname 'localhost'."""
   2332     global _localhost
   2333     if _localhost is None:
   2334         _localhost = socket.gethostbyname('localhost')
   2335     return _localhost
   2336 
   2337 _thishost = None
   2338 def thishost():
   2339     """Return the IP addresses of the current host."""
   2340     global _thishost
   2341     if _thishost is None:
   2342         try:
   2343             _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
   2344         except socket.gaierror:
   2345             _thishost = tuple(socket.gethostbyname_ex('localhost')[2])
   2346     return _thishost
   2347 
   2348 _ftperrors = None
   2349 def ftperrors():
   2350     """Return the set of errors raised by the FTP class."""
   2351     global _ftperrors
   2352     if _ftperrors is None:
   2353         import ftplib
   2354         _ftperrors = ftplib.all_errors
   2355     return _ftperrors
   2356 
   2357 _noheaders = None
   2358 def noheaders():
   2359     """Return an empty email Message object."""
   2360     global _noheaders
   2361     if _noheaders is None:
   2362         _noheaders = email.message_from_string("")
   2363     return _noheaders
   2364 
   2365 
   2366 # Utility classes
   2367 
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    Wraps an ftplib.FTP connection and reference-counts the file
    objects handed out, so the control connection can be kept alive
    between transfers (persistent=True) and torn down safely.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of outstanding file objects from retrfile(); the
        # connection is only really closed once this drops to zero.
        self.refcount = 0
        # When True, real_close() is deferred until close() is called.
        self.keepalive = persistent
        try:
            self.init()
        except:
            # Connection setup failed: release any partial state, then
            # re-raise the original error for the caller.
            self.close()
            raise

    def init(self):
        """Connect, log in, and change to the target directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving a file (or a directory listing).

        Returns a (file-like object, length-or-None) pair.  type 'd'/'D'
        forces a directory listing; any other value is sent verbatim in
        a TYPE command (e.g. 'I' for binary).
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The control connection may have gone away; reconnect once
            # and retry the command.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply means "not a plain file" -- fall through to
                # the directory-listing attempt; anything else is fatal.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always restore the previous working directory.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # Wrap the data connection so that closing the returned object
        # also runs file_close() and updates the refcount.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        # Mark the data channel as idle.
        self.busy = 0

    def close(self):
        """Stop keeping the connection alive; close it if unreferenced."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Invoked when a file object returned by retrfile() is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        # Unconditionally shut down the underlying FTP connection,
        # swallowing any FTP-level errors raised while closing.
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
   2461 
   2462 # Proxy handling
   2463 def getproxies_environment():
   2464     """Return a dictionary of scheme -> proxy server URL mappings.
   2465 
   2466     Scan the environment for variables named <scheme>_proxy;
   2467     this seems to be the standard convention.  If you need a
   2468     different way, you can pass a proxies dictionary to the
   2469     [Fancy]URLopener constructor.
   2470 
   2471     """
   2472     proxies = {}
   2473     # in order to prefer lowercase variables, process environment in
   2474     # two passes: first matches any, second pass matches lowercase only
   2475     for name, value in os.environ.items():
   2476         name = name.lower()
   2477         if value and name[-6:] == '_proxy':
   2478             proxies[name[:-6]] = value
   2479     # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
   2480     # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
   2481     # header from the client
   2482     # If "proxy" is lowercase, it will still be used thanks to the next block
   2483     if 'REQUEST_METHOD' in os.environ:
   2484         proxies.pop('http', None)
   2485     for name, value in os.environ.items():
   2486         if name[-6:] == '_proxy':
   2487             name = name.lower()
   2488             if value:
   2489                 proxies[name[:-6]] = value
   2490             else:
   2491                 proxies.pop(name[:-6], None)
   2492     return proxies
   2493 
   2494 def proxy_bypass_environment(host, proxies=None):
   2495     """Test if proxies should not be used for a particular host.
   2496 
   2497     Checks the proxy dict for the value of no_proxy, which should
   2498     be a list of comma separated DNS suffixes, or '*' for all hosts.
   2499 
   2500     """
   2501     if proxies is None:
   2502         proxies = getproxies_environment()
   2503     # don't bypass, if no_proxy isn't specified
   2504     try:
   2505         no_proxy = proxies['no']
   2506     except KeyError:
   2507         return 0
   2508     # '*' is special case for always bypass
   2509     if no_proxy == '*':
   2510         return 1
   2511     # strip port off host
   2512     hostonly, port = splitport(host)
   2513     # check if the host ends with any of the DNS suffixes
   2514     no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
   2515     for name in no_proxy_list:
   2516         if name:
   2517             name = name.lstrip('.')  # ignore leading dots
   2518             name = re.escape(name)
   2519             pattern = r'(.+\.)?%s$' % name
   2520             if (re.match(pattern, hostonly, re.I)
   2521                     or re.match(pattern, host, re.I)):
   2522                 return 1
   2523     # otherwise, don't bypass
   2524     return 0
   2525 
   2526 
   2527 # This code tests an OSX specific data structure but is testable on all
   2528 # platforms
   2529 def _proxy_bypass_macosx_sysconf(host, proxy_settings):
   2530     """
   2531     Return True iff this host shouldn't be accessed using a proxy
   2532 
   2533     This function uses the MacOSX framework SystemConfiguration
   2534     to fetch the proxy information.
   2535 
   2536     proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
   2537     { 'exclude_simple': bool,
   2538       'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
   2539     }
   2540     """
   2541     from fnmatch import fnmatch
   2542 
   2543     hostonly, port = splitport(host)
   2544 
   2545     def ip2num(ipAddr):
   2546         parts = ipAddr.split('.')
   2547         parts = list(map(int, parts))
   2548         if len(parts) != 4:
   2549             parts = (parts + [0, 0, 0, 0])[:4]
   2550         return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
   2551 
   2552     # Check for simple host names:
   2553     if '.' not in host:
   2554         if proxy_settings['exclude_simple']:
   2555             return True
   2556 
   2557     hostIP = None
   2558 
   2559     for value in proxy_settings.get('exceptions', ()):
   2560         # Items in the list are strings like these: *.local, 169.254/16
   2561         if not value: continue
   2562 
   2563         m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
   2564         if m is not None:
   2565             if hostIP is None:
   2566                 try:
   2567                     hostIP = socket.gethostbyname(hostonly)
   2568                     hostIP = ip2num(hostIP)
   2569                 except OSError:
   2570                     continue
   2571 
   2572             base = ip2num(m.group(1))
   2573             mask = m.group(2)
   2574             if mask is None:
   2575                 mask = 8 * (m.group(1).count('.') + 1)
   2576             else:
   2577                 mask = int(mask[1:])
   2578             mask = 32 - mask
   2579 
   2580             if (hostIP >> mask) == (base >> mask):
   2581                 return True
   2582 
   2583         elif fnmatch(host, value):
   2584             return True
   2585 
   2586     return False
   2587 
   2588 
if sys.platform == 'darwin':
    # macOS: proxy settings come from the SystemConfiguration framework
    # via the private _scproxy extension module.
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        # Feed the live system configuration into the platform-independent
        # _proxy_bypass_macosx_sysconf helper.
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        # Environment variables take precedence over the system config.
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            # No scheme given: assume the protocol's own.
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        # Return 1 if the registry's ProxyOverride list says host should
        # bypass the proxy, else 0.
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except OSError:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except OSError:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' matches any host name without a dot.
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
   2745