      1 """Regression tests for what was in Python 2's "urllib" module"""
      2 
      3 import urllib.parse
      4 import urllib.request
      5 import urllib.error
      6 import http.client
      7 import email.message
      8 import io
      9 import unittest
     10 from unittest.mock import patch
     11 from test import support
     12 import os
     13 try:
     14     import ssl
     15 except ImportError:
     16     ssl = None
     17 import sys
     18 import tempfile
     19 from nturl2path import url2pathname, pathname2url
     20 
     21 from base64 import b64encode
     22 import collections
     23 
     24 
     25 def hexescape(char):
     26     """Escape char as RFC 2396 specifies"""
     27     hex_repr = hex(ord(char))[2:].upper()
     28     if len(hex_repr) == 1:
     29         hex_repr = "0%s" % hex_repr
     30     return "%" + hex_repr
     31 
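         # A couple of concrete values implied by the rule above, for quick
         # reference (illustrative only; they follow directly from hexescape()):
         #
         #     hexescape(' ')  -> '%20'
         #     hexescape('<')  -> '%3C'
         #     hexescape('\n') -> '%0A'
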
     32 # Shortcut for testing FancyURLopener
     33 _urlopener = None
     34 
     35 
     36 def urlopen(url, data=None, proxies=None):
     37     """urlopen(url [, data]) -> open file-like object"""
     38     global _urlopener
     39     if proxies is not None:
     40         opener = urllib.request.FancyURLopener(proxies=proxies)
     41     elif not _urlopener:
     42         opener = FancyURLopener()
     43         _urlopener = opener
     44     else:
     45         opener = _urlopener
     46     if data is None:
     47         return opener.open(url)
     48     else:
     49         return opener.open(url, data)
     50 
     51 
     52 def FancyURLopener():
     53     with support.check_warnings(
     54             ('FancyURLopener style of invoking requests is deprecated.',
     55             DeprecationWarning)):
     56         return urllib.request.FancyURLopener()
     57 
     58 
     59 def fakehttp(fakedata):
     60     class FakeSocket(io.BytesIO):
     61         io_refs = 1
     62 
     63         def sendall(self, data):
     64             FakeHTTPConnection.buf = data
     65 
     66         def makefile(self, *args, **kwds):
     67             self.io_refs += 1
     68             return self
     69 
     70         def read(self, amt=None):
     71             if self.closed:
     72                 return b""
     73             return io.BytesIO.read(self, amt)
     74 
     75         def readline(self, length=None):
     76             if self.closed:
     77                 return b""
     78             return io.BytesIO.readline(self, length)
     79 
     80         def close(self):
     81             self.io_refs -= 1
     82             if self.io_refs == 0:
     83                 io.BytesIO.close(self)
     84 
     85     class FakeHTTPConnection(http.client.HTTPConnection):
     86 
     87         # buffer to store data for verification in urlopen tests.
     88         buf = None
     89 
     90         def connect(self):
     91             self.sock = FakeSocket(self.fakedata)
     92             type(self).fakesock = self.sock
     93     FakeHTTPConnection.fakedata = fakedata
     94 
     95     return FakeHTTPConnection
     96 
     97 
     98 class FakeHTTPMixin(object):
     99     def fakehttp(self, fakedata):
    100         self._connection_class = http.client.HTTPConnection
    101         http.client.HTTPConnection = fakehttp(fakedata)
    102 
    103     def unfakehttp(self):
    104         http.client.HTTPConnection = self._connection_class
    105 
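         # Sketch of how FakeHTTPMixin is meant to be used by the HTTP tests below
         # (the intended pattern, not an extra test case): swap in the fake
         # connection class, exercise urlopen(), and always restore the real class
         # in a finally block.
         #
         #     self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
         #     try:
         #         fp = urlopen("http://python.org/")
         #         ...
         #     finally:
         #         self.unfakehttp()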
    106 
    107 class FakeFTPMixin(object):
    108     def fakeftp(self):
    109         class FakeFtpWrapper(object):
     110             def __init__(self, user, passwd, host, port, dirs, timeout=None,
    111                      persistent=True):
    112                 pass
    113 
    114             def retrfile(self, file, type):
    115                 return io.BytesIO(), 0
    116 
    117             def close(self):
    118                 pass
    119 
    120         self._ftpwrapper_class = urllib.request.ftpwrapper
    121         urllib.request.ftpwrapper = FakeFtpWrapper
    122 
    123     def unfakeftp(self):
    124         urllib.request.ftpwrapper = self._ftpwrapper_class
    125 
    126 
    127 class urlopen_FileTests(unittest.TestCase):
    128     """Test urlopen() opening a temporary file.
    129 
    130     Try to test as much functionality as possible so as to cut down on reliance
    131     on connecting to the Net for testing.
    132 
    133     """
    134 
    135     def setUp(self):
    136         # Create a temp file to use for testing
    137         self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
    138                           "ascii")
    139         f = open(support.TESTFN, 'wb')
    140         try:
    141             f.write(self.text)
    142         finally:
    143             f.close()
    144         self.pathname = support.TESTFN
    145         self.returned_obj = urlopen("file:%s" % self.pathname)
    146 
    147     def tearDown(self):
    148         """Shut down the open object"""
    149         self.returned_obj.close()
    150         os.remove(support.TESTFN)
    151 
    152     def test_interface(self):
    153         # Make sure object returned by urlopen() has the specified methods
    154         for attr in ("read", "readline", "readlines", "fileno",
    155                      "close", "info", "geturl", "getcode", "__iter__"):
    156             self.assertTrue(hasattr(self.returned_obj, attr),
    157                          "object returned by urlopen() lacks %s attribute" %
    158                          attr)
    159 
    160     def test_read(self):
    161         self.assertEqual(self.text, self.returned_obj.read())
    162 
    163     def test_readline(self):
    164         self.assertEqual(self.text, self.returned_obj.readline())
    165         self.assertEqual(b'', self.returned_obj.readline(),
    166                          "calling readline() after exhausting the file did not"
    167                          " return an empty string")
    168 
    169     def test_readlines(self):
    170         lines_list = self.returned_obj.readlines()
    171         self.assertEqual(len(lines_list), 1,
    172                          "readlines() returned the wrong number of lines")
    173         self.assertEqual(lines_list[0], self.text,
    174                          "readlines() returned improper text")
    175 
    176     def test_fileno(self):
    177         file_num = self.returned_obj.fileno()
    178         self.assertIsInstance(file_num, int, "fileno() did not return an int")
    179         self.assertEqual(os.read(file_num, len(self.text)), self.text,
    180                          "Reading on the file descriptor returned by fileno() "
    181                          "did not return the expected text")
    182 
    183     def test_close(self):
    184         # Test close() by calling it here and then having it be called again
    185         # by the tearDown() method for the test
    186         self.returned_obj.close()
    187 
    188     def test_info(self):
    189         self.assertIsInstance(self.returned_obj.info(), email.message.Message)
    190 
    191     def test_geturl(self):
    192         self.assertEqual(self.returned_obj.geturl(), self.pathname)
    193 
    194     def test_getcode(self):
    195         self.assertIsNone(self.returned_obj.getcode())
    196 
    197     def test_iter(self):
    198         # Test iterator
    199         # Don't need to count number of iterations since test would fail the
    200         # instant it returned anything beyond the first line from the
    201         # comparison.
    202         # Use the iterator in the usual implicit way to test for ticket #4608.
    203         for line in self.returned_obj:
    204             self.assertEqual(line, self.text)
    205 
    206     def test_relativelocalfile(self):
    207         self.assertRaises(ValueError,urllib.request.urlopen,'./' + self.pathname)
    208 
    209 class ProxyTests(unittest.TestCase):
    210 
    211     def setUp(self):
    212         # Records changes to env vars
    213         self.env = support.EnvironmentVarGuard()
    214         # Delete all proxy related env vars
    215         for k in list(os.environ):
    216             if 'proxy' in k.lower():
    217                 self.env.unset(k)
    218 
    219     def tearDown(self):
    220         # Restore all proxy related env vars
    221         self.env.__exit__()
    222         del self.env
    223 
    224     def test_getproxies_environment_keep_no_proxies(self):
    225         self.env.set('NO_PROXY', 'localhost')
    226         proxies = urllib.request.getproxies_environment()
     227         # getproxies_environment() uses lowercased, truncated (no '_proxy') keys
    228         self.assertEqual('localhost', proxies['no'])
    229         # List of no_proxies with space.
    230         self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
    231         self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com'))
    232         self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
    233         self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234'))
    234 
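             # For reference, getproxies_environment() keys the result by lowercased
             # scheme with the '_proxy' suffix stripped, e.g. (illustrative):
             #
             #     HTTP_PROXY=http://somewhere:3128  ->  {'http': 'http://somewhere:3128'}
             #     NO_PROXY=localhost                ->  {'no': 'localhost'}
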
    235     def test_proxy_cgi_ignore(self):
    236         try:
    237             self.env.set('HTTP_PROXY', 'http://somewhere:3128')
    238             proxies = urllib.request.getproxies_environment()
    239             self.assertEqual('http://somewhere:3128', proxies['http'])
    240             self.env.set('REQUEST_METHOD', 'GET')
    241             proxies = urllib.request.getproxies_environment()
    242             self.assertNotIn('http', proxies)
    243         finally:
    244             self.env.unset('REQUEST_METHOD')
    245             self.env.unset('HTTP_PROXY')
    246 
    247     def test_proxy_bypass_environment_host_match(self):
    248         bypass = urllib.request.proxy_bypass_environment
    249         self.env.set('NO_PROXY',
    250                      'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
    251         self.assertTrue(bypass('localhost'))
    252         self.assertTrue(bypass('LocalHost'))                 # MixedCase
    253         self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
    254         self.assertTrue(bypass('newdomain.com:1234'))
    255         self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
    256         self.assertTrue(bypass('anotherdomain.com:8888'))
    257         self.assertTrue(bypass('www.newdomain.com:1234'))
    258         self.assertFalse(bypass('prelocalhost'))
    259         self.assertFalse(bypass('newdomain.com'))            # no port
    260         self.assertFalse(bypass('newdomain.com:1235'))       # wrong port
    261 
    262 class ProxyTests_withOrderedEnv(unittest.TestCase):
    263 
    264     def setUp(self):
     265         # We need to test conditions where variable order _is_ significant
    266         self._saved_env = os.environ
    267         # Monkey patch os.environ, start with empty fake environment
    268         os.environ = collections.OrderedDict()
    269 
    270     def tearDown(self):
    271         os.environ = self._saved_env
    272 
    273     def test_getproxies_environment_prefer_lowercase(self):
    274         # Test lowercase preference with removal
    275         os.environ['no_proxy'] = ''
    276         os.environ['No_Proxy'] = 'localhost'
    277         self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
    278         self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
    279         os.environ['http_proxy'] = ''
    280         os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
    281         proxies = urllib.request.getproxies_environment()
    282         self.assertEqual({}, proxies)
    283         # Test lowercase preference of proxy bypass and correct matching including ports
    284         os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
    285         os.environ['No_Proxy'] = 'xyz.com'
    286         self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
    287         self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678'))
    288         self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234'))
    289         self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
    290         self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
    291         # Test lowercase preference with replacement
    292         os.environ['http_proxy'] = 'http://somewhere:3128'
    293         os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
    294         proxies = urllib.request.getproxies_environment()
    295         self.assertEqual('http://somewhere:3128', proxies['http'])
    296 
    297 class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    298     """Test urlopen() opening a fake http connection."""
    299 
    300     def check_read(self, ver):
    301         self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
    302         try:
    303             fp = urlopen("http://python.org/")
    304             self.assertEqual(fp.readline(), b"Hello!")
    305             self.assertEqual(fp.readline(), b"")
    306             self.assertEqual(fp.geturl(), 'http://python.org/')
    307             self.assertEqual(fp.getcode(), 200)
    308         finally:
    309             self.unfakehttp()
    310 
    311     def test_url_fragment(self):
     312         # Issue #11703: geturl() should not omit the fragment of the original URL.
    313         url = 'http://docs.python.org/library/urllib.html#OK'
    314         self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
    315         try:
    316             fp = urllib.request.urlopen(url)
    317             self.assertEqual(fp.geturl(), url)
    318         finally:
    319             self.unfakehttp()
    320 
    321     def test_willclose(self):
    322         self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
    323         try:
    324             resp = urlopen("http://www.python.org")
    325             self.assertTrue(resp.fp.will_close)
    326         finally:
    327             self.unfakehttp()
    328 
    329     def test_read_0_9(self):
    330         # "0.9" response accepted (but not "simple responses" without
    331         # a status line)
    332         self.check_read(b"0.9")
    333 
    334     def test_read_1_0(self):
    335         self.check_read(b"1.0")
    336 
    337     def test_read_1_1(self):
    338         self.check_read(b"1.1")
    339 
    340     def test_read_bogus(self):
    341         # urlopen() should raise OSError for many error codes.
    342         self.fakehttp(b'''HTTP/1.1 401 Authentication Required
    343 Date: Wed, 02 Jan 2008 03:03:54 GMT
    344 Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
    345 Connection: close
    346 Content-Type: text/html; charset=iso-8859-1
    347 ''')
    348         try:
    349             self.assertRaises(OSError, urlopen, "http://python.org/")
    350         finally:
    351             self.unfakehttp()
    352 
    353     def test_invalid_redirect(self):
     354         # urlopen() should raise an error on a redirect to a file:// URL.
    355         self.fakehttp(b'''HTTP/1.1 302 Found
    356 Date: Wed, 02 Jan 2008 03:03:54 GMT
    357 Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
    358 Location: file://guidocomputer.athome.com:/python/license
    359 Connection: close
    360 Content-Type: text/html; charset=iso-8859-1
    361 ''')
    362         try:
    363             msg = "Redirection to url 'file:"
    364             with self.assertRaisesRegex(urllib.error.HTTPError, msg):
    365                 urlopen("http://python.org/")
    366         finally:
    367             self.unfakehttp()
    368 
    369     def test_redirect_limit_independent(self):
    370         # Ticket #12923: make sure independent requests each use their
    371         # own retry limit.
    372         for i in range(FancyURLopener().maxtries):
    373             self.fakehttp(b'''HTTP/1.1 302 Found
    374 Location: file://guidocomputer.athome.com:/python/license
    375 Connection: close
    376 ''')
    377             try:
    378                 self.assertRaises(urllib.error.HTTPError, urlopen,
    379                     "http://something")
    380             finally:
    381                 self.unfakehttp()
    382 
    383     def test_empty_socket(self):
    384         # urlopen() raises OSError if the underlying socket does not send any
    385         # data. (#1680230)
    386         self.fakehttp(b'')
    387         try:
    388             self.assertRaises(OSError, urlopen, "http://something")
    389         finally:
    390             self.unfakehttp()
    391 
    392     def test_missing_localfile(self):
    393         # Test for #10836
    394         with self.assertRaises(urllib.error.URLError) as e:
    395             urlopen('file://localhost/a/file/which/doesnot/exists.py')
    396         self.assertTrue(e.exception.filename)
    397         self.assertTrue(e.exception.reason)
    398 
    399     def test_file_notexists(self):
    400         fd, tmp_file = tempfile.mkstemp()
    401         tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
    402         try:
    403             self.assertTrue(os.path.exists(tmp_file))
    404             with urlopen(tmp_fileurl) as fobj:
    405                 self.assertTrue(fobj)
    406         finally:
    407             os.close(fd)
    408             os.unlink(tmp_file)
    409         self.assertFalse(os.path.exists(tmp_file))
    410         with self.assertRaises(urllib.error.URLError):
    411             urlopen(tmp_fileurl)
    412 
    413     def test_ftp_nohost(self):
    414         test_ftp_url = 'ftp:///path'
    415         with self.assertRaises(urllib.error.URLError) as e:
    416             urlopen(test_ftp_url)
    417         self.assertFalse(e.exception.filename)
    418         self.assertTrue(e.exception.reason)
    419 
    420     def test_ftp_nonexisting(self):
    421         with self.assertRaises(urllib.error.URLError) as e:
    422             urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
    423         self.assertFalse(e.exception.filename)
    424         self.assertTrue(e.exception.reason)
    425 
    426     @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    427     def test_ftp_cache_pruning(self):
    428         self.fakeftp()
    429         try:
    430             urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
    431             urlopen('ftp://localhost')
    432         finally:
    433             self.unfakeftp()
    434 
    435 
    436     def test_userpass_inurl(self):
    437         self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
    438         try:
    439             fp = urlopen("http://user:pass@python.org/")
    440             self.assertEqual(fp.readline(), b"Hello!")
    441             self.assertEqual(fp.readline(), b"")
    442             self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
    443             self.assertEqual(fp.getcode(), 200)
    444         finally:
    445             self.unfakehttp()
    446 
    447     def test_userpass_inurl_w_spaces(self):
    448         self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
    449         try:
    450             userpass = "a b:c d"
    451             url = "http://{}@python.org/".format(userpass)
    452             fakehttp_wrapper = http.client.HTTPConnection
    453             authorization = ("Authorization: Basic %s\r\n" %
    454                              b64encode(userpass.encode("ASCII")).decode("ASCII"))
    455             fp = urlopen(url)
    456             # The authorization header must be in place
    457             self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
    458             self.assertEqual(fp.readline(), b"Hello!")
    459             self.assertEqual(fp.readline(), b"")
     460             # The spaces are percent-quoted in the URL, so the URLs do not match
    461             self.assertNotEqual(fp.geturl(), url)
    462             self.assertEqual(fp.getcode(), 200)
    463         finally:
    464             self.unfakehttp()
    465 
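             # For reference, the Basic credentials above are formed the standard way:
             # base64-encode "user:pass" and prepend "Basic ".  A tiny sketch
             # (values shown are illustrative, not used by the tests):
             #
             #     b64encode(b"user:pass").decode("ASCII")    # -> 'dXNlcjpwYXNz'
             #     "Authorization: Basic dXNlcjpwYXNz\r\n"
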
    466     def test_URLopener_deprecation(self):
     467         with support.check_warnings(('', DeprecationWarning)):
    468             urllib.request.URLopener()
    469 
    470     @unittest.skipUnless(ssl, "ssl module required")
    471     def test_cafile_and_context(self):
    472         context = ssl.create_default_context()
    473         with support.check_warnings(('', DeprecationWarning)):
    474             with self.assertRaises(ValueError):
    475                 urllib.request.urlopen(
    476                     "https://localhost", cafile="/nonexistent/path", context=context
    477                 )
    478 
    479 class urlopen_DataTests(unittest.TestCase):
    480     """Test urlopen() opening a data URL."""
    481 
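             # Data URLs (RFC 2397) have the form  data:[<mediatype>][;base64],<data>
             # where the mediatype defaults to text/plain;charset=US-ASCII.  A minimal
             # illustrative example, separate from the fixtures below:
             #
             #     urllib.request.urlopen("data:,Hello").read()   # -> b'Hello'
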
    482     def setUp(self):
     483         # Text containing URL special characters and non-ASCII characters
    484         self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
    485         # 2x1 pixel RGB PNG image with one black and one white pixel
    486         self.image = (
    487             b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
    488             b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
    489             b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
    490             b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')
    491 
    492         self.text_url = (
    493             "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
    494             "D%26%20%C3%B6%20%C3%84%20")
    495         self.text_url_base64 = (
    496             "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
    497             "sJT0mIPYgxCA%3D")
    498         # base64 encoded data URL that contains ignorable spaces,
    499         # such as "\n", " ", "%0A", and "%20".
    500         self.image_url = (
    501             "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
    502             "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
    503             "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")
    504 
    505         self.text_url_resp = urllib.request.urlopen(self.text_url)
    506         self.text_url_base64_resp = urllib.request.urlopen(
    507             self.text_url_base64)
    508         self.image_url_resp = urllib.request.urlopen(self.image_url)
    509 
    510     def test_interface(self):
    511         # Make sure object returned by urlopen() has the specified methods
    512         for attr in ("read", "readline", "readlines",
    513                      "close", "info", "geturl", "getcode", "__iter__"):
    514             self.assertTrue(hasattr(self.text_url_resp, attr),
    515                          "object returned by urlopen() lacks %s attribute" %
    516                          attr)
    517 
    518     def test_info(self):
    519         self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
    520         self.assertEqual(self.text_url_base64_resp.info().get_params(),
    521             [('text/plain', ''), ('charset', 'ISO-8859-1')])
    522         self.assertEqual(self.image_url_resp.info()['content-length'],
    523             str(len(self.image)))
    524         self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
    525             [('text/plain', ''), ('charset', 'US-ASCII')])
    526 
    527     def test_geturl(self):
    528         self.assertEqual(self.text_url_resp.geturl(), self.text_url)
    529         self.assertEqual(self.text_url_base64_resp.geturl(),
    530             self.text_url_base64)
    531         self.assertEqual(self.image_url_resp.geturl(), self.image_url)
    532 
    533     def test_read_text(self):
    534         self.assertEqual(self.text_url_resp.read().decode(
    535             dict(self.text_url_resp.info().get_params())['charset']), self.text)
    536 
    537     def test_read_text_base64(self):
    538         self.assertEqual(self.text_url_base64_resp.read().decode(
    539             dict(self.text_url_base64_resp.info().get_params())['charset']),
    540             self.text)
    541 
    542     def test_read_image(self):
    543         self.assertEqual(self.image_url_resp.read(), self.image)
    544 
    545     def test_missing_comma(self):
    546         self.assertRaises(ValueError,urllib.request.urlopen,'data:text/plain')
    547 
    548     def test_invalid_base64_data(self):
    549         # missing padding character
    550         self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
    551 
    552 class urlretrieve_FileTests(unittest.TestCase):
    553     """Test urllib.urlretrieve() on local files"""
    554 
    555     def setUp(self):
    556         # Create a list of temporary files. Each item in the list is a file
    557         # name (absolute path or relative to the current working directory).
    558         # All files in this list will be deleted in the tearDown method. Note,
     559         # this only helps to make sure temporary files get deleted, but it
    560         # does nothing about trying to close files that may still be open. It
    561         # is the responsibility of the developer to properly close files even
    562         # when exceptional conditions occur.
    563         self.tempFiles = []
    564 
    565         # Create a temporary file.
    566         self.registerFileForCleanUp(support.TESTFN)
    567         self.text = b'testing urllib.urlretrieve'
    568         try:
    569             FILE = open(support.TESTFN, 'wb')
    570             FILE.write(self.text)
    571             FILE.close()
    572         finally:
    573             try: FILE.close()
    574             except: pass
    575 
    576     def tearDown(self):
    577         # Delete the temporary files.
    578         for each in self.tempFiles:
    579             try: os.remove(each)
    580             except: pass
    581 
    582     def constructLocalFileUrl(self, filePath):
    583         filePath = os.path.abspath(filePath)
    584         try:
    585             filePath.encode("utf-8")
    586         except UnicodeEncodeError:
    587             raise unittest.SkipTest("filePath is not encodable to utf8")
    588         return "file://%s" % urllib.request.pathname2url(filePath)
    589 
    590     def createNewTempFile(self, data=b""):
    591         """Creates a new temporary file containing the specified data,
    592         registers the file for deletion during the test fixture tear down, and
    593         returns the absolute path of the file."""
    594 
    595         newFd, newFilePath = tempfile.mkstemp()
    596         try:
    597             self.registerFileForCleanUp(newFilePath)
    598             newFile = os.fdopen(newFd, "wb")
    599             newFile.write(data)
    600             newFile.close()
    601         finally:
    602             try: newFile.close()
    603             except: pass
    604         return newFilePath
    605 
    606     def registerFileForCleanUp(self, fileName):
    607         self.tempFiles.append(fileName)
    608 
    609     def test_basic(self):
    610         # Make sure that a local file just gets its own location returned and
    611         # a headers value is returned.
    612         result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
    613         self.assertEqual(result[0], support.TESTFN)
    614         self.assertIsInstance(result[1], email.message.Message,
    615                               "did not get an email.message.Message instance "
    616                               "as second returned value")
    617 
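             # urlretrieve() returns a (local_filename, headers) pair, e.g. (sketch,
             # file name is illustrative):
             #
             #     filename, headers = urllib.request.urlretrieve("file:spam.txt")
             #     # filename == "spam.txt"; headers is an email.message.Message
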
    618     def test_copy(self):
    619         # Test that setting the filename argument works.
    620         second_temp = "%s.2" % support.TESTFN
    621         self.registerFileForCleanUp(second_temp)
    622         result = urllib.request.urlretrieve(self.constructLocalFileUrl(
    623             support.TESTFN), second_temp)
    624         self.assertEqual(second_temp, result[0])
    625         self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
    626                                                   "made")
    627         FILE = open(second_temp, 'rb')
    628         try:
    629             text = FILE.read()
    630             FILE.close()
    631         finally:
    632             try: FILE.close()
    633             except: pass
    634         self.assertEqual(self.text, text)
    635 
    636     def test_reporthook(self):
    637         # Make sure that the reporthook works.
    638         def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
    639             self.assertIsInstance(block_count, int)
    640             self.assertIsInstance(block_read_size, int)
    641             self.assertIsInstance(file_size, int)
    642             self.assertEqual(block_count, count_holder[0])
    643             count_holder[0] = count_holder[0] + 1
    644         second_temp = "%s.2" % support.TESTFN
    645         self.registerFileForCleanUp(second_temp)
    646         urllib.request.urlretrieve(
    647             self.constructLocalFileUrl(support.TESTFN),
    648             second_temp, hooktester)
    649 
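             # The reporthook contract exercised above and below: urlretrieve() calls
             # hook(block_count, block_read_size, file_size), with block_count 0 when
             # the "network connection" is opened and incremented once per block read.
             # A sketch of a typical progress hook (illustrative only):
             #
             #     def show_progress(count, block_size, total_size):
             #         done = min(count * block_size, total_size)
             #         print("retrieved %d of %d bytes" % (done, total_size))
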
    650     def test_reporthook_0_bytes(self):
    651         # Test on zero length file. Should call reporthook only 1 time.
    652         report = []
    653         def hooktester(block_count, block_read_size, file_size, _report=report):
    654             _report.append((block_count, block_read_size, file_size))
    655         srcFileName = self.createNewTempFile()
    656         urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
    657             support.TESTFN, hooktester)
    658         self.assertEqual(len(report), 1)
    659         self.assertEqual(report[0][2], 0)
    660 
    661     def test_reporthook_5_bytes(self):
    662         # Test on 5 byte file. Should call reporthook only 2 times (once when
    663         # the "network connection" is established and once when the block is
    664         # read).
    665         report = []
    666         def hooktester(block_count, block_read_size, file_size, _report=report):
    667             _report.append((block_count, block_read_size, file_size))
    668         srcFileName = self.createNewTempFile(b"x" * 5)
    669         urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
    670             support.TESTFN, hooktester)
    671         self.assertEqual(len(report), 2)
    672         self.assertEqual(report[0][2], 5)
    673         self.assertEqual(report[1][2], 5)
    674 
    675     def test_reporthook_8193_bytes(self):
    676         # Test on 8193 byte file. Should call reporthook only 3 times (once
    677         # when the "network connection" is established, once for the next 8192
    678         # bytes, and once for the last byte).
    679         report = []
    680         def hooktester(block_count, block_read_size, file_size, _report=report):
    681             _report.append((block_count, block_read_size, file_size))
    682         srcFileName = self.createNewTempFile(b"x" * 8193)
    683         urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
    684             support.TESTFN, hooktester)
    685         self.assertEqual(len(report), 3)
    686         self.assertEqual(report[0][2], 8193)
    687         self.assertEqual(report[0][1], 8192)
    688         self.assertEqual(report[1][1], 8192)
    689         self.assertEqual(report[2][1], 8192)
    690 
    691 
    692 class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    693     """Test urllib.urlretrieve() using fake http connections"""
    694 
    695     def test_short_content_raises_ContentTooShortError(self):
    696         self.fakehttp(b'''HTTP/1.1 200 OK
    697 Date: Wed, 02 Jan 2008 03:03:54 GMT
    698 Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
    699 Connection: close
    700 Content-Length: 100
    701 Content-Type: text/html; charset=iso-8859-1
    702 
    703 FF
    704 ''')
    705 
    706         def _reporthook(par1, par2, par3):
    707             pass
    708 
    709         with self.assertRaises(urllib.error.ContentTooShortError):
    710             try:
    711                 urllib.request.urlretrieve('http://example.com/',
    712                                            reporthook=_reporthook)
    713             finally:
    714                 self.unfakehttp()
    715 
    716     def test_short_content_raises_ContentTooShortError_without_reporthook(self):
    717         self.fakehttp(b'''HTTP/1.1 200 OK
    718 Date: Wed, 02 Jan 2008 03:03:54 GMT
    719 Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
    720 Connection: close
    721 Content-Length: 100
    722 Content-Type: text/html; charset=iso-8859-1
    723 
    724 FF
    725 ''')
    726         with self.assertRaises(urllib.error.ContentTooShortError):
    727             try:
    728                 urllib.request.urlretrieve('http://example.com/')
    729             finally:
    730                 self.unfakehttp()
    731 
    732 
    733 class QuotingTests(unittest.TestCase):
    734     r"""Tests for urllib.quote() and urllib.quote_plus()
    735 
    736     According to RFC 2396 (Uniform Resource Identifiers), to escape a
    737     character you write it as '%' + <2 character US-ASCII hex value>.
     738     The Python expression ``'%' + hex(ord(<character>))[2:]``, zero-padded to
     739     two hex digits, escapes a character properly. Hex-letter case does not matter.
    740 
    741     The various character sets specified are:
    742 
    743     Reserved characters : ";/?:@&=+$,"
    744         Have special meaning in URIs and must be escaped if not being used for
    745         their special meaning
    746     Data characters : letters, digits, and "-_.!~*'()"
    747         Unreserved and do not need to be escaped; can be, though, if desired
    748     Control characters : 0x00 - 0x1F, 0x7F
    749         Have no use in URIs so must be escaped
    750     space : 0x20
    751         Must be escaped
    752     Delimiters : '<>#%"'
    753         Must be escaped
    754     Unwise : "{}|\^[]`"
    755         Must be escaped
    756 
    757     """
    758 
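             # A few concrete values implied by the rules above (illustrative; note
             # that quote()'s default 'safe' string is '/', while quote_plus() maps
             # spaces to '+'):
             #
             #     urllib.parse.quote('a b/c')       -> 'a%20b/c'
             #     urllib.parse.quote_plus('a b/c')  -> 'a+b%2Fc'
             #     urllib.parse.quote('<>')          -> '%3C%3E'
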
    759     def test_never_quote(self):
     760         # Make sure quote() does not quote letters, digits, and "_.-"
     761         do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    762                                  "abcdefghijklmnopqrstuvwxyz",
    763                                  "0123456789",
    764                                  "_.-"])
    765         result = urllib.parse.quote(do_not_quote)
    766         self.assertEqual(do_not_quote, result,
    767                          "using quote(): %r != %r" % (do_not_quote, result))
    768         result = urllib.parse.quote_plus(do_not_quote)
    769         self.assertEqual(do_not_quote, result,
    770                         "using quote_plus(): %r != %r" % (do_not_quote, result))
    771 
    772     def test_default_safe(self):
    773         # Test '/' is default value for 'safe' parameter
    774         self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
    775 
    776     def test_safe(self):
    777         # Test setting 'safe' parameter does what it should do
    778         quote_by_default = "<>"
    779         result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
    780         self.assertEqual(quote_by_default, result,
    781                          "using quote(): %r != %r" % (quote_by_default, result))
    782         result = urllib.parse.quote_plus(quote_by_default,
    783                                          safe=quote_by_default)
    784         self.assertEqual(quote_by_default, result,
    785                          "using quote_plus(): %r != %r" %
    786                          (quote_by_default, result))
    787         # Safe expressed as bytes rather than str
    788         result = urllib.parse.quote(quote_by_default, safe=b"<>")
    789         self.assertEqual(quote_by_default, result,
    790                          "using quote(): %r != %r" % (quote_by_default, result))
    791         # "Safe" non-ASCII characters should have no effect
    792         # (Since URIs are not allowed to have non-ASCII characters)
    793         result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
    794         expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
    795         self.assertEqual(expect, result,
    796                          "using quote(): %r != %r" %
    797                          (expect, result))
     798         # Same as above, but using bytes rather than str
    799         result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
    800         expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
    801         self.assertEqual(expect, result,
    802                          "using quote(): %r != %r" %
    803                          (expect, result))
    804 
    805     def test_default_quoting(self):
     806         # Make sure all characters that should be quoted are quoted by default,
     807         # except for space (which has its own test).
    808         should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
    809         should_quote.append(r'<>#%"{}|\^[]`')
    810         should_quote.append(chr(127)) # For 0x7F
    811         should_quote = ''.join(should_quote)
    812         for char in should_quote:
    813             result = urllib.parse.quote(char)
    814             self.assertEqual(hexescape(char), result,
    815                              "using quote(): "
    816                              "%s should be escaped to %s, not %s" %
    817                              (char, hexescape(char), result))
    818             result = urllib.parse.quote_plus(char)
    819             self.assertEqual(hexescape(char), result,
    820                              "using quote_plus(): "
     821                              "%s should be escaped to %s, not %s" %
    822                              (char, hexescape(char), result))
    823         del should_quote
    824         partial_quote = "ab[]cd"
    825         expected = "ab%5B%5Dcd"
    826         result = urllib.parse.quote(partial_quote)
    827         self.assertEqual(expected, result,
    828                          "using quote(): %r != %r" % (expected, result))
    829         result = urllib.parse.quote_plus(partial_quote)
    830         self.assertEqual(expected, result,
    831                          "using quote_plus(): %r != %r" % (expected, result))
    832 
    833     def test_quoting_space(self):
    834         # Make sure quote() and quote_plus() handle spaces as specified in
    835         # their unique way
    836         result = urllib.parse.quote(' ')
    837         self.assertEqual(result, hexescape(' '),
    838                          "using quote(): %r != %r" % (result, hexescape(' ')))
    839         result = urllib.parse.quote_plus(' ')
    840         self.assertEqual(result, '+',
    841                          "using quote_plus(): %r != +" % result)
    842         given = "a b cd e f"
    843         expect = given.replace(' ', hexescape(' '))
    844         result = urllib.parse.quote(given)
    845         self.assertEqual(expect, result,
    846                          "using quote(): %r != %r" % (expect, result))
    847         expect = given.replace(' ', '+')
    848         result = urllib.parse.quote_plus(given)
    849         self.assertEqual(expect, result,
    850                          "using quote_plus(): %r != %r" % (expect, result))
    851 
    852     def test_quoting_plus(self):
    853         self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
    854                          'alpha%2Bbeta+gamma')
    855         self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
    856                          'alpha+beta+gamma')
    857         # Test with bytes
    858         self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
    859                          'alpha%2Bbeta+gamma')
    860         # Test with safe bytes
    861         self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
    862                          'alpha+beta+gamma')
    863 
    864     def test_quote_bytes(self):
    865         # Bytes should quote directly to percent-encoded values
    866         given = b"\xa2\xd8ab\xff"
    867         expect = "%A2%D8ab%FF"
    868         result = urllib.parse.quote(given)
    869         self.assertEqual(expect, result,
    870                          "using quote(): %r != %r" % (expect, result))
    871         # Encoding argument should raise type error on bytes input
    872         self.assertRaises(TypeError, urllib.parse.quote, given,
    873                             encoding="latin-1")
    874         # quote_from_bytes should work the same
    875         result = urllib.parse.quote_from_bytes(given)
    876         self.assertEqual(expect, result,
    877                          "using quote_from_bytes(): %r != %r"
    878                          % (expect, result))
    879 
    880     def test_quote_with_unicode(self):
    881         # Characters in Latin-1 range, encoded by default in UTF-8
    882         given = "\xa2\xd8ab\xff"
    883         expect = "%C2%A2%C3%98ab%C3%BF"
    884         result = urllib.parse.quote(given)
    885         self.assertEqual(expect, result,
    886                          "using quote(): %r != %r" % (expect, result))
     887         # Characters in Latin-1 range, encoded with None (the default)
    888         result = urllib.parse.quote(given, encoding=None, errors=None)
    889         self.assertEqual(expect, result,
    890                          "using quote(): %r != %r" % (expect, result))
    891         # Characters in Latin-1 range, encoded with Latin-1
    892         given = "\xa2\xd8ab\xff"
    893         expect = "%A2%D8ab%FF"
    894         result = urllib.parse.quote(given, encoding="latin-1")
    895         self.assertEqual(expect, result,
    896                          "using quote(): %r != %r" % (expect, result))
    897         # Characters in BMP, encoded by default in UTF-8
    898         given = "\u6f22\u5b57"              # "Kanji"
    899         expect = "%E6%BC%A2%E5%AD%97"
    900         result = urllib.parse.quote(given)
    901         self.assertEqual(expect, result,
    902                          "using quote(): %r != %r" % (expect, result))
    903         # Characters in BMP, encoded with Latin-1
    904         given = "\u6f22\u5b57"
    905         self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
    906                                     encoding="latin-1")
    907         # Characters in BMP, encoded with Latin-1, with replace error handling
    908         given = "\u6f22\u5b57"
    909         expect = "%3F%3F"                   # "??"
    910         result = urllib.parse.quote(given, encoding="latin-1",
    911                                     errors="replace")
    912         self.assertEqual(expect, result,
    913                          "using quote(): %r != %r" % (expect, result))
     914         # Characters in BMP, encoded with Latin-1, with xmlcharrefreplace error handling
    915         given = "\u6f22\u5b57"
    916         expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
    917         result = urllib.parse.quote(given, encoding="latin-1",
    918                                     errors="xmlcharrefreplace")
    919         self.assertEqual(expect, result,
    920                          "using quote(): %r != %r" % (expect, result))
    921 
    922     def test_quote_plus_with_unicode(self):
    923         # Encoding (latin-1) test for quote_plus
    924         given = "\xa2\xd8 \xff"
    925         expect = "%A2%D8+%FF"
    926         result = urllib.parse.quote_plus(given, encoding="latin-1")
    927         self.assertEqual(expect, result,
    928                          "using quote_plus(): %r != %r" % (expect, result))
    929         # Errors test for quote_plus
    930         given = "ab\u6f22\u5b57 cd"
    931         expect = "ab%3F%3F+cd"
    932         result = urllib.parse.quote_plus(given, encoding="latin-1",
    933                                          errors="replace")
    934         self.assertEqual(expect, result,
    935                          "using quote_plus(): %r != %r" % (expect, result))
    936 
    937 
    938 class UnquotingTests(unittest.TestCase):
    939     """Tests for unquote() and unquote_plus()
    940 
     941     See the docstring for QuotingTests for details on quoting and such.
    942 
    943     """
    944 
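             # Illustrative values (not additional assertions): unquote() leaves '+'
             # alone, while unquote_plus() also maps '+' back to a space.
             #
             #     urllib.parse.unquote('a%20b+c')       -> 'a b+c'
             #     urllib.parse.unquote_plus('a%20b+c')  -> 'a b c'
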
    945     def test_unquoting(self):
    946         # Make sure unquoting of all ASCII values works
    947         escape_list = []
    948         for num in range(128):
    949             given = hexescape(chr(num))
    950             expect = chr(num)
    951             result = urllib.parse.unquote(given)
    952             self.assertEqual(expect, result,
    953                              "using unquote(): %r != %r" % (expect, result))
    954             result = urllib.parse.unquote_plus(given)
    955             self.assertEqual(expect, result,
    956                              "using unquote_plus(): %r != %r" %
    957                              (expect, result))
    958             escape_list.append(given)
    959         escape_string = ''.join(escape_list)
    960         del escape_list
    961         result = urllib.parse.unquote(escape_string)
    962         self.assertEqual(result.count('%'), 1,
     963                          "using unquote(): not all escapes were unquoted: "
    964                          "%s" % result)
    965         self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
    966         self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
    967         with support.check_warnings(('', BytesWarning), quiet=True):
    968             self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
    969 
    970     def test_unquoting_badpercent(self):
    971         # Test unquoting on bad percent-escapes
    972         given = '%xab'
    973         expect = given
    974         result = urllib.parse.unquote(given)
    975         self.assertEqual(expect, result, "using unquote(): %r != %r"
    976                          % (expect, result))
    977         given = '%x'
    978         expect = given
    979         result = urllib.parse.unquote(given)
    980         self.assertEqual(expect, result, "using unquote(): %r != %r"
    981                          % (expect, result))
    982         given = '%'
    983         expect = given
    984         result = urllib.parse.unquote(given)
    985         self.assertEqual(expect, result, "using unquote(): %r != %r"
    986                          % (expect, result))
    987         # unquote_to_bytes
    988         given = '%xab'
    989         expect = bytes(given, 'ascii')
    990         result = urllib.parse.unquote_to_bytes(given)
    991         self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
    992                          % (expect, result))
    993         given = '%x'
    994         expect = bytes(given, 'ascii')
    995         result = urllib.parse.unquote_to_bytes(given)
    996         self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
    997                          % (expect, result))
    998         given = '%'
    999         expect = bytes(given, 'ascii')
   1000         result = urllib.parse.unquote_to_bytes(given)
   1001         self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
   1002                          % (expect, result))
   1003         self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
   1004         self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
   1005 
   1006     def test_unquoting_mixed_case(self):
   1007         # Test unquoting on mixed-case hex digits in the percent-escapes
   1008         given = '%Ab%eA'
   1009         expect = b'\xab\xea'
   1010         result = urllib.parse.unquote_to_bytes(given)
   1011         self.assertEqual(expect, result,
   1012                          "using unquote_to_bytes(): %r != %r"
   1013                          % (expect, result))
   1014 
   1015     def test_unquoting_parts(self):
    1016         # Make sure unquoting works when non-quoted characters are
    1017         # interspersed with quoted ones
   1018         given = 'ab%sd' % hexescape('c')
   1019         expect = "abcd"
   1020         result = urllib.parse.unquote(given)
   1021         self.assertEqual(expect, result,
    1022                          "using unquote(): %r != %r" % (expect, result))
   1023         result = urllib.parse.unquote_plus(given)
   1024         self.assertEqual(expect, result,
   1025                          "using unquote_plus(): %r != %r" % (expect, result))
   1026 
   1027     def test_unquoting_plus(self):
   1028         # Test difference between unquote() and unquote_plus()
   1029         given = "are+there+spaces..."
   1030         expect = given
   1031         result = urllib.parse.unquote(given)
   1032         self.assertEqual(expect, result,
   1033                          "using unquote(): %r != %r" % (expect, result))
   1034         expect = given.replace('+', ' ')
   1035         result = urllib.parse.unquote_plus(given)
   1036         self.assertEqual(expect, result,
   1037                          "using unquote_plus(): %r != %r" % (expect, result))
   1038 
   1039     def test_unquote_to_bytes(self):
   1040         given = 'br%C3%BCckner_sapporo_20050930.doc'
   1041         expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
   1042         result = urllib.parse.unquote_to_bytes(given)
   1043         self.assertEqual(expect, result,
   1044                          "using unquote_to_bytes(): %r != %r"
   1045                          % (expect, result))
   1046         # Test on a string with unescaped non-ASCII characters
   1047         # (Technically an invalid URI; expect those characters to be UTF-8
   1048         # encoded).
   1049         result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
   1050         expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
   1051         self.assertEqual(expect, result,
   1052                          "using unquote_to_bytes(): %r != %r"
   1053                          % (expect, result))
   1054         # Test with a bytes as input
   1055         given = b'%A2%D8ab%FF'
   1056         expect = b'\xa2\xd8ab\xff'
   1057         result = urllib.parse.unquote_to_bytes(given)
   1058         self.assertEqual(expect, result,
   1059                          "using unquote_to_bytes(): %r != %r"
   1060                          % (expect, result))
   1061         # Test with a bytes as input, with unescaped non-ASCII bytes
   1062         # (Technically an invalid URI; expect those bytes to be preserved)
   1063         given = b'%A2\xd8ab%FF'
   1064         expect = b'\xa2\xd8ab\xff'
   1065         result = urllib.parse.unquote_to_bytes(given)
   1066         self.assertEqual(expect, result,
   1067                          "using unquote_to_bytes(): %r != %r"
   1068                          % (expect, result))
   1069 
   1070     def test_unquote_with_unicode(self):
   1071         # Characters in the Latin-1 range, encoded with UTF-8
   1072         given = 'br%C3%BCckner_sapporo_20050930.doc'
   1073         expect = 'br\u00fcckner_sapporo_20050930.doc'
   1074         result = urllib.parse.unquote(given)
   1075         self.assertEqual(expect, result,
   1076                          "using unquote(): %r != %r" % (expect, result))
   1077         # Characters in the Latin-1 range, encoded with None (default)
   1078         result = urllib.parse.unquote(given, encoding=None, errors=None)
   1079         self.assertEqual(expect, result,
   1080                          "using unquote(): %r != %r" % (expect, result))
   1081 
   1082         # Characters in the Latin-1 range, encoded with Latin-1
   1083         result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
   1084                                       encoding="latin-1")
   1085         expect = 'br\u00fcckner_sapporo_20050930.doc'
   1086         self.assertEqual(expect, result,
   1087                          "using unquote(): %r != %r" % (expect, result))
   1088 
   1089         # Characters in BMP, encoded with UTF-8
   1090         given = "%E6%BC%A2%E5%AD%97"
   1091         expect = "\u6f22\u5b57"             # "Kanji"
   1092         result = urllib.parse.unquote(given)
   1093         self.assertEqual(expect, result,
   1094                          "using unquote(): %r != %r" % (expect, result))
   1095 
   1096         # Decode with UTF-8, invalid sequence
   1097         given = "%F3%B1"
   1098         expect = "\ufffd"                   # Replacement character
   1099         result = urllib.parse.unquote(given)
   1100         self.assertEqual(expect, result,
   1101                          "using unquote(): %r != %r" % (expect, result))
   1102 
   1103         # Decode with UTF-8, invalid sequence, replace errors
   1104         result = urllib.parse.unquote(given, errors="replace")
   1105         self.assertEqual(expect, result,
   1106                          "using unquote(): %r != %r" % (expect, result))
   1107 
   1108         # Decode with UTF-8, invalid sequence, ignoring errors
   1109         given = "%F3%B1"
   1110         expect = ""
   1111         result = urllib.parse.unquote(given, errors="ignore")
   1112         self.assertEqual(expect, result,
   1113                          "using unquote(): %r != %r" % (expect, result))
   1114 
   1115         # A mix of non-ASCII and percent-encoded characters, UTF-8
   1116         result = urllib.parse.unquote("\u6f22%C3%BC")
   1117         expect = '\u6f22\u00fc'
   1118         self.assertEqual(expect, result,
   1119                          "using unquote(): %r != %r" % (expect, result))
   1120 
   1121         # A mix of non-ASCII and percent-encoded characters, Latin-1
   1122         # (Note, the string contains non-Latin-1-representable characters)
   1123         result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
   1124         expect = '\u6f22\u00fc'
   1125         self.assertEqual(expect, result,
   1126                          "using unquote(): %r != %r" % (expect, result))
   1127 
   1128 class urlencode_Tests(unittest.TestCase):
   1129     """Tests for urlencode()"""
   1130 
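             # For orientation, the kind of output checked below (illustrative; pair
             # order is preserved when a sequence of pairs is passed in):
             #
             #     urllib.parse.urlencode([('1st', '1'), ('2nd', '2'), ('3rd', '3')])
             #     -> '1st=1&2nd=2&3rd=3'
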
   1131     def help_inputtype(self, given, test_type):
   1132         """Helper method for testing different input types.
   1133 
   1134         'given' must lead to only the pairs:
   1135             * 1st, 1
   1136             * 2nd, 2
   1137             * 3rd, 3
   1138 
    1139         The test cannot assume anything about ordering: the docs make no
    1140         guarantee, and the input may be a dictionary.
   1141 
   1142         """
   1143         expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
   1144         result = urllib.parse.urlencode(given)
   1145         for expected in expect_somewhere:
   1146             self.assertIn(expected, result,
   1147                          "testing %s: %s not found in %s" %
   1148                          (test_type, expected, result))
   1149         self.assertEqual(result.count('&'), 2,
   1150                          "testing %s: expected 2 '&'s; got %s" %
   1151                          (test_type, result.count('&')))
   1152         amp_location = result.index('&')
   1153         on_amp_left = result[amp_location - 1]
   1154         on_amp_right = result[amp_location + 1]
   1155         self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
   1156                      "testing %s: '&' not located in proper place in %s" %
   1157                      (test_type, result))
   1158         self.assertEqual(len(result), (5 * 3) + 2,  # 5 chars per pair plus 2 '&'s
   1159                          "testing %s: "
   1160                          "unexpected number of characters: %s != %s" %
   1161                          (test_type, len(result), (5 * 3) + 2))
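                # Illustrative note on the arithmetic above: each pair such as
                # "1st=1" is 5 characters long and the three pairs are joined
                # by two '&' separators, e.g. "1st=1&2nd=2&3rd=3", giving
                # (5 * 3) + 2 == 17 characters in total.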
   1162 
   1163     def test_using_mapping(self):
   1164         # Test passing in a mapping object as an argument.
   1165         self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
   1166                             "using dict as input type")
   1167 
   1168     def test_using_sequence(self):
   1169         # Test passing in a sequence of two-item sequences as an argument.
   1170         self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
   1171                             "using sequence of two-item tuples as input")
   1172 
   1173     def test_quoting(self):
   1174         # Make sure keys and values are quoted using quote_plus()
   1175         given = {"&":"="}
   1176         expect = "%s=%s" % (hexescape('&'), hexescape('='))
   1177         result = urllib.parse.urlencode(given)
   1178         self.assertEqual(expect, result)
   1179         given = {"key name":"A bunch of pluses"}
   1180         expect = "key+name=A+bunch+of+pluses"
   1181         result = urllib.parse.urlencode(given)
   1182         self.assertEqual(expect, result)
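                # Illustrative note: hexescape('&') is '%26' and hexescape('=')
                # is '%3D', so the first case expects '%26=%3D'; in the second
                # case spaces become '+' because urlencode() quotes with
                # quote_plus(), e.g.:
                #     >>> urllib.parse.urlencode({'&': '='})
                #     '%26=%3D'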
   1183 
   1184     def test_doseq(self):
   1185         # Test that passing True for the 'doseq' parameter works correctly
   1186         given = {'sequence':['1', '2', '3']}
   1187         expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
   1188         result = urllib.parse.urlencode(given)
   1189         self.assertEqual(expect, result)
   1190         result = urllib.parse.urlencode(given, True)
   1191         for value in given["sequence"]:
   1192             expect = "sequence=%s" % value
   1193             self.assertIn(expect, result)
   1194         self.assertEqual(result.count('&'), 2,
   1195                          "Expected 2 '&'s, got %s" % result.count('&'))
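                # Illustrative note: with doseq left at its default of False the
                # whole list is stringified and quoted as a single value, which
                # works out to roughly
                #     'sequence=%5B%271%27%2C+%272%27%2C+%273%27%5D'
                # whereas doseq=True emits one 'sequence=<value>' pair per item.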
   1196 
   1197     def test_empty_sequence(self):
   1198         self.assertEqual("", urllib.parse.urlencode({}))
   1199         self.assertEqual("", urllib.parse.urlencode([]))
   1200 
   1201     def test_nonstring_values(self):
   1202         self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
   1203         self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
   1204 
   1205     def test_nonstring_seq_values(self):
   1206         self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
   1207         self.assertEqual("a=None&a=a",
   1208                          urllib.parse.urlencode({"a": [None, "a"]}, True))
   1209         data = collections.OrderedDict([("a", 1), ("b", 1)])
   1210         self.assertEqual("a=a&a=b",
   1211                          urllib.parse.urlencode({"a": data}, True))
   1212 
   1213     def test_urlencode_encoding(self):
   1214         # ASCII encoding. Expect %3F with errors="replace"
   1215         given = (('\u00a0', '\u00c1'),)
   1216         expect = '%3F=%3F'
   1217         result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
   1218         self.assertEqual(expect, result)
   1219 
   1220         # Default is UTF-8 encoding.
   1221         given = (('\u00a0', '\u00c1'),)
   1222         expect = '%C2%A0=%C3%81'
   1223         result = urllib.parse.urlencode(given)
   1224         self.assertEqual(expect, result)
   1225 
   1226         # Latin-1 encoding.
   1227         given = (('\u00a0', '\u00c1'),)
   1228         expect = '%A0=%C1'
   1229         result = urllib.parse.urlencode(given, encoding="latin-1")
   1230         self.assertEqual(expect, result)
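                # Illustrative note: '\u00a0' (no-break space) and '\u00c1'
                # ('A' with acute) encode to the byte pairs C2 A0 and C3 81 in
                # UTF-8 (hence '%C2%A0=%C3%81') but to the single bytes A0 and
                # C1 in Latin-1 (hence '%A0=%C1').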
   1231 
   1232     def test_urlencode_encoding_doseq(self):
   1233         # ASCII encoding. Expect %3F with errors="replace"
   1234         given = (('\u00a0', '\u00c1'),)
   1235         expect = '%3F=%3F'
   1236         result = urllib.parse.urlencode(given, doseq=True,
   1237                                         encoding="ASCII", errors="replace")
   1238         self.assertEqual(expect, result)
   1239 
   1240         # ASCII Encoding. On a sequence of values.
   1241         given = (("\u00a0", (1, "\u00c1")),)
   1242         expect = '%3F=1&%3F=%3F'
   1243         result = urllib.parse.urlencode(given, True,
   1244                                         encoding="ASCII", errors="replace")
   1245         self.assertEqual(expect, result)
   1246 
   1247         # UTF-8
   1248         given = (("\u00a0", "\u00c1"),)
   1249         expect = '%C2%A0=%C3%81'
   1250         result = urllib.parse.urlencode(given, True)
   1251         self.assertEqual(expect, result)
   1252 
   1253         given = (("\u00a0", (42, "\u00c1")),)
   1254         expect = '%C2%A0=42&%C2%A0=%C3%81'
   1255         result = urllib.parse.urlencode(given, True)
   1256         self.assertEqual(expect, result)
   1257 
   1258         # latin-1
   1259         given = (("\u00a0", "\u00c1"),)
   1260         expect = '%A0=%C1'
   1261         result = urllib.parse.urlencode(given, True, encoding="latin-1")
   1262         self.assertEqual(expect, result)
   1263 
   1264         given = (("\u00a0", (42, "\u00c1")),)
   1265         expect = '%A0=42&%A0=%C1'
   1266         result = urllib.parse.urlencode(given, True, encoding="latin-1")
   1267         self.assertEqual(expect, result)
   1268 
   1269     def test_urlencode_bytes(self):
   1270         given = ((b'\xa0\x24', b'\xc1\x24'),)
   1271         expect = '%A0%24=%C1%24'
   1272         result = urllib.parse.urlencode(given)
   1273         self.assertEqual(expect, result)
   1274         result = urllib.parse.urlencode(given, True)
   1275         self.assertEqual(expect, result)
   1276 
   1277         # Sequence of values
   1278         given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
   1279         expect = '%A0%24=42&%A0%24=%C1%24'
   1280         result = urllib.parse.urlencode(given, True)
   1281         self.assertEqual(expect, result)
   1282 
   1283     def test_urlencode_encoding_safe_parameter(self):
   1284 
   1285         # Send '$' (\x24) as safe character
   1286         # Default utf-8 encoding
   1287 
   1288         given = ((b'\xa0\x24', b'\xc1\x24'),)
   1289         result = urllib.parse.urlencode(given, safe=":$")
   1290         expect = '%A0$=%C1$'
   1291         self.assertEqual(expect, result)
   1292 
   1293         given = ((b'\xa0\x24', b'\xc1\x24'),)
   1294         result = urllib.parse.urlencode(given, doseq=True, safe=":$")
   1295         expect = '%A0$=%C1$'
   1296         self.assertEqual(expect, result)
   1297 
   1298         # Safe parameter in sequence
   1299         given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
   1300         expect = '%A0$=%C1$&%A0$=13&%A0$=42'
   1301         result = urllib.parse.urlencode(given, True, safe=":$")
   1302         self.assertEqual(expect, result)
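                # Illustrative note: with doseq=True, non-string values in the
                # sequence are converted with str() before quoting, so 0xd and
                # 42 appear literally as '13' and '42', while '$' (0x24) stays
                # unescaped because it is listed in the 'safe' argument.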
   1303 
   1304         # Test all above in latin-1 encoding
   1305 
   1306         given = ((b'\xa0\x24', b'\xc1\x24'),)
   1307         result = urllib.parse.urlencode(given, safe=":$",
   1308                                         encoding="latin-1")
   1309         expect = '%A0$=%C1$'
   1310         self.assertEqual(expect, result)
   1311 
   1312         given = ((b'\xa0\x24', b'\xc1\x24'),)
   1313         expect = '%A0$=%C1$'
   1314         result = urllib.parse.urlencode(given, doseq=True, safe=":$",
   1315                                         encoding="latin-1")
                self.assertEqual(expect, result)
   1316 
   1317         given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
   1318         expect = '%A0$=%C1$&%A0$=13&%A0$=42'
   1319         result = urllib.parse.urlencode(given, True, safe=":$",
   1320                                         encoding="latin-1")
   1321         self.assertEqual(expect, result)
   1322 
   1323 class Pathname_Tests(unittest.TestCase):
   1324     """Test pathname2url() and url2pathname()"""
   1325 
   1326     def test_basic(self):
   1327         # Make sure simple tests pass
   1328         expected_path = os.path.join("parts", "of", "a", "path")
   1329         expected_url = "parts/of/a/path"
   1330         result = urllib.request.pathname2url(expected_path)
   1331         self.assertEqual(expected_url, result,
   1332                          "pathname2url() failed; %s != %s" %
   1333                          (result, expected_url))
   1334         result = urllib.request.url2pathname(expected_url)
   1335         self.assertEqual(expected_path, result,
   1336                          "url2pathname() failed; %s != %s" %
   1337                          (result, expected_path))
   1338 
   1339     def test_quoting(self):
   1340         # Test that automatic quoting and unquoting work for pathname2url()
   1341         # and url2pathname() respectively
   1342         given = os.path.join("needs", "quot=ing", "here")
   1343         expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
   1344         result = urllib.request.pathname2url(given)
   1345         self.assertEqual(expect, result,
   1346                          "pathname2url() failed; %s != %s" %
   1347                          (expect, result))
   1348         expect = given
   1349         result = urllib.request.url2pathname(result)
   1350         self.assertEqual(expect, result,
   1351                          "url2pathname() failed; %s != %s" %
   1352                          (expect, result))
   1353         given = os.path.join("make sure", "using_quote")
   1354         expect = "%s/using_quote" % urllib.parse.quote("make sure")
   1355         result = urllib.request.pathname2url(given)
   1356         self.assertEqual(expect, result,
   1357                          "pathname2url() failed; %s != %s" %
   1358                          (expect, result))
   1359         given = "make+sure/using_unquote"
   1360         expect = os.path.join("make+sure", "using_unquote")
   1361         result = urllib.request.url2pathname(given)
   1362         self.assertEqual(expect, result,
   1363                          "url2pathname() failed; %s != %s" %
   1364                          (expect, result))
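                # Illustrative note: urllib.parse.quote() leaves '/' alone by
                # default but escapes '=' and spaces, e.g.:
                #     >>> urllib.parse.quote("quot=ing")
                #     'quot%3Ding'
                #     >>> urllib.parse.quote("make sure")
                #     'make%20sure'
                # url2pathname() does not translate '+', so "make+sure"
                # round-trips unchanged.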
   1365 
   1366     @unittest.skipUnless(sys.platform == 'win32',
   1367                          'test specific to the urllib.request.url2pathname function.')
   1368     def test_ntpath(self):
   1369         given = ('/C:/', '///C:/', '/C|//')
   1370         expect = 'C:\\'
   1371         for url in given:
   1372             result = urllib.request.url2pathname(url)
   1373             self.assertEqual(expect, result,
   1374                              'urllib.request.url2pathname() failed; %s != %s' %
   1375                              (expect, result))
   1376         given = '///C|/path'
   1377         expect = 'C:\\path'
   1378         result = urllib.request.url2pathname(given)
   1379         self.assertEqual(expect, result,
   1380                          'urllib.request.url2pathname() failed; %s != %s' %
   1381                          (expect, result))
   1382 
   1383 class Utility_Tests(unittest.TestCase):
   1384     """Test case for the various utility functions in urllib."""
   1385 
   1386     def test_thishost(self):
   1387         """Test that the urllib.request.thishost() utility function returns a tuple."""
   1388         self.assertIsInstance(urllib.request.thishost(), tuple)
   1389 
   1390 
   1391 class URLopener_Tests(unittest.TestCase):
   1392     """Test case for the open() method of the URLopener class."""
   1393 
   1394     def test_quoted_open(self):
   1395         class DummyURLopener(urllib.request.URLopener):
   1396             def open_spam(self, url):
   1397                 return url
   1398         with support.check_warnings(
   1399                 ('DummyURLopener style of invoking requests is deprecated.',
   1400                 DeprecationWarning)):
   1401             self.assertEqual(DummyURLopener().open(
   1402                 'spam://example/ /'),'//example/%20/')
   1403 
   1404             # test that safe characters are not quoted by URLopener.open()
   1405             self.assertEqual(DummyURLopener().open(
   1406                 "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
   1407                 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
   1408 
   1409 # Just commented them out.
   1410 # Can't really tell why they keep failing on Windows and SPARC.
   1411 # Everywhere else they work fine, but on those machines they sometimes
   1412 # fail in one of the tests, sometimes in another. I have a Linux box, and
   1413 # there the tests pass.
   1414 # If anybody has one of the problematic environments, please help!
   1415 # .   Facundo
   1416 #
   1417 # def server(evt):
   1418 #     import socket, time
   1419 #     serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
   1420 #     serv.settimeout(3)
   1421 #     serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
   1422 #     serv.bind(("", 9093))
   1423 #     serv.listen()
   1424 #     try:
   1425 #         conn, addr = serv.accept()
   1426 #         conn.send("1 Hola mundo\n")
   1427 #         cantdata = 0
   1428 #         while cantdata < 13:
   1429 #             data = conn.recv(13-cantdata)
   1430 #             cantdata += len(data)
   1431 #             time.sleep(.3)
   1432 #         conn.send("2 No more lines\n")
   1433 #         conn.close()
   1434 #     except socket.timeout:
   1435 #         pass
   1436 #     finally:
   1437 #         serv.close()
   1438 #         evt.set()
   1439 #
   1440 # class FTPWrapperTests(unittest.TestCase):
   1441 #
   1442 #     def setUp(self):
   1443 #         import ftplib, time, threading
   1444 #         ftplib.FTP.port = 9093
   1445 #         self.evt = threading.Event()
   1446 #         threading.Thread(target=server, args=(self.evt,)).start()
   1447 #         time.sleep(.1)
   1448 #
   1449 #     def tearDown(self):
   1450 #         self.evt.wait()
   1451 #
   1452 #     def testBasic(self):
   1453 #         # connects
   1454 #         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
   1455 #         ftp.close()
   1456 #
   1457 #     def testTimeoutNone(self):
   1458 #         # global default timeout is ignored
   1459 #         import socket
   1460 #         self.assertIsNone(socket.getdefaulttimeout())
   1461 #         socket.setdefaulttimeout(30)
   1462 #         try:
   1463 #             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
   1464 #         finally:
   1465 #             socket.setdefaulttimeout(None)
   1466 #         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
   1467 #         ftp.close()
   1468 #
   1469 #     def testTimeoutDefault(self):
   1470 #         # global default timeout is used
   1471 #         import socket
   1472 #         self.assertIsNone(socket.getdefaulttimeout())
   1473 #         socket.setdefaulttimeout(30)
   1474 #         try:
   1475 #             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
   1476 #         finally:
   1477 #             socket.setdefaulttimeout(None)
   1478 #         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
   1479 #         ftp.close()
   1480 #
   1481 #     def testTimeoutValue(self):
   1482 #         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
   1483 #                                 timeout=30)
   1484 #         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
   1485 #         ftp.close()
   1486 
   1487 
   1488 class RequestTests(unittest.TestCase):
   1489     """Unit tests for urllib.request.Request."""
   1490 
   1491     def test_default_values(self):
   1492         Request = urllib.request.Request
   1493         request = Request("http://www.python.org")
   1494         self.assertEqual(request.get_method(), 'GET')
   1495         request = Request("http://www.python.org", {})
   1496         self.assertEqual(request.get_method(), 'POST')
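                # Illustrative note: passing any non-None data argument (even an
                # empty mapping as here) marks the request as having a body, so
                # get_method() reports 'POST' instead of the default 'GET'.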
   1497 
   1498     def test_with_method_arg(self):
   1499         Request = urllib.request.Request
   1500         request = Request("http://www.python.org", method='HEAD')
   1501         self.assertEqual(request.method, 'HEAD')
   1502         self.assertEqual(request.get_method(), 'HEAD')
   1503         request = Request("http://www.python.org", {}, method='HEAD')
   1504         self.assertEqual(request.method, 'HEAD')
   1505         self.assertEqual(request.get_method(), 'HEAD')
   1506         request = Request("http://www.python.org", method='GET')
   1507         self.assertEqual(request.get_method(), 'GET')
   1508         request.method = 'HEAD'
   1509         self.assertEqual(request.get_method(), 'HEAD')
   1510 
   1511 
   1512 class URL2PathNameTests(unittest.TestCase):
   1513 
   1514     def test_converting_drive_letter(self):
   1515         self.assertEqual(url2pathname("///C|"), 'C:')
   1516         self.assertEqual(url2pathname("///C:"), 'C:')
   1517         self.assertEqual(url2pathname("///C|/"), 'C:\\')
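                # Illustrative note: nturl2path accepts both ':' and the legacy
                # '|' spelling of the drive separator, which is why '///C|' and
                # '///C:' both map to 'C:' here.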
   1518 
   1519     def test_converting_when_no_drive_letter(self):
   1520         # cannot end a raw string in \
   1521         self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\')
   1522         self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\')
   1523 
   1524     def test_simple_compare(self):
   1525         self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"),
   1526                          r'C:\foo\bar\spam.foo')
   1527 
   1528     def test_non_ascii_drive_letter(self):
   1529         self.assertRaises(IOError, url2pathname, "///\u00e8|/")
   1530 
   1531     def test_roundtrip_url2pathname(self):
   1532         list_of_paths = ['C:',
   1533                          r'\\\C\test\\',
   1534                          r'C:\foo\bar\spam.foo'
   1535                          ]
   1536         for path in list_of_paths:
   1537             self.assertEqual(url2pathname(pathname2url(path)), path)
   1538 
   1539 class PathName2URLTests(unittest.TestCase):
   1540 
   1541     def test_converting_drive_letter(self):
   1542         self.assertEqual(pathname2url("C:"), '///C:')
   1543         self.assertEqual(pathname2url("C:\\"), '///C:')
   1544 
   1545     def test_converting_when_no_drive_letter(self):
   1546         self.assertEqual(pathname2url(r"\\\folder\test" "\\"),
   1547                          '/////folder/test/')
   1548         self.assertEqual(pathname2url(r"\\folder\test" "\\"),
   1549                          '////folder/test/')
   1550         self.assertEqual(pathname2url(r"\folder\test" "\\"),
   1551                          '/folder/test/')
   1552 
   1553     def test_simple_compare(self):
   1554         self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'),
   1555                          "///C:/foo/bar/spam.foo" )
   1556 
   1557     def test_long_drive_letter(self):
   1558         self.assertRaises(IOError, pathname2url, "XX:\\")
   1559 
   1560     def test_roundtrip_pathname2url(self):
   1561         list_of_paths = ['///C:',
   1562                          '/////folder/test/',
   1563                          '///C:/foo/bar/spam.foo']
   1564         for path in list_of_paths:
   1565             self.assertEqual(pathname2url(url2pathname(path)), path)
   1566 
   1567 if __name__ == '__main__':
   1568     unittest.main()
   1569