Home | History | Annotate | Download | only in test
      1 """Regression tests for urllib"""
      2 
      3 import collections
      4 import urllib
      5 import httplib
      6 import io
      7 import unittest
      8 import os
      9 import sys
     10 import mimetools
     11 import tempfile
     12 
     13 from test import test_support
     14 from base64 import b64encode
     15 
     16 
     17 def hexescape(char):
     18     """Escape char as RFC 2396 specifies"""
     19     hex_repr = hex(ord(char))[2:].upper()
     20     if len(hex_repr) == 1:
     21         hex_repr = "0%s" % hex_repr
     22     return "%" + hex_repr
     23 
     24 
     25 def fakehttp(fakedata):
     26     class FakeSocket(io.BytesIO):
     27 
     28         def sendall(self, data):
     29             FakeHTTPConnection.buf = data
     30 
     31         def makefile(self, *args, **kwds):
     32             return self
     33 
     34         def read(self, amt=None):
     35             if self.closed:
     36                 return b""
     37             return io.BytesIO.read(self, amt)
     38 
     39         def readline(self, length=None):
     40             if self.closed:
     41                 return b""
     42             return io.BytesIO.readline(self, length)
     43 
     44     class FakeHTTPConnection(httplib.HTTPConnection):
     45 
     46         # buffer to store data for verification in urlopen tests.
     47         buf = ""
     48 
     49         def connect(self):
     50             self.sock = FakeSocket(self.fakedata)
     51             self.__class__.fakesock = self.sock
     52     FakeHTTPConnection.fakedata = fakedata
     53 
     54     return FakeHTTPConnection
     55 
     56 
     57 class FakeHTTPMixin(object):
     58     def fakehttp(self, fakedata):
     59         assert httplib.HTTP._connection_class == httplib.HTTPConnection
     60 
     61         httplib.HTTP._connection_class = fakehttp(fakedata)
     62 
     63     def unfakehttp(self):
     64         httplib.HTTP._connection_class = httplib.HTTPConnection
     65 
     66 
     67 class urlopen_FileTests(unittest.TestCase):
     68     """Test urlopen() opening a temporary file.
     69 
     70     Try to test as much functionality as possible so as to cut down on reliance
     71     on connecting to the Net for testing.
     72 
     73     """
     74 
     75     def setUp(self):
     76         """Setup of a temp file to use for testing"""
     77         self.text = "test_urllib: %s\n" % self.__class__.__name__
     78         FILE = file(test_support.TESTFN, 'wb')
     79         try:
     80             FILE.write(self.text)
     81         finally:
     82             FILE.close()
     83         self.pathname = test_support.TESTFN
     84         self.returned_obj = urllib.urlopen("file:%s" % self.pathname)
     85 
     86     def tearDown(self):
     87         """Shut down the open object"""
     88         self.returned_obj.close()
     89         os.remove(test_support.TESTFN)
     90 
     91     def test_interface(self):
     92         # Make sure object returned by urlopen() has the specified methods
     93         for attr in ("read", "readline", "readlines", "fileno",
     94                      "close", "info", "geturl", "getcode", "__iter__"):
     95             self.assertTrue(hasattr(self.returned_obj, attr),
     96                          "object returned by urlopen() lacks %s attribute" %
     97                          attr)
     98 
     99     def test_read(self):
    100         self.assertEqual(self.text, self.returned_obj.read())
    101 
    102     def test_readline(self):
    103         self.assertEqual(self.text, self.returned_obj.readline())
    104         self.assertEqual('', self.returned_obj.readline(),
    105                          "calling readline() after exhausting the file did not"
    106                          " return an empty string")
    107 
    108     def test_readlines(self):
    109         lines_list = self.returned_obj.readlines()
    110         self.assertEqual(len(lines_list), 1,
    111                          "readlines() returned the wrong number of lines")
    112         self.assertEqual(lines_list[0], self.text,
    113                          "readlines() returned improper text")
    114 
    115     def test_fileno(self):
    116         file_num = self.returned_obj.fileno()
    117         self.assertIsInstance(file_num, int, "fileno() did not return an int")
    118         self.assertEqual(os.read(file_num, len(self.text)), self.text,
    119                          "Reading on the file descriptor returned by fileno() "
    120                          "did not return the expected text")
    121 
    122     def test_close(self):
    123         # Test close() by calling it hear and then having it be called again
    124         # by the tearDown() method for the test
    125         self.returned_obj.close()
    126 
    127     def test_info(self):
    128         self.assertIsInstance(self.returned_obj.info(), mimetools.Message)
    129 
    130     def test_geturl(self):
    131         self.assertEqual(self.returned_obj.geturl(), self.pathname)
    132 
    133     def test_getcode(self):
    134         self.assertEqual(self.returned_obj.getcode(), None)
    135 
    136     def test_iter(self):
    137         # Test iterator
    138         # Don't need to count number of iterations since test would fail the
    139         # instant it returned anything beyond the first line from the
    140         # comparison
    141         for line in self.returned_obj.__iter__():
    142             self.assertEqual(line, self.text)
    143 
    144     def test_relativelocalfile(self):
    145         self.assertRaises(ValueError,urllib.urlopen,'./' + self.pathname)
    146 
    147 class ProxyTests(unittest.TestCase):
    148 
    149     def setUp(self):
    150         # Records changes to env vars
    151         self.env = test_support.EnvironmentVarGuard()
    152         # Delete all proxy related env vars
    153         for k in os.environ.keys():
    154             if 'proxy' in k.lower():
    155                 self.env.unset(k)
    156 
    157     def tearDown(self):
    158         # Restore all proxy related env vars
    159         self.env.__exit__()
    160         del self.env
    161 
    162     def test_getproxies_environment_keep_no_proxies(self):
    163         self.env.set('NO_PROXY', 'localhost')
    164         proxies = urllib.getproxies_environment()
    165         # getproxies_environment use lowered case truncated (no '_proxy') keys
    166         self.assertEqual('localhost', proxies['no'])
    167         # List of no_proxies with space.
    168         self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
    169         self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com'))
    170         self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com:8888'))
    171         self.assertTrue(urllib.proxy_bypass_environment('newdomain.com:1234'))
    172 
    173     def test_proxy_cgi_ignore(self):
    174         try:
    175             self.env.set('HTTP_PROXY', 'http://somewhere:3128')
    176             proxies = urllib.getproxies_environment()
    177             self.assertEqual('http://somewhere:3128', proxies['http'])
    178             self.env.set('REQUEST_METHOD', 'GET')
    179             proxies = urllib.getproxies_environment()
    180             self.assertNotIn('http', proxies)
    181         finally:
    182             self.env.unset('REQUEST_METHOD')
    183             self.env.unset('HTTP_PROXY')
    184 
    185     def test_proxy_bypass_environment_host_match(self):
    186         bypass = urllib.proxy_bypass_environment
    187         self.env.set('NO_PROXY',
    188             'localhost, anotherdomain.com, newdomain.com:1234')
    189         self.assertTrue(bypass('localhost'))
    190         self.assertTrue(bypass('LocalHost'))                 # MixedCase
    191         self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
    192         self.assertTrue(bypass('newdomain.com:1234'))
    193         self.assertTrue(bypass('anotherdomain.com:8888'))
    194         self.assertTrue(bypass('www.newdomain.com:1234'))
    195         self.assertFalse(bypass('prelocalhost'))
    196         self.assertFalse(bypass('newdomain.com'))            # no port
    197         self.assertFalse(bypass('newdomain.com:1235'))       # wrong port
    198 
    199 class ProxyTests_withOrderedEnv(unittest.TestCase):
    200 
    201     def setUp(self):
    202         # We need to test conditions, where variable order _is_ significant
    203         self._saved_env = os.environ
    204         # Monkey patch os.environ, start with empty fake environment
    205         os.environ = collections.OrderedDict()
    206 
    207     def tearDown(self):
    208         os.environ = self._saved_env
    209 
    210     def test_getproxies_environment_prefer_lowercase(self):
    211         # Test lowercase preference with removal
    212         os.environ['no_proxy'] = ''
    213         os.environ['No_Proxy'] = 'localhost'
    214         self.assertFalse(urllib.proxy_bypass_environment('localhost'))
    215         self.assertFalse(urllib.proxy_bypass_environment('arbitrary'))
    216         os.environ['http_proxy'] = ''
    217         os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
    218         proxies = urllib.getproxies_environment()
    219         self.assertEqual({}, proxies)
    220         # Test lowercase preference of proxy bypass and correct matching including ports
    221         os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
    222         os.environ['No_Proxy'] = 'xyz.com'
    223         self.assertTrue(urllib.proxy_bypass_environment('localhost'))
    224         self.assertTrue(urllib.proxy_bypass_environment('noproxy.com:5678'))
    225         self.assertTrue(urllib.proxy_bypass_environment('my.proxy:1234'))
    226         self.assertFalse(urllib.proxy_bypass_environment('my.proxy'))
    227         self.assertFalse(urllib.proxy_bypass_environment('arbitrary'))
    228         # Test lowercase preference with replacement
    229         os.environ['http_proxy'] = 'http://somewhere:3128'
    230         os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
    231         proxies = urllib.getproxies_environment()
    232         self.assertEqual('http://somewhere:3128', proxies['http'])
    233 
    234 
    235 class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
    236     """Test urlopen() opening a fake http connection."""
    237 
    238     def test_read(self):
    239         self.fakehttp('Hello!')
    240         try:
    241             fp = urllib.urlopen("http://python.org/")
    242             self.assertEqual(fp.readline(), 'Hello!')
    243             self.assertEqual(fp.readline(), '')
    244             self.assertEqual(fp.geturl(), 'http://python.org/')
    245             self.assertEqual(fp.getcode(), 200)
    246         finally:
    247             self.unfakehttp()
    248 
    249     def test_url_fragment(self):
    250         # Issue #11703: geturl() omits fragments in the original URL.
    251         url = 'http://docs.python.org/library/urllib.html#OK'
    252         self.fakehttp('Hello!')
    253         try:
    254             fp = urllib.urlopen(url)
    255             self.assertEqual(fp.geturl(), url)
    256         finally:
    257             self.unfakehttp()
    258 
    259     def test_read_bogus(self):
    260         # urlopen() should raise IOError for many error codes.
    261         self.fakehttp('''HTTP/1.1 401 Authentication Required
    262 Date: Wed, 02 Jan 2008 03:03:54 GMT
    263 Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
    264 Connection: close
    265 Content-Type: text/html; charset=iso-8859-1
    266 ''')
    267         try:
    268             self.assertRaises(IOError, urllib.urlopen, "http://python.org/")
    269         finally:
    270             self.unfakehttp()
    271 
    272     def test_invalid_redirect(self):
    273         # urlopen() should raise IOError for many error codes.
    274         self.fakehttp("""HTTP/1.1 302 Found
    275 Date: Wed, 02 Jan 2008 03:03:54 GMT
    276 Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
    277 Location: file:README
    278 Connection: close
    279 Content-Type: text/html; charset=iso-8859-1
    280 """)
    281         try:
    282             msg = "Redirection to url 'file:"
    283             with self.assertRaisesRegexp(IOError, msg):
    284                 urllib.urlopen("http://python.org/")
    285         finally:
    286             self.unfakehttp()
    287 
    288     def test_redirect_limit_independent(self):
    289         # Ticket #12923: make sure independent requests each use their
    290         # own retry limit.
    291         for i in range(urllib.FancyURLopener().maxtries):
    292             self.fakehttp(b'''HTTP/1.1 302 Found
    293 Location: file://guidocomputer.athome.com:/python/license
    294 Connection: close
    295 ''')
    296             try:
    297                 self.assertRaises(IOError, urllib.urlopen,
    298                     "http://something")
    299             finally:
    300                 self.unfakehttp()
    301 
    302     def test_empty_socket(self):
    303         # urlopen() raises IOError if the underlying socket does not send any
    304         # data. (#1680230)
    305         self.fakehttp('')
    306         try:
    307             self.assertRaises(IOError, urllib.urlopen, 'http://something')
    308         finally:
    309             self.unfakehttp()
    310 
    311     def test_missing_localfile(self):
    312         self.assertRaises(IOError, urllib.urlopen,
    313                 'file://localhost/a/missing/file.py')
    314         fd, tmp_file = tempfile.mkstemp()
    315         tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
    316         self.assertTrue(os.path.exists(tmp_file))
    317         try:
    318             fp = urllib.urlopen(tmp_fileurl)
    319             fp.close()
    320         finally:
    321             os.close(fd)
    322             os.unlink(tmp_file)
    323 
    324         self.assertFalse(os.path.exists(tmp_file))
    325         self.assertRaises(IOError, urllib.urlopen, tmp_fileurl)
    326 
    327     def test_ftp_nonexisting(self):
    328         self.assertRaises(IOError, urllib.urlopen,
    329                 'ftp://localhost/not/existing/file.py')
    330 
    331 
    332     def test_userpass_inurl(self):
    333         self.fakehttp('Hello!')
    334         try:
    335             fakehttp_wrapper = httplib.HTTP._connection_class
    336             fp = urllib.urlopen("http://user:pass@python.org/")
    337             authorization = ("Authorization: Basic %s\r\n" %
    338                             b64encode('user:pass'))
    339             # The authorization header must be in place
    340             self.assertIn(authorization, fakehttp_wrapper.buf)
    341             self.assertEqual(fp.readline(), "Hello!")
    342             self.assertEqual(fp.readline(), "")
    343             self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
    344             self.assertEqual(fp.getcode(), 200)
    345         finally:
    346             self.unfakehttp()
    347 
    348     def test_userpass_with_spaces_inurl(self):
    349         self.fakehttp('Hello!')
    350         try:
    351             url = "http://a b:c d (at] python.org/"
    352             fakehttp_wrapper = httplib.HTTP._connection_class
    353             authorization = ("Authorization: Basic %s\r\n" %
    354                              b64encode('a b:c d'))
    355             fp = urllib.urlopen(url)
    356             # The authorization header must be in place
    357             self.assertIn(authorization, fakehttp_wrapper.buf)
    358             self.assertEqual(fp.readline(), "Hello!")
    359             self.assertEqual(fp.readline(), "")
    360             # the spaces are quoted in URL so no match
    361             self.assertNotEqual(fp.geturl(), url)
    362             self.assertEqual(fp.getcode(), 200)
    363         finally:
    364             self.unfakehttp()
    365 
    366 
    367 class urlretrieve_FileTests(unittest.TestCase):
    368     """Test urllib.urlretrieve() on local files"""
    369 
    370     def setUp(self):
    371         # Create a list of temporary files. Each item in the list is a file
    372         # name (absolute path or relative to the current working directory).
    373         # All files in this list will be deleted in the tearDown method. Note,
    374         # this only helps to makes sure temporary files get deleted, but it
    375         # does nothing about trying to close files that may still be open. It
    376         # is the responsibility of the developer to properly close files even
    377         # when exceptional conditions occur.
    378         self.tempFiles = []
    379 
    380         # Create a temporary file.
    381         self.registerFileForCleanUp(test_support.TESTFN)
    382         self.text = 'testing urllib.urlretrieve'
    383         try:
    384             FILE = file(test_support.TESTFN, 'wb')
    385             FILE.write(self.text)
    386             FILE.close()
    387         finally:
    388             try: FILE.close()
    389             except: pass
    390 
    391     def tearDown(self):
    392         # Delete the temporary files.
    393         for each in self.tempFiles:
    394             try: os.remove(each)
    395             except: pass
    396 
    397     def constructLocalFileUrl(self, filePath):
    398         return "file://%s" % urllib.pathname2url(os.path.abspath(filePath))
    399 
    400     def createNewTempFile(self, data=""):
    401         """Creates a new temporary file containing the specified data,
    402         registers the file for deletion during the test fixture tear down, and
    403         returns the absolute path of the file."""
    404 
    405         newFd, newFilePath = tempfile.mkstemp()
    406         try:
    407             self.registerFileForCleanUp(newFilePath)
    408             newFile = os.fdopen(newFd, "wb")
    409             newFile.write(data)
    410             newFile.close()
    411         finally:
    412             try: newFile.close()
    413             except: pass
    414         return newFilePath
    415 
    416     def registerFileForCleanUp(self, fileName):
    417         self.tempFiles.append(fileName)
    418 
    419     def test_basic(self):
    420         # Make sure that a local file just gets its own location returned and
    421         # a headers value is returned.
    422         result = urllib.urlretrieve("file:%s" % test_support.TESTFN)
    423         self.assertEqual(result[0], test_support.TESTFN)
    424         self.assertIsInstance(result[1], mimetools.Message,
    425                               "did not get a mimetools.Message instance as "
    426                               "second returned value")
    427 
    428     def test_copy(self):
    429         # Test that setting the filename argument works.
    430         second_temp = "%s.2" % test_support.TESTFN
    431         self.registerFileForCleanUp(second_temp)
    432         result = urllib.urlretrieve(self.constructLocalFileUrl(
    433             test_support.TESTFN), second_temp)
    434         self.assertEqual(second_temp, result[0])
    435         self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
    436                                                   "made")
    437         FILE = file(second_temp, 'rb')
    438         try:
    439             text = FILE.read()
    440             FILE.close()
    441         finally:
    442             try: FILE.close()
    443             except: pass
    444         self.assertEqual(self.text, text)
    445 
    446     def test_reporthook(self):
    447         # Make sure that the reporthook works.
    448         def hooktester(count, block_size, total_size, count_holder=[0]):
    449             self.assertIsInstance(count, int)
    450             self.assertIsInstance(block_size, int)
    451             self.assertIsInstance(total_size, int)
    452             self.assertEqual(count, count_holder[0])
    453             count_holder[0] = count_holder[0] + 1
    454         second_temp = "%s.2" % test_support.TESTFN
    455         self.registerFileForCleanUp(second_temp)
    456         urllib.urlretrieve(self.constructLocalFileUrl(test_support.TESTFN),
    457             second_temp, hooktester)
    458 
    459     def test_reporthook_0_bytes(self):
    460         # Test on zero length file. Should call reporthook only 1 time.
    461         report = []
    462         def hooktester(count, block_size, total_size, _report=report):
    463             _report.append((count, block_size, total_size))
    464         srcFileName = self.createNewTempFile()
    465         urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
    466             test_support.TESTFN, hooktester)
    467         self.assertEqual(len(report), 1)
    468         self.assertEqual(report[0][2], 0)
    469 
    470     def test_reporthook_5_bytes(self):
    471         # Test on 5 byte file. Should call reporthook only 2 times (once when
    472         # the "network connection" is established and once when the block is
    473         # read). Since the block size is 8192 bytes, only one block read is
    474         # required to read the entire file.
    475         report = []
    476         def hooktester(count, block_size, total_size, _report=report):
    477             _report.append((count, block_size, total_size))
    478         srcFileName = self.createNewTempFile("x" * 5)
    479         urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
    480             test_support.TESTFN, hooktester)
    481         self.assertEqual(len(report), 2)
    482         self.assertEqual(report[0][1], 8192)
    483         self.assertEqual(report[0][2], 5)
    484 
    485     def test_reporthook_8193_bytes(self):
    486         # Test on 8193 byte file. Should call reporthook only 3 times (once
    487         # when the "network connection" is established, once for the next 8192
    488         # bytes, and once for the last byte).
    489         report = []
    490         def hooktester(count, block_size, total_size, _report=report):
    491             _report.append((count, block_size, total_size))
    492         srcFileName = self.createNewTempFile("x" * 8193)
    493         urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
    494             test_support.TESTFN, hooktester)
    495         self.assertEqual(len(report), 3)
    496         self.assertEqual(report[0][1], 8192)
    497         self.assertEqual(report[0][2], 8193)
    498 
    499 
    500 class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    501     """Test urllib.urlretrieve() using fake http connections"""
    502 
    503     def test_short_content_raises_ContentTooShortError(self):
    504         self.fakehttp('''HTTP/1.1 200 OK
    505 Date: Wed, 02 Jan 2008 03:03:54 GMT
    506 Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
    507 Connection: close
    508 Content-Length: 100
    509 Content-Type: text/html; charset=iso-8859-1
    510 
    511 FF
    512 ''')
    513 
    514         def _reporthook(par1, par2, par3):
    515             pass
    516 
    517         try:
    518             self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve,
    519                     'http://example.com', reporthook=_reporthook)
    520         finally:
    521             self.unfakehttp()
    522 
    523     def test_short_content_raises_ContentTooShortError_without_reporthook(self):
    524         self.fakehttp('''HTTP/1.1 200 OK
    525 Date: Wed, 02 Jan 2008 03:03:54 GMT
    526 Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
    527 Connection: close
    528 Content-Length: 100
    529 Content-Type: text/html; charset=iso-8859-1
    530 
    531 FF
    532 ''')
    533         try:
    534             self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve, 'http://example.com/')
    535         finally:
    536             self.unfakehttp()
    537 
    538 class QuotingTests(unittest.TestCase):
    539     """Tests for urllib.quote() and urllib.quote_plus()
    540 
    541     According to RFC 2396 ("Uniform Resource Identifiers), to escape a
    542     character you write it as '%' + <2 character US-ASCII hex value>.  The Python
    543     code of ``'%' + hex(ord(<character>))[2:]`` escapes a character properly.
    544     Case does not matter on the hex letters.
    545 
    546     The various character sets specified are:
    547 
    548     Reserved characters : ";/?:@&=+$,"
    549         Have special meaning in URIs and must be escaped if not being used for
    550         their special meaning
    551     Data characters : letters, digits, and "-_.!~*'()"
    552         Unreserved and do not need to be escaped; can be, though, if desired
    553     Control characters : 0x00 - 0x1F, 0x7F
    554         Have no use in URIs so must be escaped
    555     space : 0x20
    556         Must be escaped
    557     Delimiters : '<>#%"'
    558         Must be escaped
    559     Unwise : "{}|\^[]`"
    560         Must be escaped
    561 
    562     """
    563 
    564     def test_never_quote(self):
    565         # Make sure quote() does not quote letters, digits, and "_,.-"
    566         do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    567                                  "abcdefghijklmnopqrstuvwxyz",
    568                                  "0123456789",
    569                                  "_.-"])
    570         result = urllib.quote(do_not_quote)
    571         self.assertEqual(do_not_quote, result,
    572                          "using quote(): %s != %s" % (do_not_quote, result))
    573         result = urllib.quote_plus(do_not_quote)
    574         self.assertEqual(do_not_quote, result,
    575                         "using quote_plus(): %s != %s" % (do_not_quote, result))
    576 
    577     def test_default_safe(self):
    578         # Test '/' is default value for 'safe' parameter
    579         self.assertEqual(urllib.quote.func_defaults[0], '/')
    580 
    581     def test_safe(self):
    582         # Test setting 'safe' parameter does what it should do
    583         quote_by_default = "<>"
    584         result = urllib.quote(quote_by_default, safe=quote_by_default)
    585         self.assertEqual(quote_by_default, result,
    586                          "using quote(): %s != %s" % (quote_by_default, result))
    587         result = urllib.quote_plus(quote_by_default, safe=quote_by_default)
    588         self.assertEqual(quote_by_default, result,
    589                          "using quote_plus(): %s != %s" %
    590                          (quote_by_default, result))
    591 
    592     def test_default_quoting(self):
    593         # Make sure all characters that should be quoted are by default sans
    594         # space (separate test for that).
    595         should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
    596         should_quote.append('<>#%"{}|\^[]`')
    597         should_quote.append(chr(127)) # For 0x7F
    598         should_quote = ''.join(should_quote)
    599         for char in should_quote:
    600             result = urllib.quote(char)
    601             self.assertEqual(hexescape(char), result,
    602                              "using quote(): %s should be escaped to %s, not %s" %
    603                              (char, hexescape(char), result))
    604             result = urllib.quote_plus(char)
    605             self.assertEqual(hexescape(char), result,
    606                              "using quote_plus(): "
    607                              "%s should be escapes to %s, not %s" %
    608                              (char, hexescape(char), result))
    609         del should_quote
    610         partial_quote = "ab[]cd"
    611         expected = "ab%5B%5Dcd"
    612         result = urllib.quote(partial_quote)
    613         self.assertEqual(expected, result,
    614                          "using quote(): %s != %s" % (expected, result))
    615         result = urllib.quote_plus(partial_quote)
    616         self.assertEqual(expected, result,
    617                          "using quote_plus(): %s != %s" % (expected, result))
    618         self.assertRaises(TypeError, urllib.quote, None)
    619 
    620     def test_quoting_space(self):
    621         # Make sure quote() and quote_plus() handle spaces as specified in
    622         # their unique way
    623         result = urllib.quote(' ')
    624         self.assertEqual(result, hexescape(' '),
    625                          "using quote(): %s != %s" % (result, hexescape(' ')))
    626         result = urllib.quote_plus(' ')
    627         self.assertEqual(result, '+',
    628                          "using quote_plus(): %s != +" % result)
    629         given = "a b cd e f"
    630         expect = given.replace(' ', hexescape(' '))
    631         result = urllib.quote(given)
    632         self.assertEqual(expect, result,
    633                          "using quote(): %s != %s" % (expect, result))
    634         expect = given.replace(' ', '+')
    635         result = urllib.quote_plus(given)
    636         self.assertEqual(expect, result,
    637                          "using quote_plus(): %s != %s" % (expect, result))
    638 
    639     def test_quoting_plus(self):
    640         self.assertEqual(urllib.quote_plus('alpha+beta gamma'),
    641                          'alpha%2Bbeta+gamma')
    642         self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'),
    643                          'alpha+beta+gamma')
    644 
    645 class UnquotingTests(unittest.TestCase):
    646     """Tests for unquote() and unquote_plus()
    647 
    See the doc string for QuotingTests for details on quoting and such.
    649 
    650     """
    651 
    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works, both one escape at
        # a time and with every escape concatenated into a single string.
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %s != %s" % (expect, result))
            result = urllib.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %s != %s" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        # After decoding, the only '%' left is the one decoded from '%25'.
        result = urllib.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        # escape_string contains no literal '+', so unquote_plus() must
        # leave exactly the same single '%'.
        result = urllib.unquote_plus(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote_plus(): not all characters escaped: "
                         "%s" % result)
    676 
    677     def test_unquoting_badpercent(self):
    678         # Test unquoting on bad percent-escapes
    679         given = '%xab'
    680         expect = given
    681         result = urllib.unquote(given)
    682         self.assertEqual(expect, result, "using unquote(): %r != %r"
    683                          % (expect, result))
    684         given = '%x'
    685         expect = given
    686         result = urllib.unquote(given)
    687         self.assertEqual(expect, result, "using unquote(): %r != %r"
    688                          % (expect, result))
    689         given = '%'
    690         expect = given
    691         result = urllib.unquote(given)
    692         self.assertEqual(expect, result, "using unquote(): %r != %r"
    693                          % (expect, result))
    694 
    695     def test_unquoting_mixed_case(self):
    696         # Test unquoting on mixed-case hex digits in the percent-escapes
    697         given = '%Ab%eA'
    698         expect = '\xab\xea'
    699         result = urllib.unquote(given)
    700         self.assertEqual(expect, result, "using unquote(): %r != %r"
    701                          % (expect, result))
    702 
    703     def test_unquoting_parts(self):
    704         # Make sure unquoting works when have non-quoted characters
    705         # interspersed
    706         given = 'ab%sd' % hexescape('c')
    707         expect = "abcd"
    708         result = urllib.unquote(given)
    709         self.assertEqual(expect, result,
    710                          "using quote(): %s != %s" % (expect, result))
    711         result = urllib.unquote_plus(given)
    712         self.assertEqual(expect, result,
    713                          "using unquote_plus(): %s != %s" % (expect, result))
    714 
    715     def test_unquoting_plus(self):
    716         # Test difference between unquote() and unquote_plus()
    717         given = "are+there+spaces..."
    718         expect = given
    719         result = urllib.unquote(given)
    720         self.assertEqual(expect, result,
    721                          "using unquote(): %s != %s" % (expect, result))
    722         expect = given.replace('+', ' ')
    723         result = urllib.unquote_plus(given)
    724         self.assertEqual(expect, result,
    725                          "using unquote_plus(): %s != %s" % (expect, result))
    726 
    727     def test_unquote_with_unicode(self):
    728         r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc')
    729         self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc')
    730 
    731 class urlencode_Tests(unittest.TestCase):
    732     """Tests for urlencode()"""
    733 
    734     def help_inputtype(self, given, test_type):
    735         """Helper method for testing different input types.
    736 
    737         'given' must lead to only the pairs:
    738             * 1st, 1
    739             * 2nd, 2
    740             * 3rd, 3
    741 
    742         Test cannot assume anything about order.  Docs make no guarantee and
    743         have possible dictionary input.
    744 
    745         """
    746         expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
    747         result = urllib.urlencode(given)
    748         for expected in expect_somewhere:
    749             self.assertIn(expected, result,
    750                          "testing %s: %s not found in %s" %
    751                          (test_type, expected, result))
    752         self.assertEqual(result.count('&'), 2,
    753                          "testing %s: expected 2 '&'s; got %s" %
    754                          (test_type, result.count('&')))
    755         amp_location = result.index('&')
    756         on_amp_left = result[amp_location - 1]
    757         on_amp_right = result[amp_location + 1]
    758         self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
    759                      "testing %s: '&' not located in proper place in %s" %
    760                      (test_type, result))
    761         self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
    762                          "testing %s: "
    763                          "unexpected number of characters: %s != %s" %
    764                          (test_type, len(result), (5 * 3) + 2))
    765 
    766     def test_using_mapping(self):
    767         # Test passing in a mapping object as an argument.
    768         self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
    769                             "using dict as input type")
    770 
    771     def test_using_sequence(self):
    772         # Test passing in a sequence of two-item sequences as an argument.
    773         self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
    774                             "using sequence of two-item tuples as input")
    775 
    776     def test_quoting(self):
    777         # Make sure keys and values are quoted using quote_plus()
    778         given = {"&":"="}
    779         expect = "%s=%s" % (hexescape('&'), hexescape('='))
    780         result = urllib.urlencode(given)
    781         self.assertEqual(expect, result)
    782         given = {"key name":"A bunch of pluses"}
    783         expect = "key+name=A+bunch+of+pluses"
    784         result = urllib.urlencode(given)
    785         self.assertEqual(expect, result)
    786 
    787     def test_doseq(self):
    788         # Test that passing True for 'doseq' parameter works correctly
    789         given = {'sequence':['1', '2', '3']}
    790         expect = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3']))
    791         result = urllib.urlencode(given)
    792         self.assertEqual(expect, result)
    793         result = urllib.urlencode(given, True)
    794         for value in given["sequence"]:
    795             expect = "sequence=%s" % value
    796             self.assertIn(expect, result)
    797         self.assertEqual(result.count('&'), 2,
    798                          "Expected 2 '&'s, got %s" % result.count('&'))
    799 
    800 class Pathname_Tests(unittest.TestCase):
    801     """Test pathname2url() and url2pathname()"""
    802 
    803     def test_basic(self):
    804         # Make sure simple tests pass
    805         expected_path = os.path.join("parts", "of", "a", "path")
    806         expected_url = "parts/of/a/path"
    807         result = urllib.pathname2url(expected_path)
    808         self.assertEqual(expected_url, result,
    809                          "pathname2url() failed; %s != %s" %
    810                          (result, expected_url))
    811         result = urllib.url2pathname(expected_url)
    812         self.assertEqual(expected_path, result,
    813                          "url2pathame() failed; %s != %s" %
    814                          (result, expected_path))
    815 
    816     def test_quoting(self):
    817         # Test automatic quoting and unquoting works for pathnam2url() and
    818         # url2pathname() respectively
    819         given = os.path.join("needs", "quot=ing", "here")
    820         expect = "needs/%s/here" % urllib.quote("quot=ing")
    821         result = urllib.pathname2url(given)
    822         self.assertEqual(expect, result,
    823                          "pathname2url() failed; %s != %s" %
    824                          (expect, result))
    825         expect = given
    826         result = urllib.url2pathname(result)
    827         self.assertEqual(expect, result,
    828                          "url2pathname() failed; %s != %s" %
    829                          (expect, result))
    830         given = os.path.join("make sure", "using_quote")
    831         expect = "%s/using_quote" % urllib.quote("make sure")
    832         result = urllib.pathname2url(given)
    833         self.assertEqual(expect, result,
    834                          "pathname2url() failed; %s != %s" %
    835                          (expect, result))
    836         given = "make+sure/using_unquote"
    837         expect = os.path.join("make+sure", "using_unquote")
    838         result = urllib.url2pathname(given)
    839         self.assertEqual(expect, result,
    840                          "url2pathname() failed; %s != %s" %
    841                          (expect, result))
    842 
    843     @unittest.skipUnless(sys.platform == 'win32',
    844                          'test specific to the nturl2path library')
    845     def test_ntpath(self):
    846         given = ('/C:/', '///C:/', '/C|//')
    847         expect = 'C:\\'
    848         for url in given:
    849             result = urllib.url2pathname(url)
    850             self.assertEqual(expect, result,
    851                              'nturl2path.url2pathname() failed; %s != %s' %
    852                              (expect, result))
    853         given = '///C|/path'
    854         expect = 'C:\\path'
    855         result = urllib.url2pathname(given)
    856         self.assertEqual(expect, result,
    857                          'nturl2path.url2pathname() failed; %s != %s' %
    858                          (expect, result))
    859 
    860 class Utility_Tests(unittest.TestCase):
    861     """Testcase to test the various utility functions in the urllib."""
    862     # In Python 3 this test class is moved to test_urlparse.
    863 
    864     def test_splittype(self):
    865         splittype = urllib.splittype
    866         self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
    867         self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
    868         self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
    869         self.assertEqual(splittype('type:'), ('type', ''))
    870         self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
    871 
    872     def test_splithost(self):
    873         splithost = urllib.splithost
    874         self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
    875                          ('www.example.org:80', '/foo/bar/baz.html'))
    876         self.assertEqual(splithost('//www.example.org:80'),
    877                          ('www.example.org:80', ''))
    878         self.assertEqual(splithost('/foo/bar/baz.html'),
    879                          (None, '/foo/bar/baz.html'))
    880 
    881     def test_splituser(self):
    882         splituser = urllib.splituser
    883         self.assertEqual(splituser('User:Pass@www.python.org:080'),
    884                          ('User:Pass', 'www.python.org:080'))
    885         self.assertEqual(splituser('@www.python.org:080'),
    886                          ('', 'www.python.org:080'))
    887         self.assertEqual(splituser('www.python.org:080'),
    888                          (None, 'www.python.org:080'))
    889         self.assertEqual(splituser('User:Pass@'),
    890                          ('User:Pass', ''))
    891         self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
    892                          ('User@example.com:Pass', 'www.python.org:080'))
    893 
    894     def test_splitpasswd(self):
    895         # Some of the password examples are not sensible, but it is added to
    896         # confirming to RFC2617 and addressing issue4675.
    897         splitpasswd = urllib.splitpasswd
    898         self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
    899         self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
    900         self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
    901         self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
    902         self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
    903         self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
    904         self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
    905         self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
    906         self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
    907         self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
    908         self.assertEqual(splitpasswd('user:'), ('user', ''))
    909         self.assertEqual(splitpasswd('user'), ('user', None))
    910         self.assertEqual(splitpasswd(':ab'), ('', 'ab'))
    911 
    912     def test_splitport(self):
    913         splitport = urllib.splitport
    914         self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
    915         self.assertEqual(splitport('parrot'), ('parrot', None))
    916         self.assertEqual(splitport('parrot:'), ('parrot', None))
    917         self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
    918         self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
    919         self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
    920         self.assertEqual(splitport('[::1]'), ('[::1]', None))
    921         self.assertEqual(splitport(':88'), ('', '88'))
    922 
    923     def test_splitnport(self):
    924         splitnport = urllib.splitnport
    925         self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
    926         self.assertEqual(splitnport('parrot'), ('parrot', -1))
    927         self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
    928         self.assertEqual(splitnport('parrot:'), ('parrot', -1))
    929         self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
    930         self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
    931         self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
    932         self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
    933         self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
    934 
    935     def test_splitquery(self):
    936         # Normal cases are exercised by other tests; ensure that we also
    937         # catch cases with no port specified (testcase ensuring coverage)
    938         splitquery = urllib.splitquery
    939         self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
    940                          ('http://python.org/fake', 'foo=bar'))
    941         self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
    942                          ('http://python.org/fake?foo=bar', ''))
    943         self.assertEqual(splitquery('http://python.org/fake'),
    944                          ('http://python.org/fake', None))
    945         self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))
    946 
    947     def test_splittag(self):
    948         splittag = urllib.splittag
    949         self.assertEqual(splittag('http://example.com?foo=bar#baz'),
    950                          ('http://example.com?foo=bar', 'baz'))
    951         self.assertEqual(splittag('http://example.com?foo=bar#'),
    952                          ('http://example.com?foo=bar', ''))
    953         self.assertEqual(splittag('#baz'), ('', 'baz'))
    954         self.assertEqual(splittag('http://example.com?foo=bar'),
    955                          ('http://example.com?foo=bar', None))
    956         self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
    957                          ('http://example.com?foo=bar#baz', 'boo'))
    958 
    959     def test_splitattr(self):
    960         splitattr = urllib.splitattr
    961         self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
    962                          ('/path', ['attr1=value1', 'attr2=value2']))
    963         self.assertEqual(splitattr('/path;'), ('/path', ['']))
    964         self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
    965                          ('', ['attr1=value1', 'attr2=value2']))
    966         self.assertEqual(splitattr('/path'), ('/path', []))
    967 
    968     def test_splitvalue(self):
    969         # Normal cases are exercised by other tests; test pathological cases
    970         # with no key/value pairs. (testcase ensuring coverage)
    971         splitvalue = urllib.splitvalue
    972         self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
    973         self.assertEqual(splitvalue('foo='), ('foo', ''))
    974         self.assertEqual(splitvalue('=bar'), ('', 'bar'))
    975         self.assertEqual(splitvalue('foobar'), ('foobar', None))
    976         self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))
    977 
    978     def test_toBytes(self):
    979         result = urllib.toBytes(u'http://www.python.org')
    980         self.assertEqual(result, 'http://www.python.org')
    981         self.assertRaises(UnicodeError, urllib.toBytes,
    982                           test_support.u(r'http://www.python.org/medi\u00e6val'))
    983 
    984     def test_unwrap(self):
    985         url = urllib.unwrap('<URL:type://host/path>')
    986         self.assertEqual(url, 'type://host/path')
    987 
    988 
    989 class URLopener_Tests(unittest.TestCase):
    990     """Testcase to test the open method of URLopener class."""
    991 
    992     def test_quoted_open(self):
    993         class DummyURLopener(urllib.URLopener):
    994             def open_spam(self, url):
    995                 return url
    996 
    997         self.assertEqual(DummyURLopener().open(
    998             'spam://example/ /'),'//example/%20/')
    999 
   1000         # test the safe characters are not quoted by urlopen
   1001         self.assertEqual(DummyURLopener().open(
   1002             "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
   1003             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
   1004 
   1005 
# The FTP wrapper tests below are simply commented out.
# It is unclear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one of the tests and sometimes in another. On my Linux box
# the tests pass.
# If anybody has one of the problematic environments, please help!
# .   Facundo
   1013 #
   1014 # def server(evt):
   1015 #     import socket, time
   1016 #     serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
   1017 #     serv.settimeout(3)
   1018 #     serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
   1019 #     serv.bind(("", 9093))
   1020 #     serv.listen(5)
   1021 #     try:
   1022 #         conn, addr = serv.accept()
   1023 #         conn.send("1 Hola mundo\n")
   1024 #         cantdata = 0
   1025 #         while cantdata < 13:
   1026 #             data = conn.recv(13-cantdata)
   1027 #             cantdata += len(data)
   1028 #             time.sleep(.3)
   1029 #         conn.send("2 No more lines\n")
   1030 #         conn.close()
   1031 #     except socket.timeout:
   1032 #         pass
   1033 #     finally:
   1034 #         serv.close()
   1035 #         evt.set()
   1036 #
   1037 # class FTPWrapperTests(unittest.TestCase):
   1038 #
   1039 #     def setUp(self):
   1040 #         import ftplib, time, threading
   1041 #         ftplib.FTP.port = 9093
   1042 #         self.evt = threading.Event()
   1043 #         threading.Thread(target=server, args=(self.evt,)).start()
   1044 #         time.sleep(.1)
   1045 #
   1046 #     def tearDown(self):
   1047 #         self.evt.wait()
   1048 #
   1049 #     def testBasic(self):
   1050 #         # connects
   1051 #         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
   1052 #         ftp.close()
   1053 #
   1054 #     def testTimeoutNone(self):
   1055 #         # global default timeout is ignored
   1056 #         import socket
   1057 #         self.assertIsNone(socket.getdefaulttimeout())
   1058 #         socket.setdefaulttimeout(30)
   1059 #         try:
   1060 #             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
   1061 #         finally:
   1062 #             socket.setdefaulttimeout(None)
   1063 #         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
   1064 #         ftp.close()
   1065 #
   1066 #     def testTimeoutDefault(self):
   1067 #         # global default timeout is used
   1068 #         import socket
   1069 #         self.assertIsNone(socket.getdefaulttimeout())
   1070 #         socket.setdefaulttimeout(30)
   1071 #         try:
   1072 #             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
   1073 #         finally:
   1074 #             socket.setdefaulttimeout(None)
   1075 #         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
   1076 #         ftp.close()
   1077 #
   1078 #     def testTimeoutValue(self):
   1079 #         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
   1080 #                                 timeout=30)
   1081 #         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
   1082 #         ftp.close()
   1083 
   1084 
   1085 
   1086 def test_main():
   1087     import warnings
   1088     with warnings.catch_warnings():
   1089         warnings.filterwarnings('ignore', ".*urllib\.urlopen.*Python 3.0",
   1090                                 DeprecationWarning)
   1091         test_support.run_unittest(
   1092             urlopen_FileTests,
   1093             urlopen_HttpTests,
   1094             urlretrieve_FileTests,
   1095             urlretrieve_HttpTests,
   1096             ProxyTests,
   1097             QuotingTests,
   1098             UnquotingTests,
   1099             urlencode_Tests,
   1100             Pathname_Tests,
   1101             Utility_Tests,
   1102             URLopener_Tests,
   1103             ProxyTests,
   1104             ProxyTests_withOrderedEnv,
   1105             #FTPWrapperTests,
   1106         )
   1107 
   1108 
   1109 
# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()
   1112