Home | History | Annotate | Download | only in test
      1 import sys
      2 import unicodedata
      3 import unittest
      4 import urllib.parse
      5 
      6 RFC1808_BASE = "http://a/b/c/d;p?q#f"
      7 RFC2396_BASE = "http://a/b/c/d;p?q"
      8 RFC3986_BASE = 'http://a/b/c/d;p?q'
      9 SIMPLE_BASE  = 'http://a/b/c/d'
     10 
     11 # Each parse_qsl testcase is a two-tuple that contains
     12 # a string with the query and a list with the expected result.
     13 
     14 parse_qsl_test_cases = [
     15     ("", []),
     16     ("&", []),
     17     ("&&", []),
     18     ("=", [('', '')]),
     19     ("=a", [('', 'a')]),
     20     ("a", [('a', '')]),
     21     ("a=", [('a', '')]),
     22     ("&a=b", [('a', 'b')]),
     23     ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
     24     ("a=1&a=2", [('a', '1'), ('a', '2')]),
     25     (b"", []),
     26     (b"&", []),
     27     (b"&&", []),
     28     (b"=", [(b'', b'')]),
     29     (b"=a", [(b'', b'a')]),
     30     (b"a", [(b'a', b'')]),
     31     (b"a=", [(b'a', b'')]),
     32     (b"&a=b", [(b'a', b'b')]),
     33     (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
     34     (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
     35     (";", []),
     36     (";;", []),
     37     (";a=b", [('a', 'b')]),
     38     ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
     39     ("a=1;a=2", [('a', '1'), ('a', '2')]),
     40     (b";", []),
     41     (b";;", []),
     42     (b";a=b", [(b'a', b'b')]),
     43     (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
     44     (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
     45 ]
     46 
     47 # Each parse_qs testcase is a two-tuple that contains
     48 # a string with the query and a dictionary with the expected result.
     49 
     50 parse_qs_test_cases = [
     51     ("", {}),
     52     ("&", {}),
     53     ("&&", {}),
     54     ("=", {'': ['']}),
     55     ("=a", {'': ['a']}),
     56     ("a", {'a': ['']}),
     57     ("a=", {'a': ['']}),
     58     ("&a=b", {'a': ['b']}),
     59     ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
     60     ("a=1&a=2", {'a': ['1', '2']}),
     61     (b"", {}),
     62     (b"&", {}),
     63     (b"&&", {}),
     64     (b"=", {b'': [b'']}),
     65     (b"=a", {b'': [b'a']}),
     66     (b"a", {b'a': [b'']}),
     67     (b"a=", {b'a': [b'']}),
     68     (b"&a=b", {b'a': [b'b']}),
     69     (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
     70     (b"a=1&a=2", {b'a': [b'1', b'2']}),
     71     (";", {}),
     72     (";;", {}),
     73     (";a=b", {'a': ['b']}),
     74     ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
     75     ("a=1;a=2", {'a': ['1', '2']}),
     76     (b";", {}),
     77     (b";;", {}),
     78     (b";a=b", {b'a': [b'b']}),
     79     (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
     80     (b"a=1;a=2", {b'a': [b'1', b'2']}),
     81 ]
     82 
     83 class UrlParseTestCase(unittest.TestCase):
     84 
     85     def checkRoundtrips(self, url, parsed, split):
     86         result = urllib.parse.urlparse(url)
     87         self.assertEqual(result, parsed)
     88         t = (result.scheme, result.netloc, result.path,
     89              result.params, result.query, result.fragment)
     90         self.assertEqual(t, parsed)
     91         # put it back together and it should be the same
     92         result2 = urllib.parse.urlunparse(result)
     93         self.assertEqual(result2, url)
     94         self.assertEqual(result2, result.geturl())
     95 
     96         # the result of geturl() is a fixpoint; we can always parse it
     97         # again to get the same result:
     98         result3 = urllib.parse.urlparse(result.geturl())
     99         self.assertEqual(result3.geturl(), result.geturl())
    100         self.assertEqual(result3,          result)
    101         self.assertEqual(result3.scheme,   result.scheme)
    102         self.assertEqual(result3.netloc,   result.netloc)
    103         self.assertEqual(result3.path,     result.path)
    104         self.assertEqual(result3.params,   result.params)
    105         self.assertEqual(result3.query,    result.query)
    106         self.assertEqual(result3.fragment, result.fragment)
    107         self.assertEqual(result3.username, result.username)
    108         self.assertEqual(result3.password, result.password)
    109         self.assertEqual(result3.hostname, result.hostname)
    110         self.assertEqual(result3.port,     result.port)
    111 
    112         # check the roundtrip using urlsplit() as well
    113         result = urllib.parse.urlsplit(url)
    114         self.assertEqual(result, split)
    115         t = (result.scheme, result.netloc, result.path,
    116              result.query, result.fragment)
    117         self.assertEqual(t, split)
    118         result2 = urllib.parse.urlunsplit(result)
    119         self.assertEqual(result2, url)
    120         self.assertEqual(result2, result.geturl())
    121 
    122         # check the fixpoint property of re-parsing the result of geturl()
    123         result3 = urllib.parse.urlsplit(result.geturl())
    124         self.assertEqual(result3.geturl(), result.geturl())
    125         self.assertEqual(result3,          result)
    126         self.assertEqual(result3.scheme,   result.scheme)
    127         self.assertEqual(result3.netloc,   result.netloc)
    128         self.assertEqual(result3.path,     result.path)
    129         self.assertEqual(result3.query,    result.query)
    130         self.assertEqual(result3.fragment, result.fragment)
    131         self.assertEqual(result3.username, result.username)
    132         self.assertEqual(result3.password, result.password)
    133         self.assertEqual(result3.hostname, result.hostname)
    134         self.assertEqual(result3.port,     result.port)
    135 
    def test_qsl(self):
        # parse_qsl() must yield the expected pairs when blank values are
        # kept, and the same pairs minus the blank-valued ones when they are
        # dropped.
        for query, pairs in parse_qsl_test_cases:
            failure_note = "Error parsing %r" % query
            kept = urllib.parse.parse_qsl(query, keep_blank_values=True)
            self.assertEqual(kept, pairs, failure_note)
            pairs_with_values = [pair for pair in pairs if pair[1]]
            dropped = urllib.parse.parse_qsl(query, keep_blank_values=False)
            self.assertEqual(dropped, pairs_with_values, failure_note)
    144 
    def test_qs(self):
        # parse_qs() must yield the expected mapping when blank values are
        # kept, and the same mapping minus the entries whose first value is
        # blank when they are dropped.
        for query, mapping in parse_qs_test_cases:
            failure_note = "Error parsing %r" % query
            kept = urllib.parse.parse_qs(query, keep_blank_values=True)
            self.assertEqual(kept, mapping, failure_note)
            mapping_with_values = {name: values
                                   for name, values in mapping.items()
                                   if values[0]}
            dropped = urllib.parse.parse_qs(query, keep_blank_values=False)
            self.assertEqual(dropped, mapping_with_values, failure_note)
    154 
    155     def test_roundtrips(self):
    156         str_cases = [
    157             ('file:///tmp/junk.txt',
    158              ('file', '', '/tmp/junk.txt', '', '', ''),
    159              ('file', '', '/tmp/junk.txt', '', '')),
    160             ('imap://mail.python.org/mbox1',
    161              ('imap', 'mail.python.org', '/mbox1', '', '', ''),
    162              ('imap', 'mail.python.org', '/mbox1', '', '')),
    163             ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
    164              ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
    165               '', '', ''),
    166              ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
    167               '', '')),
    168             ('nfs://server/path/to/file.txt',
    169              ('nfs', 'server', '/path/to/file.txt', '', '', ''),
    170              ('nfs', 'server', '/path/to/file.txt', '', '')),
    171             ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
    172              ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
    173               '', '', ''),
    174              ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
    175               '', '')),
    176             ('git+ssh://git (at] github.com/user/project.git',
    177             ('git+ssh', 'git (at] github.com','/user/project.git',
    178              '','',''),
    179             ('git+ssh', 'git (at] github.com','/user/project.git',
    180              '', '')),
    181             ]
    182         def _encode(t):
    183             return (t[0].encode('ascii'),
    184                     tuple(x.encode('ascii') for x in t[1]),
    185                     tuple(x.encode('ascii') for x in t[2]))
    186         bytes_cases = [_encode(x) for x in str_cases]
    187         for url, parsed, split in str_cases + bytes_cases:
    188             self.checkRoundtrips(url, parsed, split)
    189 
    190     def test_http_roundtrips(self):
    191         # urllib.parse.urlsplit treats 'http:' as an optimized special case,
    192         # so we test both 'http:' and 'https:' in all the following.
    193         # Three cheers for white box knowledge!
    194         str_cases = [
    195             ('://www.python.org',
    196              ('www.python.org', '', '', '', ''),
    197              ('www.python.org', '', '', '')),
    198             ('://www.python.org#abc',
    199              ('www.python.org', '', '', '', 'abc'),
    200              ('www.python.org', '', '', 'abc')),
    201             ('://www.python.org?q=abc',
    202              ('www.python.org', '', '', 'q=abc', ''),
    203              ('www.python.org', '', 'q=abc', '')),
    204             ('://www.python.org/#abc',
    205              ('www.python.org', '/', '', '', 'abc'),
    206              ('www.python.org', '/', '', 'abc')),
    207             ('://a/b/c/d;p?q#f',
    208              ('a', '/b/c/d', 'p', 'q', 'f'),
    209              ('a', '/b/c/d;p', 'q', 'f')),
    210             ]
    211         def _encode(t):
    212             return (t[0].encode('ascii'),
    213                     tuple(x.encode('ascii') for x in t[1]),
    214                     tuple(x.encode('ascii') for x in t[2]))
    215         bytes_cases = [_encode(x) for x in str_cases]
    216         str_schemes = ('http', 'https')
    217         bytes_schemes = (b'http', b'https')
    218         str_tests = str_schemes, str_cases
    219         bytes_tests = bytes_schemes, bytes_cases
    220         for schemes, test_cases in (str_tests, bytes_tests):
    221             for scheme in schemes:
    222                 for url, parsed, split in test_cases:
    223                     url = scheme + url
    224                     parsed = (scheme,) + parsed
    225                     split = (scheme,) + split
    226                     self.checkRoundtrips(url, parsed, split)
    227 
    228     def checkJoin(self, base, relurl, expected):
    229         str_components = (base, relurl, expected)
    230         self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
    231         bytes_components = baseb, relurlb, expectedb = [
    232                             x.encode('ascii') for x in str_components]
    233         self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
    234 
    235     def test_unparse_parse(self):
    236         str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
    237         bytes_cases = [x.encode('ascii') for x in str_cases]
    238         for u in str_cases + bytes_cases:
    239             self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
    240             self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
    241 
    def test_RFC1808(self):
        # Each entry is (relative URL, expected result of joining it onto
        # RFC1808_BASE).
        cases = [
            # "normal" cases from RFC 1808:
            ('g:h', 'g:h'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('g?y', 'http://a/b/c/g?y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('#s', 'http://a/b/c/d;p?q#s'),
            ('g#s', 'http://a/b/c/g#s'),
            ('g#s/./x', 'http://a/b/c/g#s/./x'),
            ('g?y#s', 'http://a/b/c/g?y#s'),
            ('g;x', 'http://a/b/c/g;x'),
            ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../', 'http://a/'),
            ('../../g', 'http://a/g'),

            # "abnormal" cases from RFC 1808:
            ('', 'http://a/b/c/d;p?q#f'),
            ('g.', 'http://a/b/c/g.'),
            ('.g', 'http://a/b/c/.g'),
            ('g..', 'http://a/b/c/g..'),
            ('..g', 'http://a/b/c/..g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),
        ]
        for relurl, expected in cases:
            self.checkJoin(RFC1808_BASE, relurl, expected)

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so these cases (which expect 1808 behavior) are not run:
        #   ('http:g', 'http:g')
        #   ('http:', 'http:')

        # XXX: The following cases are no longer compatible with RFC3986
        # and are not run:
        #   ('../../../g', 'http://a/../g')
        #   ('../../../../g', 'http://a/../../g')
        #   ('/./g', 'http://a/./g')
        #   ('/../g', 'http://a/../g')
    288 
    289 
    290     def test_RFC2368(self):
    291         # Issue 11467: path that starts with a number is not parsed correctly
    292         self.assertEqual(urllib.parse.urlparse('mailto:1337 (at] example.org'),
    293                 ('mailto', '', '1337 (at] example.org', '', '', ''))
    294 
    def test_RFC2396(self):
        # cases from RFC 2396; each entry is (relative URL, expected result
        # of joining it onto RFC2396_BASE).
        cases = [
            ('g:h', 'g:h'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('g?y', 'http://a/b/c/g?y'),
            ('#s', 'http://a/b/c/d;p?q#s'),
            ('g#s', 'http://a/b/c/g#s'),
            ('g?y#s', 'http://a/b/c/g?y#s'),
            ('g;x', 'http://a/b/c/g;x'),
            ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../', 'http://a/'),
            ('../../g', 'http://a/g'),
            ('', RFC2396_BASE),
            ('g.', 'http://a/b/c/g.'),
            ('.g', 'http://a/b/c/.g'),
            ('g..', 'http://a/b/c/g..'),
            ('..g', 'http://a/b/c/..g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),
            ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
            ('g;x=1/../y', 'http://a/b/c/y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('g?y/../x', 'http://a/b/c/g?y/../x'),
            ('g#s/./x', 'http://a/b/c/g#s/./x'),
            ('g#s/../x', 'http://a/b/c/g#s/../x'),
        ]
        for relurl, expected in cases:
            self.checkJoin(RFC2396_BASE, relurl, expected)

        # XXX: The following cases are no longer compatible with RFC3986
        # and are not run:
        #   ('../../../g', 'http://a/../g')
        #   ('../../../../g', 'http://a/../../g')
        #   ('/./g', 'http://a/./g')
        #   ('/../g', 'http://a/../g')
    339 
    def test_RFC3986(self):
        # Each entry is (relative URL, expected result of joining it onto
        # RFC3986_BASE).  Repeated entries are intentional: the sequence of
        # checks is kept exactly as it has always been run.
        cases = [
            ('?y', 'http://a/b/c/d;p?y'),
            (';x', 'http://a/b/c/;x'),
            ('g:h', 'g:h'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('?y', 'http://a/b/c/d;p?y'),
            ('g?y', 'http://a/b/c/g?y'),
            ('#s', 'http://a/b/c/d;p?q#s'),
            ('g#s', 'http://a/b/c/g#s'),
            ('g?y#s', 'http://a/b/c/g?y#s'),
            (';x', 'http://a/b/c/;x'),
            ('g;x', 'http://a/b/c/g;x'),
            ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
            ('', 'http://a/b/c/d;p?q'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../', 'http://a/'),
            ('../../g', 'http://a/g'),
            ('../../../g', 'http://a/g'),

            # Abnormal Examples
            #
            # The 'abnormal scenarios' are incompatible with pre-RFC 3986
            # parsing.  Tests are here for reference.
            ('../../../g', 'http://a/g'),
            ('../../../../g', 'http://a/g'),
            ('/./g', 'http://a/g'),
            ('/../g', 'http://a/g'),
            ('g.', 'http://a/b/c/g.'),
            ('.g', 'http://a/b/c/.g'),
            ('g..', 'http://a/b/c/g..'),
            ('..g', 'http://a/b/c/..g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),
            ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
            ('g;x=1/../y', 'http://a/b/c/y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('g?y/../x', 'http://a/b/c/g?y/../x'),
            ('g#s/./x', 'http://a/b/c/g#s/./x'),
            ('g#s/../x', 'http://a/b/c/g#s/../x'),
            # A strict parser would give 'http:g' here; the relaxed parser
            # resolves it against the base.
            ('http:g', 'http://a/b/c/g'),
        ]
        for relurl, expected in cases:
            self.checkJoin(RFC3986_BASE, relurl, expected)

        # Test for issue9721
        self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
    396 
    def test_urljoins(self):
        # Each entry is (relative URL, expected result of joining it onto
        # SIMPLE_BASE).  Repeated entries are kept so the sequence of checks
        # is unchanged.
        simple_base_cases = [
            ('g:h', 'g:h'),
            ('http:g', 'http://a/b/c/g'),
            ('http:', 'http://a/b/c/d'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('?y', 'http://a/b/c/d?y'),
            ('g?y', 'http://a/b/c/g?y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../g', 'http://a/g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),
            ('http:g', 'http://a/b/c/g'),
            ('http:', 'http://a/b/c/d'),
            ('http:?y', 'http://a/b/c/d?y'),
            ('http:g?y', 'http://a/b/c/g?y'),
            ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
        ]
        for relurl, expected in simple_base_cases:
            self.checkJoin(SIMPLE_BASE, relurl, expected)

        # Each entry is (base, relative URL, expected joined URL).
        other_base_cases = [
            ('http:///', '..', 'http:///'),
            ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
            ('', 'http://a/./g', 'http://a/./g'),
            ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
            ('svn+ssh://pathtorepo/dir1', 'dir2',
             'svn+ssh://pathtorepo/dir2'),
            ('ws://a/b', 'g', 'ws://a/g'),
            ('wss://a/b', 'g', 'wss://a/g'),

            # XXX: The following SIMPLE_BASE cases are no longer compatible
            # with RFC3986 and are not run:
            #   ('../../../g', 'http://a/../g')
            #   ('/./g', 'http://a/./g')

            # test for issue22118 duplicate slashes
            (SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo'),

            # Non-RFC-defined tests, covering variations of base and
            # trailing slashes
            ('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/'),
            ('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/'),
            ('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/'),
            ('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/'),
            ('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g'),
            ('http://a/b/', '../../f/g/', 'http://a/f/g/'),

            # issue 23703: don't duplicate filename
            ('a', 'b', 'b'),
        ]
        for base, relurl, expected in other_base_cases:
            self.checkJoin(base, relurl, expected)
    451 
    452     def test_RFC2732(self):
    453         str_cases = [
    454             ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
    455             ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
    456             ('http://[::1]:5432/foo/', '::1', 5432),
    457             ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
    458             ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
    459             ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
    460              'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
    461             ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
    462             ('http://[::ffff:12.34.56.78]:5432/foo/',
    463              '::ffff:12.34.56.78', 5432),
    464             ('http://Test.python.org/foo/', 'test.python.org', None),
    465             ('http://12.34.56.78/foo/', '12.34.56.78', None),
    466             ('http://[::1]/foo/', '::1', None),
    467             ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
    468             ('http://[dead:beef::]/foo/', 'dead:beef::', None),
    469             ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
    470              'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
    471             ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
    472             ('http://[::ffff:12.34.56.78]/foo/',
    473              '::ffff:12.34.56.78', None),
    474             ('http://Test.python.org:/foo/', 'test.python.org', None),
    475             ('http://12.34.56.78:/foo/', '12.34.56.78', None),
    476             ('http://[::1]:/foo/', '::1', None),
    477             ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
    478             ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
    479             ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
    480              'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
    481             ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
    482             ('http://[::ffff:12.34.56.78]:/foo/',
    483              '::ffff:12.34.56.78', None),
    484             ]
    485         def _encode(t):
    486             return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
    487         bytes_cases = [_encode(x) for x in str_cases]
    488         for url, hostname, port in str_cases + bytes_cases:
    489             urlparsed = urllib.parse.urlparse(url)
    490             self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
    491 
    492         str_cases = [
    493                 'http://::12.34.56.78]/',
    494                 'http://[::1/foo/',
    495                 'ftp://[::1/foo/bad]/bad',
    496                 'http://[::1/foo/bad]/bad',
    497                 'http://[::ffff:12.34.56.78']
    498         bytes_cases = [x.encode('ascii') for x in str_cases]
    499         for invalid_url in str_cases + bytes_cases:
    500             self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
    501 
    def test_urldefrag(self):
        # Each case is (url, url with the fragment removed, fragment); every
        # case is checked as str and again as ASCII-encoded bytes.
        str_cases = [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
        ]
        bytes_cases = [tuple(part.encode('ascii') for part in case)
                       for case in str_cases]
        for url, defrag, frag in str_cases + bytes_cases:
            outcome = urllib.parse.urldefrag(url)
            self.assertEqual(outcome.geturl(), url)
            self.assertEqual(outcome, (defrag, frag))
            self.assertEqual(outcome.url, defrag)
            self.assertEqual(outcome.fragment, frag)
    524 
    525     def test_urlsplit_scoped_IPv6(self):
    526         p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
    527         self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
    528         self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')
    529 
    530         p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
    531         self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
    532         self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')
    533 
    534     def test_urlsplit_attributes(self):
    535         url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
    536         p = urllib.parse.urlsplit(url)
    537         self.assertEqual(p.scheme, "http")
    538         self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
    539         self.assertEqual(p.path, "/doc/")
    540         self.assertEqual(p.query, "")
    541         self.assertEqual(p.fragment, "frag")
    542         self.assertEqual(p.username, None)
    543         self.assertEqual(p.password, None)
    544         self.assertEqual(p.hostname, "www.python.org")
    545         self.assertEqual(p.port, None)
    546         # geturl() won't return exactly the original URL in this case
    547         # since the scheme is always case-normalized
    548         # We handle this by ignoring the first 4 characters of the URL
    549         self.assertEqual(p.geturl()[4:], url[4:])
    550 
    551         url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
    552         p = urllib.parse.urlsplit(url)
    553         self.assertEqual(p.scheme, "http")
    554         self.assertEqual(p.netloc, "User:Pass (at] www.python.org:080")
    555         self.assertEqual(p.path, "/doc/")
    556         self.assertEqual(p.query, "query=yes")
    557         self.assertEqual(p.fragment, "frag")
    558         self.assertEqual(p.username, "User")
    559         self.assertEqual(p.password, "Pass")
    560         self.assertEqual(p.hostname, "www.python.org")
    561         self.assertEqual(p.port, 80)
    562         self.assertEqual(p.geturl(), url)
    563 
    564         # Addressing issue1698, which suggests Username can contain
    565         # "@" characters.  Though not RFC compliant, many ftp sites allow
    566         # and request email addresses as usernames.
    567 
    568         url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
    569         p = urllib.parse.urlsplit(url)
    570         self.assertEqual(p.scheme, "http")
    571         self.assertEqual(p.netloc, "User (at] example.com:Pass (at] www.python.org:080")
    572         self.assertEqual(p.path, "/doc/")
    573         self.assertEqual(p.query, "query=yes")
    574         self.assertEqual(p.fragment, "frag")
    575         self.assertEqual(p.username, "User (at] example.com")
    576         self.assertEqual(p.password, "Pass")
    577         self.assertEqual(p.hostname, "www.python.org")
    578         self.assertEqual(p.port, 80)
    579         self.assertEqual(p.geturl(), url)
    580 
    581         # And check them all again, only with bytes this time
    582         url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
    583         p = urllib.parse.urlsplit(url)
    584         self.assertEqual(p.scheme, b"http")
    585         self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
    586         self.assertEqual(p.path, b"/doc/")
    587         self.assertEqual(p.query, b"")
    588         self.assertEqual(p.fragment, b"frag")
    589         self.assertEqual(p.username, None)
    590         self.assertEqual(p.password, None)
    591         self.assertEqual(p.hostname, b"www.python.org")
    592         self.assertEqual(p.port, None)
    593         self.assertEqual(p.geturl()[4:], url[4:])
    594 
    595         url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
    596         p = urllib.parse.urlsplit(url)
    597         self.assertEqual(p.scheme, b"http")
    598         self.assertEqual(p.netloc, b"User:Pass (at] www.python.org:080")
    599         self.assertEqual(p.path, b"/doc/")
    600         self.assertEqual(p.query, b"query=yes")
    601         self.assertEqual(p.fragment, b"frag")
    602         self.assertEqual(p.username, b"User")
    603         self.assertEqual(p.password, b"Pass")
    604         self.assertEqual(p.hostname, b"www.python.org")
    605         self.assertEqual(p.port, 80)
    606         self.assertEqual(p.geturl(), url)
    607 
    608         url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
    609         p = urllib.parse.urlsplit(url)
    610         self.assertEqual(p.scheme, b"http")
    611         self.assertEqual(p.netloc, b"User (at] example.com:Pass (at] www.python.org:080")
    612         self.assertEqual(p.path, b"/doc/")
    613         self.assertEqual(p.query, b"query=yes")
    614         self.assertEqual(p.fragment, b"frag")
    615         self.assertEqual(p.username, b"User (at] example.com")
    616         self.assertEqual(p.password, b"Pass")
    617         self.assertEqual(p.hostname, b"www.python.org")
    618         self.assertEqual(p.port, 80)
    619         self.assertEqual(p.geturl(), url)
    620 
    621         # Verify an illegal port raises ValueError
    622         url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
    623         p = urllib.parse.urlsplit(url)
    624         with self.assertRaisesRegex(ValueError, "out of range"):
    625             p.port
    626 
    627     def test_attributes_bad_port(self):
    628         """Check handling of invalid ports."""
    629         for bytes in (False, True):
    630             for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
    631                 for port in ("foo", "1.5", "-1", "0x10"):
    632                     with self.subTest(bytes=bytes, parse=parse, port=port):
    633                         netloc = "www.example.net:" + port
    634                         url = "http://" + netloc
    635                         if bytes:
    636                             netloc = netloc.encode("ascii")
    637                             url = url.encode("ascii")
    638                         p = parse(url)
    639                         self.assertEqual(p.netloc, netloc)
    640                         with self.assertRaises(ValueError):
    641                             p.port
    642 
    643     def test_attributes_without_netloc(self):
    644         # This example is straight from RFC 3261.  It looks like it
    645         # should allow the username, hostname, and port to be filled
    646         # in, but doesn't.  Since it's a URI and doesn't use the
    647         # scheme://netloc syntax, the netloc and related attributes
    648         # should be left empty.
    649         uri = "sip:alice (at] atlanta.com;maddr=239.255.255.1;ttl=15"
    650         p = urllib.parse.urlsplit(uri)
    651         self.assertEqual(p.netloc, "")
    652         self.assertEqual(p.username, None)
    653         self.assertEqual(p.password, None)
    654         self.assertEqual(p.hostname, None)
    655         self.assertEqual(p.port, None)
    656         self.assertEqual(p.geturl(), uri)
    657 
    658         p = urllib.parse.urlparse(uri)
    659         self.assertEqual(p.netloc, "")
    660         self.assertEqual(p.username, None)
    661         self.assertEqual(p.password, None)
    662         self.assertEqual(p.hostname, None)
    663         self.assertEqual(p.port, None)
    664         self.assertEqual(p.geturl(), uri)
    665 
    666         # You guessed it, repeating the test with bytes input
    667         uri = b"sip:alice (at] atlanta.com;maddr=239.255.255.1;ttl=15"
    668         p = urllib.parse.urlsplit(uri)
    669         self.assertEqual(p.netloc, b"")
    670         self.assertEqual(p.username, None)
    671         self.assertEqual(p.password, None)
    672         self.assertEqual(p.hostname, None)
    673         self.assertEqual(p.port, None)
    674         self.assertEqual(p.geturl(), uri)
    675 
    676         p = urllib.parse.urlparse(uri)
    677         self.assertEqual(p.netloc, b"")
    678         self.assertEqual(p.username, None)
    679         self.assertEqual(p.password, None)
    680         self.assertEqual(p.hostname, None)
    681         self.assertEqual(p.port, None)
    682         self.assertEqual(p.geturl(), uri)
    683 
    684     def test_noslash(self):
    685         # Issue 1637: http://foo.com?query is legal
    686         self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
    687                          ('http', 'example.com', '', '', 'blahblah=/foo', ''))
    688         self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
    689                          (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
    690 
    691     def test_withoutscheme(self):
    692         # Test urlparse without scheme
    693         # Issue 754016: urlparse goes wrong with IP:port without scheme
    694         # RFC 1808 specifies that netloc should start with //, urlparse expects
    695         # the same, otherwise it classifies the portion of url as path.
    696         self.assertEqual(urllib.parse.urlparse("path"),
    697                 ('','','path','','',''))
    698         self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
    699                 ('','www.python.org:80','','','',''))
    700         self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
    701                 ('http','www.python.org:80','','','',''))
    702         # Repeat for bytes input
    703         self.assertEqual(urllib.parse.urlparse(b"path"),
    704                 (b'',b'',b'path',b'',b'',b''))
    705         self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
    706                 (b'',b'www.python.org:80',b'',b'',b'',b''))
    707         self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
    708                 (b'http',b'www.python.org:80',b'',b'',b'',b''))
    709 
    710     def test_portseparator(self):
    711         # Issue 754016 makes changes for port separator ':' from scheme separator
    712         self.assertEqual(urllib.parse.urlparse("path:80"),
    713                 ('','','path:80','','',''))
    714         self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
    715         self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
    716         self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
    717                 ('http','www.python.org:80','','','',''))
    718         # As usual, need to check bytes input as well
    719         self.assertEqual(urllib.parse.urlparse(b"path:80"),
    720                 (b'',b'',b'path:80',b'',b'',b''))
    721         self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
    722         self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
    723         self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
    724                 (b'http',b'www.python.org:80',b'',b'',b'',b''))
    725 
    726     def test_usingsys(self):
    727         # Issue 3314: sys module is used in the error
    728         self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
    729 
    730     def test_anyscheme(self):
    731         # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
    732         self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
    733                          ('s3', 'foo.com', '/stuff', '', '', ''))
    734         self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
    735                          ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
    736         self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
    737                          ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
    738         self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
    739                          ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
    740 
    741         # And for bytes...
    742         self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
    743                          (b's3', b'foo.com', b'/stuff', b'', b'', b''))
    744         self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
    745                          (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
    746         self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
    747                          (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
    748         self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
    749                          (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
    750 
    751     def test_default_scheme(self):
    752         # Exercise the scheme parameter of urlparse() and urlsplit()
    753         for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
    754             with self.subTest(function=func):
    755                 result = func("http://example.net/", "ftp")
    756                 self.assertEqual(result.scheme, "http")
    757                 result = func(b"http://example.net/", b"ftp")
    758                 self.assertEqual(result.scheme, b"http")
    759                 self.assertEqual(func("path", "ftp").scheme, "ftp")
    760                 self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
    761                 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
    762                 self.assertEqual(func("path").scheme, "")
    763                 self.assertEqual(func(b"path").scheme, b"")
    764                 self.assertEqual(func(b"path", "").scheme, b"")
    765 
    766     def test_parse_fragments(self):
    767         # Exercise the allow_fragments parameter of urlparse() and urlsplit()
    768         tests = (
    769             ("http:#frag", "path", "frag"),
    770             ("//example.net#frag", "path", "frag"),
    771             ("index.html#frag", "path", "frag"),
    772             (";a=b#frag", "params", "frag"),
    773             ("?a=b#frag", "query", "frag"),
    774             ("#frag", "path", "frag"),
    775             ("abc#@frag", "path", "@frag"),
    776             ("//abc#@frag", "path", "@frag"),
    777             ("//abc:80#@frag", "path", "@frag"),
    778             ("//abc#@frag:80", "path", "@frag:80"),
    779         )
    780         for url, attr, expected_frag in tests:
    781             for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
    782                 if attr == "params" and func is urllib.parse.urlsplit:
    783                     attr = "path"
    784                 with self.subTest(url=url, function=func):
    785                     result = func(url, allow_fragments=False)
    786                     self.assertEqual(result.fragment, "")
    787                     self.assertTrue(
    788                             getattr(result, attr).endswith("#" + expected_frag))
    789                     self.assertEqual(func(url, "", False).fragment, "")
    790 
    791                     result = func(url, allow_fragments=True)
    792                     self.assertEqual(result.fragment, expected_frag)
    793                     self.assertFalse(
    794                             getattr(result, attr).endswith(expected_frag))
    795                     self.assertEqual(func(url, "", True).fragment,
    796                                      expected_frag)
    797                     self.assertEqual(func(url).fragment, expected_frag)
    798 
    799     def test_mixed_types_rejected(self):
    800         # Several functions that process either strings or ASCII encoded bytes
    801         # accept multiple arguments. Check they reject mixed type input
    802         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
    803             urllib.parse.urlparse("www.python.org", b"http")
    804         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
    805             urllib.parse.urlparse(b"www.python.org", "http")
    806         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
    807             urllib.parse.urlsplit("www.python.org", b"http")
    808         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
    809             urllib.parse.urlsplit(b"www.python.org", "http")
    810         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
    811             urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
    812         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
    813             urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
    814         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
    815             urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
    816         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
    817             urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
    818         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
    819             urllib.parse.urljoin("http://python.org", b"http://python.org")
    820         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
    821             urllib.parse.urljoin(b"http://python.org", "http://python.org")
    822 
    def _check_result_type(self, str_type):
        # Verify that a str result type and its bytes counterpart point at
        # each other and convert into one another via encode()/decode().
        field_count = len(str_type._fields)
        bytes_type = str_type._encoded_counterpart
        self.assertIs(bytes_type._decoded_counterpart, str_type)
        empty_str_fields = ('',) * field_count
        empty_bytes_fields = (b'',) * field_count
        str_result = str_type(*empty_str_fields)
        bytes_result = bytes_type(*empty_bytes_fields)
        # Conversion is tried with no arguments, with an explicit encoding,
        # and with explicit encoding plus error handler.
        codec_variants = ((), ('ascii',), ('ascii', 'strict'))

        self.assertEqual(str_result, empty_str_fields)
        for codec_args in codec_variants:
            decoded = bytes_result.decode(*codec_args)
            self.assertEqual(decoded, empty_str_fields)
            self.assertEqual(decoded, str_result)

        self.assertEqual(bytes_result, empty_bytes_fields)
        for codec_args in codec_variants:
            encoded = str_result.encode(*codec_args)
            self.assertEqual(encoded, empty_bytes_fields)
            self.assertEqual(encoded, bytes_result)
    847 
    848     def test_result_pairs(self):
    849         # Check encoding and decoding between result pairs
    850         result_types = [
    851           urllib.parse.DefragResult,
    852           urllib.parse.SplitResult,
    853           urllib.parse.ParseResult,
    854         ]
    855         for result_type in result_types:
    856             self._check_result_type(result_type)
    857 
    858     def test_parse_qs_encoding(self):
    859         result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
    860         self.assertEqual(result, {'key': ['\u0141\xE9']})
    861         result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
    862         self.assertEqual(result, {'key': ['\u0141\xE9']})
    863         result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
    864         self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
    865         result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
    866         self.assertEqual(result, {'key': ['\u0141\ufffd-']})
    867         result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
    868                                                           errors="ignore")
    869         self.assertEqual(result, {'key': ['\u0141-']})
    870 
    871     def test_parse_qsl_encoding(self):
    872         result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
    873         self.assertEqual(result, [('key', '\u0141\xE9')])
    874         result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
    875         self.assertEqual(result, [('key', '\u0141\xE9')])
    876         result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
    877         self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
    878         result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
    879         self.assertEqual(result, [('key', '\u0141\ufffd-')])
    880         result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
    881                                                           errors="ignore")
    882         self.assertEqual(result, [('key', '\u0141-')])
    883 
    884     def test_parse_qsl_max_num_fields(self):
    885         with self.assertRaises(ValueError):
    886             urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
    887         with self.assertRaises(ValueError):
    888             urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
    889         urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
    890 
    891     def test_urlencode_sequences(self):
    892         # Other tests incidentally urlencode things; test non-covered cases:
    893         # Sequence and object values.
    894         result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
    895         # we cannot rely on ordering here
    896         assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
    897 
    898         class Trivial:
    899             def __str__(self):
    900                 return 'trivial'
    901 
    902         result = urllib.parse.urlencode({'a': Trivial()}, True)
    903         self.assertEqual(result, 'a=trivial')
    904 
    905     def test_urlencode_quote_via(self):
    906         result = urllib.parse.urlencode({'a': 'some value'})
    907         self.assertEqual(result, "a=some+value")
    908         result = urllib.parse.urlencode({'a': 'some value/another'},
    909                                         quote_via=urllib.parse.quote)
    910         self.assertEqual(result, "a=some%20value%2Fanother")
    911         result = urllib.parse.urlencode({'a': 'some value/another'},
    912                                         safe='/', quote_via=urllib.parse.quote)
    913         self.assertEqual(result, "a=some%20value/another")
    914 
    915     def test_quote_from_bytes(self):
    916         self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
    917         result = urllib.parse.quote_from_bytes(b'archaeological arcana')
    918         self.assertEqual(result, 'archaeological%20arcana')
    919         result = urllib.parse.quote_from_bytes(b'')
    920         self.assertEqual(result, '')
    921 
    922     def test_unquote_to_bytes(self):
    923         result = urllib.parse.unquote_to_bytes('abc%20def')
    924         self.assertEqual(result, b'abc def')
    925         result = urllib.parse.unquote_to_bytes('')
    926         self.assertEqual(result, b'')
    927 
    928     def test_quote_errors(self):
    929         self.assertRaises(TypeError, urllib.parse.quote, b'foo',
    930                           encoding='utf-8')
    931         self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
    932 
    933     def test_issue14072(self):
    934         p1 = urllib.parse.urlsplit('tel:+31-641044153')
    935         self.assertEqual(p1.scheme, 'tel')
    936         self.assertEqual(p1.path, '+31-641044153')
    937         p2 = urllib.parse.urlsplit('tel:+31641044153')
    938         self.assertEqual(p2.scheme, 'tel')
    939         self.assertEqual(p2.path, '+31641044153')
    940         # assert the behavior for urlparse
    941         p1 = urllib.parse.urlparse('tel:+31-641044153')
    942         self.assertEqual(p1.scheme, 'tel')
    943         self.assertEqual(p1.path, '+31-641044153')
    944         p2 = urllib.parse.urlparse('tel:+31641044153')
    945         self.assertEqual(p2.scheme, 'tel')
    946         self.assertEqual(p2.path, '+31641044153')
    947 
    948     def test_telurl_params(self):
    949         p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
    950         self.assertEqual(p1.scheme, 'tel')
    951         self.assertEqual(p1.path, '123-4')
    952         self.assertEqual(p1.params, 'phone-context=+1-650-516')
    953 
    954         p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
    955         self.assertEqual(p1.scheme, 'tel')
    956         self.assertEqual(p1.path, '+1-201-555-0123')
    957         self.assertEqual(p1.params, '')
    958 
    959         p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
    960         self.assertEqual(p1.scheme, 'tel')
    961         self.assertEqual(p1.path, '7042')
    962         self.assertEqual(p1.params, 'phone-context=example.com')
    963 
    964         p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
    965         self.assertEqual(p1.scheme, 'tel')
    966         self.assertEqual(p1.path, '863-1234')
    967         self.assertEqual(p1.params, 'phone-context=+1-914-555')
    968 
    969     def test_Quoter_repr(self):
    970         quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
    971         self.assertIn('Quoter', repr(quoter))
    972 
    973     def test_all(self):
    974         expected = []
    975         undocumented = {
    976             'splitattr', 'splithost', 'splitnport', 'splitpasswd',
    977             'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
    978             'splitvalue',
    979             'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
    980         }
    981         for name in dir(urllib.parse):
    982             if name.startswith('_') or name in undocumented:
    983                 continue
    984             object = getattr(urllib.parse, name)
    985             if getattr(object, '__module__', None) == 'urllib.parse':
    986                 expected.append(name)
    987         self.assertCountEqual(urllib.parse.__all__, expected)
    988 
    989     def test_urlsplit_normalization(self):
    990         # Certain characters should never occur in the netloc,
    991         # including under normalization.
    992         # Ensure that ALL of them are detected and cause an error
    993         illegal_chars = '/:#?@'
    994         hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
    995         denorm_chars = [
    996             c for c in map(chr, range(128, sys.maxunicode))
    997             if (hex_chars & set(unicodedata.decomposition(c).split()))
    998             and c not in illegal_chars
    999         ]
   1000         # Sanity check that we found at least one such character
   1001         self.assertIn('\u2100', denorm_chars)
   1002         self.assertIn('\uFF03', denorm_chars)
   1003 
   1004         for scheme in ["http", "https", "ftp"]:
   1005             for c in denorm_chars:
   1006                 url = "{}://netloc{}false.netloc/path".format(scheme, c)
   1007                 with self.subTest(url=url, char='{:04X}'.format(ord(c))):
   1008                     with self.assertRaises(ValueError):
   1009                         urllib.parse.urlsplit(url)
   1010 
   1011 class Utility_Tests(unittest.TestCase):
   1012     """Testcase to test the various utility functions in the urllib."""
   1013     # In Python 2 this test class was in test_urllib.
   1014 
   1015     def test_splittype(self):
   1016         splittype = urllib.parse.splittype
   1017         self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
   1018         self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
   1019         self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
   1020         self.assertEqual(splittype('type:'), ('type', ''))
   1021         self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
   1022 
   1023     def test_splithost(self):
   1024         splithost = urllib.parse.splithost
   1025         self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
   1026                          ('www.example.org:80', '/foo/bar/baz.html'))
   1027         self.assertEqual(splithost('//www.example.org:80'),
   1028                          ('www.example.org:80', ''))
   1029         self.assertEqual(splithost('/foo/bar/baz.html'),
   1030                          (None, '/foo/bar/baz.html'))
   1031 
   1032         # bpo-30500: # starts a fragment.
   1033         self.assertEqual(splithost('//127.0.0.1#@host.com'),
   1034                          ('127.0.0.1', '/#@host.com'))
   1035         self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
   1036                          ('127.0.0.1', '/#@host.com:80'))
   1037         self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
   1038                          ('127.0.0.1:80', '/#@host.com'))
   1039 
   1040         # Empty host is returned as empty string.
   1041         self.assertEqual(splithost("///file"),
   1042                          ('', '/file'))
   1043 
   1044         # Trailing semicolon, question mark and hash symbol are kept.
   1045         self.assertEqual(splithost("//example.net/file;"),
   1046                          ('example.net', '/file;'))
   1047         self.assertEqual(splithost("//example.net/file?"),
   1048                          ('example.net', '/file?'))
   1049         self.assertEqual(splithost("//example.net/file#"),
   1050                          ('example.net', '/file#'))
   1051 
   1052     def test_splituser(self):
   1053         splituser = urllib.parse.splituser
   1054         self.assertEqual(splituser('User:Pass (at] www.python.org:080'),
   1055                          ('User:Pass', 'www.python.org:080'))
   1056         self.assertEqual(splituser('@www.python.org:080'),
   1057                          ('', 'www.python.org:080'))
   1058         self.assertEqual(splituser('www.python.org:080'),
   1059                          (None, 'www.python.org:080'))
   1060         self.assertEqual(splituser('User:Pass@'),
   1061                          ('User:Pass', ''))
   1062         self.assertEqual(splituser('User (at] example.com:Pass (at] www.python.org:080'),
   1063                          ('User (at] example.com:Pass', 'www.python.org:080'))
   1064 
   1065     def test_splitpasswd(self):
   1066         # Some of the password examples are not sensible, but it is added to
   1067         # confirming to RFC2617 and addressing issue4675.
   1068         splitpasswd = urllib.parse.splitpasswd
   1069         self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
   1070         self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
   1071         self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
   1072         self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
   1073         self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
   1074         self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
   1075         self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
   1076         self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
   1077         self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
   1078         self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
   1079         self.assertEqual(splitpasswd('user:'), ('user', ''))
   1080         self.assertEqual(splitpasswd('user'), ('user', None))
   1081         self.assertEqual(splitpasswd(':ab'), ('', 'ab'))
   1082 
   1083     def test_splitport(self):
   1084         splitport = urllib.parse.splitport
   1085         self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
   1086         self.assertEqual(splitport('parrot'), ('parrot', None))
   1087         self.assertEqual(splitport('parrot:'), ('parrot', None))
   1088         self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
   1089         self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
   1090         self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
   1091         self.assertEqual(splitport('[::1]'), ('[::1]', None))
   1092         self.assertEqual(splitport(':88'), ('', '88'))
   1093 
   1094     def test_splitnport(self):
   1095         splitnport = urllib.parse.splitnport
   1096         self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
   1097         self.assertEqual(splitnport('parrot'), ('parrot', -1))
   1098         self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
   1099         self.assertEqual(splitnport('parrot:'), ('parrot', -1))
   1100         self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
   1101         self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
   1102         self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
   1103         self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
   1104         self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
   1105 
   1106     def test_splitquery(self):
   1107         # Normal cases are exercised by other tests; ensure that we also
   1108         # catch cases with no port specified (testcase ensuring coverage)
   1109         splitquery = urllib.parse.splitquery
   1110         self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
   1111                          ('http://python.org/fake', 'foo=bar'))
   1112         self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
   1113                          ('http://python.org/fake?foo=bar', ''))
   1114         self.assertEqual(splitquery('http://python.org/fake'),
   1115                          ('http://python.org/fake', None))
   1116         self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))
   1117 
   1118     def test_splittag(self):
   1119         splittag = urllib.parse.splittag
   1120         self.assertEqual(splittag('http://example.com?foo=bar#baz'),
   1121                          ('http://example.com?foo=bar', 'baz'))
   1122         self.assertEqual(splittag('http://example.com?foo=bar#'),
   1123                          ('http://example.com?foo=bar', ''))
   1124         self.assertEqual(splittag('#baz'), ('', 'baz'))
   1125         self.assertEqual(splittag('http://example.com?foo=bar'),
   1126                          ('http://example.com?foo=bar', None))
   1127         self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
   1128                          ('http://example.com?foo=bar#baz', 'boo'))
   1129 
   1130     def test_splitattr(self):
   1131         splitattr = urllib.parse.splitattr
   1132         self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
   1133                          ('/path', ['attr1=value1', 'attr2=value2']))
   1134         self.assertEqual(splitattr('/path;'), ('/path', ['']))
   1135         self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
   1136                          ('', ['attr1=value1', 'attr2=value2']))
   1137         self.assertEqual(splitattr('/path'), ('/path', []))
   1138 
   1139     def test_splitvalue(self):
   1140         # Normal cases are exercised by other tests; test pathological cases
   1141         # with no key/value pairs. (testcase ensuring coverage)
   1142         splitvalue = urllib.parse.splitvalue
   1143         self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
   1144         self.assertEqual(splitvalue('foo='), ('foo', ''))
   1145         self.assertEqual(splitvalue('=bar'), ('', 'bar'))
   1146         self.assertEqual(splitvalue('foobar'), ('foobar', None))
   1147         self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))
   1148 
   1149     def test_to_bytes(self):
   1150         result = urllib.parse.to_bytes('http://www.python.org')
   1151         self.assertEqual(result, 'http://www.python.org')
   1152         self.assertRaises(UnicodeError, urllib.parse.to_bytes,
   1153                           'http://www.python.org/medi\u00e6val')
   1154 
   1155     def test_unwrap(self):
   1156         url = urllib.parse.unwrap('<URL:type://host/path>')
   1157         self.assertEqual(url, 'type://host/path')
   1158 
   1159 
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
   1162