Home | History | Annotate | Download | only in test
      1 from test import test_support
      2 import unittest
      3 import urlparse
      4 
      5 RFC1808_BASE = "http://a/b/c/d;p?q#f"
      6 RFC2396_BASE = "http://a/b/c/d;p?q"
      7 RFC3986_BASE = 'http://a/b/c/d;p?q'
      8 SIMPLE_BASE  = 'http://a/b/c/d'
      9 
     10 # A list of test cases.  Each test case is a two-tuple that contains
     11 # a string with the query and a dictionary with the expected result.
     12 
     13 parse_qsl_test_cases = [
     14     ("", []),
     15     ("&", []),
     16     ("&&", []),
     17     ("=", [('', '')]),
     18     ("=a", [('', 'a')]),
     19     ("a", [('a', '')]),
     20     ("a=", [('a', '')]),
     21     ("a=", [('a', '')]),
     22     ("&a=b", [('a', 'b')]),
     23     ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
     24     ("a=1&a=2", [('a', '1'), ('a', '2')]),
     25     (";", []),
     26     (";;", []),
     27     (";a=b", [('a', 'b')]),
     28     ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
     29     ("a=1;a=2", [('a', '1'), ('a', '2')]),
     30     (b";", []),
     31     (b";;", []),
     32     (b";a=b", [(b'a', b'b')]),
     33     (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
     34     (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
     35 ]
     36 
     37 parse_qs_test_cases = [
     38     ("", {}),
     39     ("&", {}),
     40     ("&&", {}),
     41     ("=", {'': ['']}),
     42     ("=a", {'': ['a']}),
     43     ("a", {'a': ['']}),
     44     ("a=", {'a': ['']}),
     45     ("&a=b", {'a': ['b']}),
     46     ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
     47     ("a=1&a=2", {'a': ['1', '2']}),
     48     (b"", {}),
     49     (b"&", {}),
     50     (b"&&", {}),
     51     (b"=", {b'': [b'']}),
     52     (b"=a", {b'': [b'a']}),
     53     (b"a", {b'a': [b'']}),
     54     (b"a=", {b'a': [b'']}),
     55     (b"&a=b", {b'a': [b'b']}),
     56     (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
     57     (b"a=1&a=2", {b'a': [b'1', b'2']}),
     58     (";", {}),
     59     (";;", {}),
     60     (";a=b", {'a': ['b']}),
     61     ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
     62     ("a=1;a=2", {'a': ['1', '2']}),
     63     (b";", {}),
     64     (b";;", {}),
     65     (b";a=b", {b'a': [b'b']}),
     66     (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
     67     (b"a=1;a=2", {b'a': [b'1', b'2']}),
     68 ]
     69 
     70 class UrlParseTestCase(unittest.TestCase):
     71 
     72     def checkRoundtrips(self, url, parsed, split):
     73         result = urlparse.urlparse(url)
     74         self.assertEqual(result, parsed)
     75         t = (result.scheme, result.netloc, result.path,
     76              result.params, result.query, result.fragment)
     77         self.assertEqual(t, parsed)
     78         # put it back together and it should be the same
     79         result2 = urlparse.urlunparse(result)
     80         self.assertEqual(result2, url)
     81         self.assertEqual(result2, result.geturl())
     82 
     83         # the result of geturl() is a fixpoint; we can always parse it
     84         # again to get the same result:
     85         result3 = urlparse.urlparse(result.geturl())
     86         self.assertEqual(result3.geturl(), result.geturl())
     87         self.assertEqual(result3,          result)
     88         self.assertEqual(result3.scheme,   result.scheme)
     89         self.assertEqual(result3.netloc,   result.netloc)
     90         self.assertEqual(result3.path,     result.path)
     91         self.assertEqual(result3.params,   result.params)
     92         self.assertEqual(result3.query,    result.query)
     93         self.assertEqual(result3.fragment, result.fragment)
     94         self.assertEqual(result3.username, result.username)
     95         self.assertEqual(result3.password, result.password)
     96         self.assertEqual(result3.hostname, result.hostname)
     97         self.assertEqual(result3.port,     result.port)
     98 
     99         # check the roundtrip using urlsplit() as well
    100         result = urlparse.urlsplit(url)
    101         self.assertEqual(result, split)
    102         t = (result.scheme, result.netloc, result.path,
    103              result.query, result.fragment)
    104         self.assertEqual(t, split)
    105         result2 = urlparse.urlunsplit(result)
    106         self.assertEqual(result2, url)
    107         self.assertEqual(result2, result.geturl())
    108 
    109         # check the fixpoint property of re-parsing the result of geturl()
    110         result3 = urlparse.urlsplit(result.geturl())
    111         self.assertEqual(result3.geturl(), result.geturl())
    112         self.assertEqual(result3,          result)
    113         self.assertEqual(result3.scheme,   result.scheme)
    114         self.assertEqual(result3.netloc,   result.netloc)
    115         self.assertEqual(result3.path,     result.path)
    116         self.assertEqual(result3.query,    result.query)
    117         self.assertEqual(result3.fragment, result.fragment)
    118         self.assertEqual(result3.username, result.username)
    119         self.assertEqual(result3.password, result.password)
    120         self.assertEqual(result3.hostname, result.hostname)
    121         self.assertEqual(result3.port,     result.port)
    122 
    def test_qsl(self):
        # Every query in parse_qsl_test_cases is parsed twice: once keeping
        # blank values (the table's expectation as-is) and once dropping
        # the pairs whose value is empty.
        for query, pairs in parse_qsl_test_cases:
            parsed = urlparse.parse_qsl(query, keep_blank_values=True)
            self.assertEqual(parsed, pairs, "Error parsing %r" % query)

            non_blank_pairs = [pair for pair in pairs if len(pair[1])]
            parsed = urlparse.parse_qsl(query, keep_blank_values=False)
            self.assertEqual(parsed, non_blank_pairs,
                             "Error parsing %r" % query)
    131 
    def test_qs(self):
        # Every query in parse_qs_test_cases is parsed twice: once keeping
        # blank values (the table's expectation as-is) and once dropping
        # the fields whose first value is empty.
        for query, fields in parse_qs_test_cases:
            parsed = urlparse.parse_qs(query, keep_blank_values=True)
            self.assertEqual(parsed, fields, "Error parsing %r" % query)

            non_blank_fields = {}
            for name in fields:
                if len(fields[name][0]):
                    non_blank_fields[name] = fields[name]
            parsed = urlparse.parse_qs(query, keep_blank_values=False)
            self.assertEqual(parsed, non_blank_fields,
                             "Error parsing %r" % query)
    141 
    142     def test_roundtrips(self):
    143         testcases = [
    144             ('file:///tmp/junk.txt',
    145              ('file', '', '/tmp/junk.txt', '', '', ''),
    146              ('file', '', '/tmp/junk.txt', '', '')),
    147             ('imap://mail.python.org/mbox1',
    148              ('imap', 'mail.python.org', '/mbox1', '', '', ''),
    149              ('imap', 'mail.python.org', '/mbox1', '', '')),
    150             ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
    151              ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
    152               '', '', ''),
    153              ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
    154               '', '')),
    155             ('nfs://server/path/to/file.txt',
    156              ('nfs', 'server', '/path/to/file.txt',  '', '', ''),
    157              ('nfs', 'server', '/path/to/file.txt', '', '')),
    158             ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
    159              ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
    160               '', '', ''),
    161              ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
    162               '', '')),
    163             ('git+ssh://git (at] github.com/user/project.git',
    164             ('git+ssh', 'git (at] github.com','/user/project.git',
    165              '','',''),
    166             ('git+ssh', 'git (at] github.com','/user/project.git',
    167              '', ''))
    168             ]
    169         for url, parsed, split in testcases:
    170             self.checkRoundtrips(url, parsed, split)
    171 
    172     def test_http_roundtrips(self):
    173         # urlparse.urlsplit treats 'http:' as an optimized special case,
    174         # so we test both 'http:' and 'https:' in all the following.
    175         # Three cheers for white box knowledge!
    176         testcases = [
    177             ('://www.python.org',
    178              ('www.python.org', '', '', '', ''),
    179              ('www.python.org', '', '', '')),
    180             ('://www.python.org#abc',
    181              ('www.python.org', '', '', '', 'abc'),
    182              ('www.python.org', '', '', 'abc')),
    183             ('://www.python.org?q=abc',
    184              ('www.python.org', '', '', 'q=abc', ''),
    185              ('www.python.org', '', 'q=abc', '')),
    186             ('://www.python.org/#abc',
    187              ('www.python.org', '/', '', '', 'abc'),
    188              ('www.python.org', '/', '', 'abc')),
    189             ('://a/b/c/d;p?q#f',
    190              ('a', '/b/c/d', 'p', 'q', 'f'),
    191              ('a', '/b/c/d;p', 'q', 'f')),
    192             ]
    193         for scheme in ('http', 'https'):
    194             for url, parsed, split in testcases:
    195                 url = scheme + url
    196                 parsed = (scheme,) + parsed
    197                 split = (scheme,) + split
    198                 self.checkRoundtrips(url, parsed, split)
    199 
    200     def checkJoin(self, base, relurl, expected):
    201         self.assertEqual(urlparse.urljoin(base, relurl), expected,
    202                          (base, relurl, expected))
    203 
    204     def test_unparse_parse(self):
    205         for u in ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]:
    206             self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
    207             self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
    208 
    def test_RFC1808(self):
        # Each pair is (relative reference, expected result of joining it
        # onto RFC1808_BASE).
        joins = [
            # "normal" cases from RFC 1808:
            ('g:h', 'g:h'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('g?y', 'http://a/b/c/g?y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('#s', 'http://a/b/c/d;p?q#s'),
            ('g#s', 'http://a/b/c/g#s'),
            ('g#s/./x', 'http://a/b/c/g#s/./x'),
            ('g?y#s', 'http://a/b/c/g?y#s'),
            ('g;x', 'http://a/b/c/g;x'),
            ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../', 'http://a/'),
            ('../../g', 'http://a/g'),

            # "abnormal" cases from RFC 1808:
            ('', 'http://a/b/c/d;p?q#f'),
            ('../../../g', 'http://a/../g'),
            ('../../../../g', 'http://a/../../g'),
            ('/./g', 'http://a/./g'),
            ('/../g', 'http://a/../g'),
            ('g.', 'http://a/b/c/g.'),
            ('.g', 'http://a/b/c/.g'),
            ('g..', 'http://a/b/c/g..'),
            ('..g', 'http://a/b/c/..g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),

            # RFC 1808 and RFC 1630 disagree on these (according to
            # RFC 1808), so we'll not actually run these tests (which
            # expect 1808 behavior).
            #('http:g', 'http:g'),
            #('http:', 'http:'),
        ]
        for relurl, expected in joins:
            self.checkJoin(RFC1808_BASE, relurl, expected)
    253 
    254     def test_RFC2368(self):
    255         # Issue 11467: path that starts with a number is not parsed correctly
    256         self.assertEqual(urlparse.urlparse('mailto:1337 (at] example.org'),
    257                 ('mailto', '', '1337 (at] example.org', '', '', ''))
    258 
    def test_RFC2396(self):
        # cases from RFC 2396
        # Each pair is (relative reference, expected result of joining it
        # onto RFC2396_BASE).
        joins = [
            ('g:h', 'g:h'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('g?y', 'http://a/b/c/g?y'),
            ('#s', 'http://a/b/c/d;p?q#s'),
            ('g#s', 'http://a/b/c/g#s'),
            ('g?y#s', 'http://a/b/c/g?y#s'),
            ('g;x', 'http://a/b/c/g;x'),
            ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../', 'http://a/'),
            ('../../g', 'http://a/g'),
            ('', RFC2396_BASE),
            ('../../../g', 'http://a/../g'),
            ('../../../../g', 'http://a/../../g'),
            ('/./g', 'http://a/./g'),
            ('/../g', 'http://a/../g'),
            ('g.', 'http://a/b/c/g.'),
            ('.g', 'http://a/b/c/.g'),
            ('g..', 'http://a/b/c/g..'),
            ('..g', 'http://a/b/c/..g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),
            ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
            ('g;x=1/../y', 'http://a/b/c/y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('g?y/../x', 'http://a/b/c/g?y/../x'),
            ('g#s/./x', 'http://a/b/c/g#s/./x'),
            ('g#s/../x', 'http://a/b/c/g#s/../x'),
        ]
        for relurl, expected in joins:
            self.checkJoin(RFC2396_BASE, relurl, expected)
    300 
    301     def test_RFC3986(self):
    302         # Test cases from RFC3986
    303         self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
    304         self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')
    305         self.checkJoin(RFC3986_BASE, 'g:h','g:h')
    306         self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
    307         self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
    308         self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
    309         self.checkJoin(RFC3986_BASE, '/g','http://a/g')
    310         self.checkJoin(RFC3986_BASE, '//g','http://g')
    311         self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
    312         self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
    313         self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
    314         self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
    315         self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
    316         self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
    317         self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
    318         self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
    319         self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
    320         self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
    321         self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
    322         self.checkJoin(RFC3986_BASE, '..','http://a/b/')
    323         self.checkJoin(RFC3986_BASE, '../','http://a/b/')
    324         self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
    325         self.checkJoin(RFC3986_BASE, '../..','http://a/')
    326         self.checkJoin(RFC3986_BASE, '../../','http://a/')
    327         self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
    328 
    329         #Abnormal Examples
    330 
    331         # The 'abnormal scenarios' are incompatible with RFC2986 parsing
    332         # Tests are here for reference.
    333 
    334         #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
    335         #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
    336         #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
    337         #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
    338 
    339         self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
    340         self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
    341         self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
    342         self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
    343         self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
    344         self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
    345         self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
    346         self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
    347         self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
    348         self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
    349         self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
    350         self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
    351         self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
    352         self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
    353         #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
    354         self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') # relaxed parser
    355 
    356         # Test for issue9721
    357         self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
    358 
    def test_urljoins(self):
        # (relative reference, expected result) pairs joined onto
        # SIMPLE_BASE, in the order they are checked.
        simple_joins = [
            ('g:h', 'g:h'),
            ('http:g', 'http://a/b/c/g'),
            ('http:', 'http://a/b/c/d'),
            ('g', 'http://a/b/c/g'),
            ('./g', 'http://a/b/c/g'),
            ('g/', 'http://a/b/c/g/'),
            ('/g', 'http://a/g'),
            ('//g', 'http://g'),
            ('?y', 'http://a/b/c/d?y'),
            ('g?y', 'http://a/b/c/g?y'),
            ('g?y/./x', 'http://a/b/c/g?y/./x'),
            ('.', 'http://a/b/c/'),
            ('./', 'http://a/b/c/'),
            ('..', 'http://a/b/'),
            ('../', 'http://a/b/'),
            ('../g', 'http://a/b/g'),
            ('../..', 'http://a/'),
            ('../../g', 'http://a/g'),
            ('../../../g', 'http://a/../g'),
            ('./../g', 'http://a/b/g'),
            ('./g/.', 'http://a/b/c/g/'),
            ('/./g', 'http://a/./g'),
            ('g/./h', 'http://a/b/c/g/h'),
            ('g/../h', 'http://a/b/c/h'),
            ('http:g', 'http://a/b/c/g'),
            ('http:', 'http://a/b/c/d'),
            ('http:?y', 'http://a/b/c/d?y'),
            ('http:g?y', 'http://a/b/c/g?y'),
            ('http:g?y/./x', 'http://a/b/c/g?y/./x'),
        ]
        for relurl, expected in simple_joins:
            self.checkJoin(SIMPLE_BASE, relurl, expected)

        # (base, relative reference, expected result) for the remaining
        # cases, each of which brings its own base.
        other_joins = [
            ('http:///', '..', 'http:///'),
            ('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
            ('', 'http://a/./g', 'http://a/./g'),
            ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2'),
            ('svn+ssh://pathtorepo/dir1', 'dir2',
             'svn+ssh://pathtorepo/dir2'),
        ]
        for base, relurl, expected in other_joins:
            self.checkJoin(base, relurl, expected)
    394 
    395     def test_RFC2732(self):
    396         for url, hostname, port in [
    397             ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
    398             ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
    399             ('http://[::1]:5432/foo/', '::1', 5432),
    400             ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
    401             ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
    402             ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
    403              'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
    404             ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
    405             ('http://[::ffff:12.34.56.78]:5432/foo/',
    406              '::ffff:12.34.56.78', 5432),
    407             ('http://Test.python.org/foo/', 'test.python.org', None),
    408             ('http://12.34.56.78/foo/', '12.34.56.78', None),
    409             ('http://[::1]/foo/', '::1', None),
    410             ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
    411             ('http://[dead:beef::]/foo/', 'dead:beef::', None),
    412             ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
    413              'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
    414             ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
    415             ('http://[::ffff:12.34.56.78]/foo/',
    416              '::ffff:12.34.56.78', None),
    417             ('http://Test.python.org:/foo/', 'test.python.org', None),
    418             ('http://12.34.56.78:/foo/', '12.34.56.78', None),
    419             ('http://[::1]:/foo/', '::1', None),
    420             ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
    421             ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
    422             ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
    423              'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
    424             ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
    425             ('http://[::ffff:12.34.56.78]:/foo/',
    426              '::ffff:12.34.56.78', None),
    427             ]:
    428             urlparsed = urlparse.urlparse(url)
    429             self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
    430 
    431         for invalid_url in [
    432                 'http://::12.34.56.78]/',
    433                 'http://[::1/foo/',
    434                 'ftp://[::1/foo/bad]/bad',
    435                 'http://[::1/foo/bad]/bad',
    436                 'http://[::ffff:12.34.56.78']:
    437             self.assertRaises(ValueError, urlparse.urlparse, invalid_url)
    438 
    def test_urldefrag(self):
        # (url, the url with its fragment removed, the fragment)
        defrag_cases = (
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
        )
        for url, defrag, frag in defrag_cases:
            result = urlparse.urldefrag(url)
            self.assertEqual(result, (defrag, frag))
    453 
    454     def test_urlsplit_attributes(self):
    455         url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
    456         p = urlparse.urlsplit(url)
    457         self.assertEqual(p.scheme, "http")
    458         self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
    459         self.assertEqual(p.path, "/doc/")
    460         self.assertEqual(p.query, "")
    461         self.assertEqual(p.fragment, "frag")
    462         self.assertEqual(p.username, None)
    463         self.assertEqual(p.password, None)
    464         self.assertEqual(p.hostname, "www.python.org")
    465         self.assertEqual(p.port, None)
    466         # geturl() won't return exactly the original URL in this case
    467         # since the scheme is always case-normalized
    468         #self.assertEqual(p.geturl(), url)
    469 
    470         url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
    471         p = urlparse.urlsplit(url)
    472         self.assertEqual(p.scheme, "http")
    473         self.assertEqual(p.netloc, "User:Pass (at] www.python.org:080")
    474         self.assertEqual(p.path, "/doc/")
    475         self.assertEqual(p.query, "query=yes")
    476         self.assertEqual(p.fragment, "frag")
    477         self.assertEqual(p.username, "User")
    478         self.assertEqual(p.password, "Pass")
    479         self.assertEqual(p.hostname, "www.python.org")
    480         self.assertEqual(p.port, 80)
    481         self.assertEqual(p.geturl(), url)
    482 
    483         # Addressing issue1698, which suggests Username can contain
    484         # "@" characters.  Though not RFC compliant, many ftp sites allow
    485         # and request email addresses as usernames.
    486 
    487         url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
    488         p = urlparse.urlsplit(url)
    489         self.assertEqual(p.scheme, "http")
    490         self.assertEqual(p.netloc, "User (at] example.com:Pass (at] www.python.org:080")
    491         self.assertEqual(p.path, "/doc/")
    492         self.assertEqual(p.query, "query=yes")
    493         self.assertEqual(p.fragment, "frag")
    494         self.assertEqual(p.username, "User (at] example.com")
    495         self.assertEqual(p.password, "Pass")
    496         self.assertEqual(p.hostname, "www.python.org")
    497         self.assertEqual(p.port, 80)
    498         self.assertEqual(p.geturl(), url)
    499 
    500         # Verify an illegal port of value greater than 65535 is set as None
    501         url = "http://www.python.org:65536"
    502         p = urlparse.urlsplit(url)
    503         self.assertEqual(p.port, None)
    504 
    505     def test_issue14072(self):
    506         p1 = urlparse.urlsplit('tel:+31-641044153')
    507         self.assertEqual(p1.scheme, 'tel')
    508         self.assertEqual(p1.path, '+31-641044153')
    509 
    510         p2 = urlparse.urlsplit('tel:+31641044153')
    511         self.assertEqual(p2.scheme, 'tel')
    512         self.assertEqual(p2.path, '+31641044153')
    513 
    514         # Assert for urlparse
    515         p1 = urlparse.urlparse('tel:+31-641044153')
    516         self.assertEqual(p1.scheme, 'tel')
    517         self.assertEqual(p1.path, '+31-641044153')
    518 
    519         p2 = urlparse.urlparse('tel:+31641044153')
    520         self.assertEqual(p2.scheme, 'tel')
    521         self.assertEqual(p2.path, '+31641044153')
    522 
    523 
    524     def test_telurl_params(self):
    525         p1 = urlparse.urlparse('tel:123-4;phone-context=+1-650-516')
    526         self.assertEqual(p1.scheme, 'tel')
    527         self.assertEqual(p1.path, '123-4')
    528         self.assertEqual(p1.params, 'phone-context=+1-650-516')
    529 
    530         p1 = urlparse.urlparse('tel:+1-201-555-0123')
    531         self.assertEqual(p1.scheme, 'tel')
    532         self.assertEqual(p1.path, '+1-201-555-0123')
    533         self.assertEqual(p1.params, '')
    534 
    535         p1 = urlparse.urlparse('tel:7042;phone-context=example.com')
    536         self.assertEqual(p1.scheme, 'tel')
    537         self.assertEqual(p1.path, '7042')
    538         self.assertEqual(p1.params, 'phone-context=example.com')
    539 
    540         p1 = urlparse.urlparse('tel:863-1234;phone-context=+1-914-555')
    541         self.assertEqual(p1.scheme, 'tel')
    542         self.assertEqual(p1.path, '863-1234')
    543         self.assertEqual(p1.params, 'phone-context=+1-914-555')
    544 
    545 
    546     def test_attributes_bad_port(self):
    547         """Check handling of non-integer ports."""
    548         p = urlparse.urlsplit("http://www.example.net:foo")
    549         self.assertEqual(p.netloc, "www.example.net:foo")
    550         self.assertRaises(ValueError, lambda: p.port)
    551 
    552         p = urlparse.urlparse("http://www.example.net:foo")
    553         self.assertEqual(p.netloc, "www.example.net:foo")
    554         self.assertRaises(ValueError, lambda: p.port)
    555 
    556     def test_attributes_without_netloc(self):
    557         # This example is straight from RFC 3261.  It looks like it
    558         # should allow the username, hostname, and port to be filled
    559         # in, but doesn't.  Since it's a URI and doesn't use the
    560         # scheme://netloc syntax, the netloc and related attributes
    561         # should be left empty.
    562         uri = "sip:alice (at] atlanta.com;maddr=239.255.255.1;ttl=15"
    563         p = urlparse.urlsplit(uri)
    564         self.assertEqual(p.netloc, "")
    565         self.assertEqual(p.username, None)
    566         self.assertEqual(p.password, None)
    567         self.assertEqual(p.hostname, None)
    568         self.assertEqual(p.port, None)
    569         self.assertEqual(p.geturl(), uri)
    570 
    571         p = urlparse.urlparse(uri)
    572         self.assertEqual(p.netloc, "")
    573         self.assertEqual(p.username, None)
    574         self.assertEqual(p.password, None)
    575         self.assertEqual(p.hostname, None)
    576         self.assertEqual(p.port, None)
    577         self.assertEqual(p.geturl(), uri)
    578 
    579     def test_caching(self):
    580         # Test case for bug #1313119
    581         uri = "http://example.com/doc/"
    582         unicode_uri = unicode(uri)
    583 
    584         urlparse.urlparse(unicode_uri)
    585         p = urlparse.urlparse(uri)
    586         self.assertEqual(type(p.scheme), type(uri))
    587         self.assertEqual(type(p.hostname), type(uri))
    588         self.assertEqual(type(p.path), type(uri))
    589 
    590     def test_noslash(self):
    591         # Issue 1637: http://foo.com?query is legal
    592         self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
    593                          ('http', 'example.com', '', '', 'blahblah=/foo', ''))
    594 
    595     def test_anyscheme(self):
    596         # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
    597         self.assertEqual(urlparse.urlparse("s3://foo.com/stuff"),
    598                          ('s3','foo.com','/stuff','','',''))
    599         self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
    600                          ('x-newscheme','foo.com','/stuff','','',''))
    601         self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
    602                          ('x-newscheme','foo.com','/stuff','','query','fragment'))
    603         self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query"),
    604                          ('x-newscheme','foo.com','/stuff','','query',''))
    605 
    606     def test_withoutscheme(self):
    607         # Test urlparse without scheme
    608         # Issue 754016: urlparse goes wrong with IP:port without scheme
    609         # RFC 1808 specifies that netloc should start with //, urlparse expects
    610         # the same, otherwise it classifies the portion of url as path.
    611         self.assertEqual(urlparse.urlparse("path"),
    612                 ('','','path','','',''))
    613         self.assertEqual(urlparse.urlparse("//www.python.org:80"),
    614                 ('','www.python.org:80','','','',''))
    615         self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
    616                 ('http','www.python.org:80','','','',''))
    617 
    618     def test_portseparator(self):
    619         # Issue 754016 makes changes for port separator ':' from scheme separator
    620         self.assertEqual(urlparse.urlparse("path:80"),
    621                 ('','','path:80','','',''))
    622         self.assertEqual(urlparse.urlparse("http:"),('http','','','','',''))
    623         self.assertEqual(urlparse.urlparse("https:"),('https','','','','',''))
    624         self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
    625                 ('http','www.python.org:80','','','',''))
    626 
    627 def test_main():
    628     test_support.run_unittest(UrlParseTestCase)
    629 
    630 if __name__ == "__main__":
    631     test_main()
    632