#! /usr/bin/env python
"""Regression tests for the ``urlparse`` module (Python 2)."""

from test import test_support
import unittest
import urlparse

# Base URLs used by the urljoin() tests below.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'

# A list of test cases.  Each test case is a two-tuple that contains
# a string with the query and a list of (name, value) pairs with the
# expected result of parse_qsl(..., keep_blank_values=True).

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
]


class UrlParseTestCase(unittest.TestCase):
    """Tests for urlparse, urlsplit, urljoin, urldefrag and parse_qsl."""

    def checkRoundtrips(self, url, parsed, split):
        """Check that *url* parses, unparses and re-parses consistently.

        url    -- the URL string under test
        parsed -- the 6-tuple expected from urlparse.urlparse(url)
        split  -- the 5-tuple expected from urlparse.urlsplit(url)
        """
        result = urlparse.urlparse(url)
        self.assertEqual(result, parsed)
        t = (result.scheme, result.netloc, result.path,
             result.params, result.query, result.fragment)
        self.assertEqual(t, parsed)
        # put it back together and it should be the same
        result2 = urlparse.urlunparse(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        result3 = urlparse.urlparse(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        self.assertEqual(result3.scheme, result.scheme)
        self.assertEqual(result3.netloc, result.netloc)
        self.assertEqual(result3.path, result.path)
        self.assertEqual(result3.params, result.params)
        self.assertEqual(result3.query, result.query)
        self.assertEqual(result3.fragment, result.fragment)
        self.assertEqual(result3.username, result.username)
        self.assertEqual(result3.password, result.password)
        self.assertEqual(result3.hostname, result.hostname)
        self.assertEqual(result3.port, result.port)

        # check the roundtrip using urlsplit() as well
        result = urlparse.urlsplit(url)
        self.assertEqual(result, split)
        t = (result.scheme, result.netloc, result.path,
             result.query, result.fragment)
        self.assertEqual(t, split)
        result2 = urlparse.urlunsplit(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # check the fixpoint property of re-parsing the result of geturl()
        result3 = urlparse.urlsplit(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        self.assertEqual(result3.scheme, result.scheme)
        self.assertEqual(result3.netloc, result.netloc)
        self.assertEqual(result3.path, result.path)
        self.assertEqual(result3.query, result.query)
        self.assertEqual(result3.fragment, result.fragment)
        self.assertEqual(result3.username, result.username)
        self.assertEqual(result3.password, result.password)
        self.assertEqual(result3.hostname, result.hostname)
        self.assertEqual(result3.port, result.port)

    def test_qsl(self):
        for orig, expect in parse_qsl_test_cases:
            result = urlparse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            # Without keep_blank_values, pairs with an empty value are
            # expected to be dropped.
            expect_without_blanks = [pair for pair in expect if pair[1]]
            result = urlparse.parse_qsl(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)

    def test_roundtrips(self):
        testcases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('nfs://server/path/to/file.txt',
             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
             ('nfs', 'server', '/path/to/file.txt', '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
            ('git+ssh://git@github.com/user/project.git',
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '', ''),
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '')),
            ]
        for url, parsed, split in testcases:
            self.checkRoundtrips(url, parsed, split)

    def test_http_roundtrips(self):
        # urlparse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        testcases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
            ]
        for scheme in ('http', 'https'):
            for url, parsed, split in testcases:
                # The table omits the scheme; prepend it to the URL and
                # to both expected tuples.
                url = scheme + url
                parsed = (scheme,) + parsed
                split = (scheme,) + split
                self.checkRoundtrips(url, parsed, split)

    def checkJoin(self, base, relurl, expected):
        """Assert that urljoin(base, relurl) equals *expected*.

        The (base, relurl, expected) triple is passed as the failure
        message so a failing case can be identified.
        """
        self.assertEqual(urlparse.urljoin(base, relurl), expected,
                         (base, relurl, expected))

    def test_unparse_parse(self):
        for u in ['Python', './Python', 'x-newscheme://foo.com/stuff',
                  'x://y', 'x:/y', 'x:/', '/']:
            self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
            self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)

    def test_RFC1808(self):
        # "normal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

        # "abnormal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    def test_RFC2368(self):
        # Issue 11467: path that starts with a number is not parsed correctly
        self.assertEqual(urlparse.urlparse('mailto:1337@example.org'),
                         ('mailto', '', '1337@example.org', '', '', ''))

    def test_RFC2396(self):
        # cases from RFC 2396
        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

    def test_RFC3986(self):
        # Test cases from RFC3986
        self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
        self.checkJoin(RFC3986_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC3986_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC3986_BASE, '//g', 'http://g')
        self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC3986_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC3986_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC3986_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
        self.checkJoin(RFC3986_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC3986_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC3986_BASE, '', 'http://a/b/c/d;p?q')
        self.checkJoin(RFC3986_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC3986_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC3986_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC3986_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC3986_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC3986_BASE, '../..', 'http://a/')
        self.checkJoin(RFC3986_BASE, '../../', 'http://a/')
        self.checkJoin(RFC3986_BASE, '../../g', 'http://a/g')

        #Abnormal Examples

        # The RFC 3986 'abnormal' examples below expect excess '..' and
        # leading '/.' segments to be dropped, whereas the RFC 1808 and
        # RFC 2396 tests in this file expect them to be kept (for example
        # 'http://a/../g').  They are therefore disabled and kept here
        # for reference only.

        #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')

        self.checkJoin(RFC3986_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC3986_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC3986_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC3986_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC3986_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC3986_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC3986_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC3986_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC3986_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC3986_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC3986_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC3986_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC3986_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
        #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
        self.checkJoin(RFC3986_BASE, 'http:g', 'http://a/b/c/g')  # relaxed parser

        # Test for issue9721
        self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')

    def test_urljoins(self):
        self.checkJoin(SIMPLE_BASE, 'g:h', 'g:h')
        self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d')
        self.checkJoin(SIMPLE_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(SIMPLE_BASE, '/g', 'http://a/g')
        self.checkJoin(SIMPLE_BASE, '//g', 'http://g')
        self.checkJoin(SIMPLE_BASE, '?y', 'http://a/b/c/d?y')
        self.checkJoin(SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(SIMPLE_BASE, '.', 'http://a/b/c/')
        self.checkJoin(SIMPLE_BASE, './', 'http://a/b/c/')
        self.checkJoin(SIMPLE_BASE, '..', 'http://a/b/')
        self.checkJoin(SIMPLE_BASE, '../', 'http://a/b/')
        self.checkJoin(SIMPLE_BASE, '../g', 'http://a/b/g')
        self.checkJoin(SIMPLE_BASE, '../..', 'http://a/')
        self.checkJoin(SIMPLE_BASE, '../../g', 'http://a/g')
        self.checkJoin(SIMPLE_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(SIMPLE_BASE, './../g', 'http://a/b/g')
        self.checkJoin(SIMPLE_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(SIMPLE_BASE, '/./g', 'http://a/./g')
        self.checkJoin(SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(SIMPLE_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d')
        self.checkJoin(SIMPLE_BASE, 'http:?y', 'http://a/b/c/d?y')
        self.checkJoin(SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y')
        self.checkJoin(SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin('http:///', '..', 'http:///')
        self.checkJoin('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin('', 'http://a/./g', 'http://a/./g')
        self.checkJoin('svn://pathtorepo/dir1', 'dir2',
                       'svn://pathtorepo/dir2')
        self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2',
                       'svn+ssh://pathtorepo/dir2')

    def test_RFC2732(self):
        # Each entry is (url, expected hostname, expected port).
        for url, hostname, port in [
            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
            ('http://[::1]:5432/foo/', '::1', 5432),
            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
            ('http://[::ffff:12.34.56.78]:5432/foo/',
             '::ffff:12.34.56.78', 5432),
            ('http://Test.python.org/foo/', 'test.python.org', None),
            ('http://12.34.56.78/foo/', '12.34.56.78', None),
            ('http://[::1]/foo/', '::1', None),
            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]/foo/',
             '::ffff:12.34.56.78', None),
            ]:
            urlparsed = urlparse.urlparse(url)
            self.assertEqual((urlparsed.hostname, urlparsed.port),
                             (hostname, port))

        # URLs with unbalanced square brackets are expected to be rejected.
        for invalid_url in [
                'http://::12.34.56.78]/',
                'http://[::1/foo/',
                'ftp://[::1/foo/bad]/bad',
                'http://[::1/foo/bad]/bad',
                'http://[::ffff:12.34.56.78']:
            self.assertRaises(ValueError, urlparse.urlparse, invalid_url)

    def test_urldefrag(self):
        # Each entry is (url, expected defragmented url, expected fragment).
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
            ]:
            self.assertEqual(urlparse.urldefrag(url), (defrag, frag))

    def test_urlsplit_attributes(self):
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, None)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized
        #self.assertEqual(p.geturl(), url)

        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.

        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Verify an illegal port of value greater than 65535 is set as None
        url = "http://www.python.org:65536"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.port, None)

    def test_issue14072(self):
        p1 = urlparse.urlsplit('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlsplit('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

        # Assert for urlparse
        p1 = urlparse.urlparse('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlparse('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

    def test_telurl_params(self):
        p1 = urlparse.urlparse('tel:123-4;phone-context=+1-650-516')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '123-4')
        self.assertEqual(p1.params, 'phone-context=+1-650-516')

        p1 = urlparse.urlparse('tel:+1-201-555-0123')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+1-201-555-0123')
        self.assertEqual(p1.params, '')

        p1 = urlparse.urlparse('tel:7042;phone-context=example.com')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '7042')
        self.assertEqual(p1.params, 'phone-context=example.com')

        p1 = urlparse.urlparse('tel:863-1234;phone-context=+1-914-555')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '863-1234')
        self.assertEqual(p1.params, 'phone-context=+1-914-555')

    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        p = urlparse.urlsplit("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        # The lambda defers the attribute access so that assertRaises
        # can observe the exception.
        self.assertRaises(ValueError, lambda: p.port)

        p = urlparse.urlparse("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)

    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        p = urlparse.urlsplit(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urlparse.urlparse(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

    def test_caching(self):
        # Test case for bug #1313119
        uri = "http://example.com/doc/"
        unicode_uri = unicode(uri)

        # Parse the unicode form first, then check that parsing the
        # plain-string form still yields components of the plain
        # string type.
        urlparse.urlparse(unicode_uri)
        p = urlparse.urlparse(uri)
        self.assertEqual(type(p.scheme), type(uri))
        self.assertEqual(type(p.hostname), type(uri))
        self.assertEqual(type(p.path), type(uri))

    def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))

    def test_anyscheme(self):
        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
        self.assertEqual(urlparse.urlparse("s3://foo.com/stuff"),
                         ('s3', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(
            urlparse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
            ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
        self.assertEqual(
            urlparse.urlparse("x-newscheme://foo.com/stuff?query"),
            ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))

    def test_withoutscheme(self):
        # Test urlparse without scheme
        # Issue 754016: urlparse goes wrong with IP:port without scheme
        # RFC 1808 specifies that netloc should start with //, urlparse expects
        # the same, otherwise it classifies the portion of url as path.
        self.assertEqual(urlparse.urlparse("path"),
                         ('', '', 'path', '', '', ''))
        self.assertEqual(urlparse.urlparse("//www.python.org:80"),
                         ('', 'www.python.org:80', '', '', '', ''))
        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
                         ('http', 'www.python.org:80', '', '', '', ''))

    def test_portseparator(self):
        # Issue 754016 makes changes for port separator ':' from scheme separator
        self.assertEqual(urlparse.urlparse("path:80"),
                         ('', '', 'path:80', '', '', ''))
        self.assertEqual(urlparse.urlparse("http:"),
                         ('http', '', '', '', '', ''))
        self.assertEqual(urlparse.urlparse("https:"),
                         ('https', '', '', '', '', ''))
        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
                         ('http', 'www.python.org:80', '', '', '', ''))


def test_main():
    """Run the UrlParseTestCase suite through test_support."""
    test_support.run_unittest(UrlParseTestCase)


if __name__ == "__main__":
    test_main()