1 #!/usr/bin/env python 2 # 3 # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software 4 # Foundation; All Rights Reserved 5 6 """A HTTPSConnection/Handler with additional proxy and cert validation features. 7 8 In particular, monkey patches in Python r74203 to provide support for CONNECT 9 proxies and adds SSL cert validation if the ssl module is present. 10 """ 11 12 __author__ = "{frew,nick.johnson}@google.com (Fred Wulff and Nick Johnson)" 13 14 import base64 15 import httplib 16 import logging 17 import re 18 import socket 19 import urllib2 20 21 from urllib import splittype 22 from urllib import splituser 23 from urllib import splitpasswd 24 25 class InvalidCertificateException(httplib.HTTPException): 26 """Raised when a certificate is provided with an invalid hostname.""" 27 28 def __init__(self, host, cert, reason): 29 """Constructor. 30 31 Args: 32 host: The hostname the connection was made to. 33 cert: The SSL certificate (as a dictionary) the host returned. 34 """ 35 httplib.HTTPException.__init__(self) 36 self.host = host 37 self.cert = cert 38 self.reason = reason 39 40 def __str__(self): 41 return ('Host %s returned an invalid certificate (%s): %s\n' 42 'To learn more, see ' 43 'http://code.google.com/appengine/kb/general.html#rpcssl' % 44 (self.host, self.reason, self.cert)) 45 46 def can_validate_certs(): 47 """Return True if we have the SSL package and can validate certificates.""" 48 try: 49 import ssl 50 return True 51 except ImportError: 52 return False 53 54 def _create_fancy_connection(tunnel_host=None, key_file=None, 55 cert_file=None, ca_certs=None): 56 # This abomination brought to you by the fact that 57 # the HTTPHandler creates the connection instance in the middle 58 # of do_open so we need to add the tunnel host to the class. 59 60 class PresetProxyHTTPSConnection(httplib.HTTPSConnection): 61 """An HTTPS connection that uses a proxy defined by the enclosing scope.""" 62 63 def __init__(self, *args, **kwargs): 64 httplib.HTTPSConnection.__init__(self, *args, **kwargs) 65 66 self._tunnel_host = tunnel_host 67 if tunnel_host: 68 logging.debug("Creating preset proxy https conn: %s", tunnel_host) 69 70 self.key_file = key_file 71 self.cert_file = cert_file 72 self.ca_certs = ca_certs 73 try: 74 import ssl 75 if self.ca_certs: 76 self.cert_reqs = ssl.CERT_REQUIRED 77 else: 78 self.cert_reqs = ssl.CERT_NONE 79 except ImportError: 80 pass 81 82 def _tunnel(self): 83 self._set_hostport(self._tunnel_host, None) 84 logging.info("Connecting through tunnel to: %s:%d", 85 self.host, self.port) 86 self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self.host, self.port)) 87 response = self.response_class(self.sock, strict=self.strict, 88 method=self._method) 89 (_, code, message) = response._read_status() 90 91 if code != 200: 92 self.close() 93 raise socket.error, "Tunnel connection failed: %d %s" % ( 94 code, message.strip()) 95 96 while True: 97 line = response.fp.readline() 98 if line == "\r\n": 99 break 100 101 def _get_valid_hosts_for_cert(self, cert): 102 """Returns a list of valid host globs for an SSL certificate. 103 104 Args: 105 cert: A dictionary representing an SSL certificate. 106 Returns: 107 list: A list of valid host globs. 108 """ 109 if 'subjectAltName' in cert: 110 return [x[1] for x in cert['subjectAltName'] if x[0].lower() == 'dns'] 111 else: 112 # Return a list of commonName fields 113 return [x[0][1] for x in cert['subject'] 114 if x[0][0].lower() == 'commonname'] 115 116 def _validate_certificate_hostname(self, cert, hostname): 117 """Validates that a given hostname is valid for an SSL certificate. 118 119 Args: 120 cert: A dictionary representing an SSL certificate. 121 hostname: The hostname to test. 122 Returns: 123 bool: Whether or not the hostname is valid for this certificate. 124 """ 125 hosts = self._get_valid_hosts_for_cert(cert) 126 for host in hosts: 127 # Convert the glob-style hostname expression (eg, '*.google.com') into a 128 # valid regular expression. 129 host_re = host.replace('.', '\.').replace('*', '[^.]*') 130 if re.search('^%s$' % (host_re,), hostname, re.I): 131 return True 132 return False 133 134 135 def connect(self): 136 # TODO(frew): When we drop support for <2.6 (in the far distant future), 137 # change this to socket.create_connection. 138 self.sock = _create_connection((self.host, self.port)) 139 140 if self._tunnel_host: 141 self._tunnel() 142 143 # ssl and FakeSocket got deprecated. Try for the new hotness of wrap_ssl, 144 # with fallback. 145 try: 146 import ssl 147 self.sock = ssl.wrap_socket(self.sock, 148 keyfile=self.key_file, 149 certfile=self.cert_file, 150 ca_certs=self.ca_certs, 151 cert_reqs=self.cert_reqs) 152 153 if self.cert_reqs & ssl.CERT_REQUIRED: 154 cert = self.sock.getpeercert() 155 hostname = self.host.split(':', 0)[0] 156 if not self._validate_certificate_hostname(cert, hostname): 157 raise InvalidCertificateException(hostname, cert, 158 'hostname mismatch') 159 except ImportError: 160 ssl = socket.ssl(self.sock, 161 keyfile=self.key_file, 162 certfile=self.cert_file) 163 self.sock = httplib.FakeSocket(self.sock, ssl) 164 165 return PresetProxyHTTPSConnection 166 167 168 # Here to end of _create_connection copied wholesale from Python 2.6"s socket.py 169 _GLOBAL_DEFAULT_TIMEOUT = object() 170 171 172 def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT): 173 """Connect to *address* and return the socket object. 174 175 Convenience function. Connect to *address* (a 2-tuple ``(host, 176 port)``) and return the socket object. Passing the optional 177 *timeout* parameter will set the timeout on the socket instance 178 before attempting to connect. If no *timeout* is supplied, the 179 global default timeout setting returned by :func:`getdefaulttimeout` 180 is used. 181 """ 182 183 msg = "getaddrinfo returns an empty list" 184 host, port = address 185 for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): 186 af, socktype, proto, canonname, sa = res 187 sock = None 188 try: 189 sock = socket.socket(af, socktype, proto) 190 if timeout is not _GLOBAL_DEFAULT_TIMEOUT: 191 sock.settimeout(timeout) 192 sock.connect(sa) 193 return sock 194 195 except socket.error, msg: 196 if sock is not None: 197 sock.close() 198 199 raise socket.error, msg 200 201 202 class FancyRequest(urllib2.Request): 203 """A request that allows the use of a CONNECT proxy.""" 204 205 def __init__(self, *args, **kwargs): 206 urllib2.Request.__init__(self, *args, **kwargs) 207 self._tunnel_host = None 208 self._key_file = None 209 self._cert_file = None 210 self._ca_certs = None 211 212 def set_proxy(self, host, type): 213 saved_type = None 214 215 if self.get_type() == "https" and not self._tunnel_host: 216 self._tunnel_host = self.get_host() 217 saved_type = self.get_type() 218 urllib2.Request.set_proxy(self, host, type) 219 220 if saved_type: 221 # Don't set self.type, we want to preserve the 222 # type for tunneling. 223 self.type = saved_type 224 225 def set_ssl_info(self, key_file=None, cert_file=None, ca_certs=None): 226 self._key_file = key_file 227 self._cert_file = cert_file 228 self._ca_certs = ca_certs 229 230 231 class FancyProxyHandler(urllib2.ProxyHandler): 232 """A ProxyHandler that works with CONNECT-enabled proxies.""" 233 234 # Taken verbatim from /usr/lib/python2.5/urllib2.py 235 def _parse_proxy(self, proxy): 236 """Return (scheme, user, password, host/port) given a URL or an authority. 237 238 If a URL is supplied, it must have an authority (host:port) component. 239 According to RFC 3986, having an authority component means the URL must 240 have two slashes after the scheme: 241 242 >>> _parse_proxy('file:/ftp.example.com/') 243 Traceback (most recent call last): 244 ValueError: proxy URL with no authority: 'file:/ftp.example.com/' 245 246 The first three items of the returned tuple may be None. 247 248 Examples of authority parsing: 249 250 >>> _parse_proxy('proxy.example.com') 251 (None, None, None, 'proxy.example.com') 252 >>> _parse_proxy('proxy.example.com:3128') 253 (None, None, None, 'proxy.example.com:3128') 254 255 The authority component may optionally include userinfo (assumed to be 256 username:password): 257 258 >>> _parse_proxy('joe:password (at] proxy.example.com') 259 (None, 'joe', 'password', 'proxy.example.com') 260 >>> _parse_proxy('joe:password (at] proxy.example.com:3128') 261 (None, 'joe', 'password', 'proxy.example.com:3128') 262 263 Same examples, but with URLs instead: 264 265 >>> _parse_proxy('http://proxy.example.com/') 266 ('http', None, None, 'proxy.example.com') 267 >>> _parse_proxy('http://proxy.example.com:3128/') 268 ('http', None, None, 'proxy.example.com:3128') 269 >>> _parse_proxy('http://joe:password@proxy.example.com/') 270 ('http', 'joe', 'password', 'proxy.example.com') 271 >>> _parse_proxy('http://joe:password@proxy.example.com:3128') 272 ('http', 'joe', 'password', 'proxy.example.com:3128') 273 274 Everything after the authority is ignored: 275 276 >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') 277 ('ftp', 'joe', 'password', 'proxy.example.com') 278 279 Test for no trailing '/' case: 280 281 >>> _parse_proxy('http://joe:password@proxy.example.com') 282 ('http', 'joe', 'password', 'proxy.example.com') 283 284 """ 285 scheme, r_scheme = splittype(proxy) 286 if not r_scheme.startswith("/"): 287 # authority 288 scheme = None 289 authority = proxy 290 else: 291 # URL 292 if not r_scheme.startswith("//"): 293 raise ValueError("proxy URL with no authority: %r" % proxy) 294 # We have an authority, so for RFC 3986-compliant URLs (by ss 3. 295 # and 3.3.), path is empty or starts with '/' 296 end = r_scheme.find("/", 2) 297 if end == -1: 298 end = None 299 authority = r_scheme[2:end] 300 userinfo, hostport = splituser(authority) 301 if userinfo is not None: 302 user, password = splitpasswd(userinfo) 303 else: 304 user = password = None 305 return scheme, user, password, hostport 306 307 def proxy_open(self, req, proxy, type): 308 # This block is copied wholesale from Python2.6 urllib2. 309 # It is idempotent, so the superclass method call executes as normal 310 # if invoked. 311 orig_type = req.get_type() 312 proxy_type, user, password, hostport = self._parse_proxy(proxy) 313 if proxy_type is None: 314 proxy_type = orig_type 315 if user and password: 316 user_pass = "%s:%s" % (urllib2.unquote(user), urllib2.unquote(password)) 317 creds = base64.b64encode(user_pass).strip() 318 # Later calls overwrite earlier calls for the same header 319 req.add_header("Proxy-authorization", "Basic " + creds) 320 hostport = urllib2.unquote(hostport) 321 req.set_proxy(hostport, proxy_type) 322 # This condition is the change 323 if orig_type == "https": 324 return None 325 326 return urllib2.ProxyHandler.proxy_open(self, req, proxy, type) 327 328 329 class FancyHTTPSHandler(urllib2.HTTPSHandler): 330 """An HTTPSHandler that works with CONNECT-enabled proxies.""" 331 332 def do_open(self, http_class, req): 333 # Intentionally very specific so as to opt for false negatives 334 # rather than false positives. 335 try: 336 return urllib2.HTTPSHandler.do_open( 337 self, 338 _create_fancy_connection(req._tunnel_host, 339 req._key_file, 340 req._cert_file, 341 req._ca_certs), 342 req) 343 except urllib2.URLError, url_error: 344 try: 345 import ssl 346 if (type(url_error.reason) == ssl.SSLError and 347 url_error.reason.args[0] == 1): 348 # Display the reason to the user. Need to use args for python2.5 349 # compat. 350 raise InvalidCertificateException(req.host, '', 351 url_error.reason.args[1]) 352 except ImportError: 353 pass 354 355 raise url_error 356 357 358 # We have to implement this so that we persist the tunneling behavior 359 # through redirects. 360 class FancyRedirectHandler(urllib2.HTTPRedirectHandler): 361 """A redirect handler that persists CONNECT-enabled proxy information.""" 362 363 def redirect_request(self, req, *args, **kwargs): 364 new_req = urllib2.HTTPRedirectHandler.redirect_request( 365 self, req, *args, **kwargs) 366 # Same thing as in our set_proxy implementation, but in this case 367 # we"ve only got a Request to work with, so it was this or copy 368 # everything over piecemeal. 369 # 370 # Note that we do not persist tunneling behavior from an http request 371 # to an https request, because an http request does not set _tunnel_host. 372 # 373 # Also note that in Python < 2.6, you will get an error in 374 # FancyHTTPSHandler.do_open() on an https urllib2.Request that uses an http 375 # proxy, since the proxy type will be set to http instead of https. 376 # (FancyRequest, and urllib2.Request in Python >= 2.6 set the proxy type to 377 # https.) Such an urllib2.Request could result from this redirect 378 # if you are redirecting from an http request (since an an http request 379 # does not have _tunnel_host set, and thus you will not set the proxy 380 # in the code below), and if you have defined a proxy for https in, say, 381 # FancyProxyHandler, and that proxy has type http. 382 if hasattr(req, "_tunnel_host") and isinstance(new_req, urllib2.Request): 383 if new_req.get_type() == "https": 384 if req._tunnel_host: 385 # req is proxied, so copy the proxy info. 386 new_req._tunnel_host = new_req.get_host() 387 new_req.set_proxy(req.host, "https") 388 else: 389 # req is not proxied, so just make sure _tunnel_host is defined. 390 new_req._tunnel_host = None 391 new_req.type = "https" 392 if hasattr(req, "_key_file") and isinstance(new_req, urllib2.Request): 393 # Copy the auxiliary data in case this or any further redirect is https 394 new_req._key_file = req._key_file 395 new_req._cert_file = req._cert_file 396 new_req._ca_certs = req._ca_certs 397 398 return new_req 399