# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
A http client with support for https connections with certificate verification.

The verification is based on http://tools.ietf.org/html/rfc6125#section-6.4.3
and the code is from Lib/ssl.py in python3:
  http://hg.python.org/cpython/file/4dac45f88d45/Lib/ssl.py

One use case is to download Chromium DEPS file in a secure way:
  https://src.chromium.org/chrome/trunk/src/DEPS

Notice: python 2.7 or newer is required.
"""

import cookielib
import httplib
import os
import re
import socket
import ssl
import time
import urllib
import urllib2

import http_client


_SCRIPT_DIR = os.path.dirname(__file__)
_TRUSTED_ROOT_CERTS = os.path.join(_SCRIPT_DIR, 'cacert.pem')


class CertificateError(ValueError):
  """Raised when a certificate does not match the expected hostname."""
  pass


def _DNSNameMatch(dn, hostname, max_wildcards=1):
  """Checks whether a certificate DNS name matches the given hostname.

  Matching is done according to RFC 6125, section 6.4.3:
    http://tools.ietf.org/html/rfc6125#section-6.4.3

  Args:
    dn: A DNS name from the certificate. It may contain '*' wildcards in its
        left-most label.
    hostname: The hostname to check the DNS name against.
    max_wildcards: The maximum number of '*' accepted in the left-most label.

  Returns:
    True if the DNS name matches the hostname (case-insensitively), otherwise
    False. An empty DNS name never matches.

  Raises:
    CertificateError: The left-most label contains more than max_wildcards
        wildcards.
  """
  if not dn:
    return False

  parts = dn.split('.')
  leftmost = parts[0]
  remainder = parts[1:]

  wildcards = leftmost.count('*')
  if wildcards > max_wildcards:
    # Issue #17980: avoid denials of service by refusing more
    # than one wildcard per fragment. A survey of established
    # policy among SSL implementations showed it to be a
    # reasonable choice.
    raise CertificateError(
        'too many wildcards in certificate DNS name: ' + repr(dn))

  # speed up common case w/o wildcards
  if not wildcards:
    return dn.lower() == hostname.lower()

  pats = []

  # RFC 6125, section 6.4.3, subitem 1.
  # The client SHOULD NOT attempt to match a presented identifier in which
  # the wildcard character comprises a label other than the left-most label.
  if leftmost == '*':
    # When '*' is a fragment by itself, it matches a non-empty dotless
    # fragment.
    pats.append('[^.]+')
  elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
    # RFC 6125, section 6.4.3, subitem 3.
    # The client SHOULD NOT attempt to match a presented identifier
    # where the wildcard character is embedded within an A-label or
    # U-label of an internationalized domain name.
    pats.append(re.escape(leftmost))
  else:
    # Otherwise, '*' matches any dotless string, e.g. www*
    pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))

  # add the remaining fragments, ignore any wildcards
  for frag in remainder:
    pats.append(re.escape(frag))

  pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
  # Return a real bool so that both code paths have the same return type.
  return pat.match(hostname) is not None


def _MatchHostname(cert, hostname):
  """Verify that *cert* (in decoded format as returned by
  SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
  rules are followed, but IP addresses are not accepted for *hostname*.

  Args:
    cert: The decoded certificate, a dict that may carry 'subjectAltName' and
        'subject' entries.
    hostname: The hostname the certificate is expected to be issued for.

  Returns:
    None on success.

  Raises:
    ValueError: The certificate is empty or missing.
    CertificateError: No DNS name in the certificate matches the hostname.
  """
  if not cert:
    raise ValueError('empty or no certificate, match_hostname needs a '
                     'SSL socket or SSL context with either '
                     'CERT_OPTIONAL or CERT_REQUIRED')
  dnsnames = []
  san = cert.get('subjectAltName', ())
  for key, value in san:
    if key == 'DNS':
      if _DNSNameMatch(value, hostname):
        return
      dnsnames.append(value)
  if not dnsnames:
    # The subject is only checked when there is no dNSName entry
    # in subjectAltName
    for sub in cert.get('subject', ()):
      for key, value in sub:
        # XXX according to RFC 2818, the most specific Common Name
        # must be used.
        if key == 'commonName':
          if _DNSNameMatch(value, hostname):
            return
          dnsnames.append(value)
  if len(dnsnames) > 1:
    raise CertificateError('hostname %r doesn\'t match either of %s'
                           % (hostname, ', '.join(map(repr, dnsnames))))
  elif len(dnsnames) == 1:
    raise CertificateError('hostname %r doesn\'t match %r'
                           % (hostname, dnsnames[0]))
  else:
    raise CertificateError('no appropriate commonName or '
                           'subjectAltName fields were found')


class HTTPSConnection(httplib.HTTPSConnection):
  """An HTTPS connection that checks the server certificate and hostname."""

  def __init__(self, host, root_certs=_TRUSTED_ROOT_CERTS, **kwargs):
    """Initializes the connection.

    Args:
      host: The host to connect to.
      root_certs: Path to the CA certificates passed to ssl.wrap_socket as
          ca_certs. If None, the socket is wrapped without cert_reqs/ca_certs.
      **kwargs: Passed through to httplib.HTTPSConnection.
    """
    self.root_certs = root_certs
    httplib.HTTPSConnection.__init__(self, host, **kwargs)

  def connect(self):
    """Connects to the host and verifies the certificate and the hostname.

    Raises:
      ValueError: The peer certificate is empty or missing.
      CertificateError: The certificate does not match self.host.
    """
    # Overrides for certificate verification.
    args = [(self.host, self.port), self.timeout]
    if self.source_address:
      args.append(self.source_address)
    sock = socket.create_connection(*args)

    try:
      if self._tunnel_host:
        self.sock = sock
        self._tunnel()

      # Wrap the socket for verification with the root certs.
      kwargs = {}
      if self.root_certs is not None:
        kwargs.update(cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.root_certs)
      self.sock = ssl.wrap_socket(sock, **kwargs)
    except Exception:
      # Do not leak the raw socket if tunnelling or the SSL handshake fails.
      self.sock = None
      sock.close()
      raise

    # Check hostname.
    # NOTE(review): self.host is what gets verified here; when tunnelling
    # through a proxy, confirm that it refers to the target server.
    try:
      _MatchHostname(self.sock.getpeercert(), self.host)
    except ValueError:
      # CertificateError is a ValueError subclass, and _MatchHostname raises
      # a plain ValueError when there is no certificate; close the socket in
      # both cases before propagating the failure.
      self.sock.shutdown(socket.SHUT_RDWR)
      self.sock.close()
      raise


class HTTPSHandler(urllib2.HTTPSHandler):
  """A urllib2 handler that opens https urls through HTTPSConnection."""

  def __init__(self, root_certs=_TRUSTED_ROOT_CERTS):
    """Initializes the handler.

    Args:
      root_certs: Path to the CA certificates handed to every HTTPSConnection
          created by this handler.
    """
    urllib2.HTTPSHandler.__init__(self)
    self.root_certs = root_certs

  def https_open(self, req):
    """Opens the https request with a verifying connection."""
    # Pass a reference to the function below so that verification against
    # trusted root certs could be injected.
    return self.do_open(self.GetConnection, req)

  def GetConnection(self, host, **kwargs):
    """Creates an HTTPSConnection to the host.

    Args:
      host: The host to connect to.
      **kwargs: Extra arguments for HTTPSConnection; a root_certs given here
          overrides the one of this handler.

    Returns:
      An HTTPSConnection instance.
    """
    params = dict(root_certs=self.root_certs)
    params.update(kwargs)
    return HTTPSConnection(host, **params)


def _SendRequest(url, timeout=None):
  """Send request to the given https url, and return the server response.

  Args:
    url: The https url to send request to.
    timeout: The timeout passed to the url opener, or None.

  Returns:
    An integer: http code of the response. It is the error code for an
        urllib2.HTTPError, and -1 for ssl.SSLError, httplib.BadStatusLine or
        IOError.
    A string: content of the response, or None if the request failed.
    Both values are None if the url is empty.

  Raises:
    CertificateError: Certificate verification fails.
  """
  if not url:
    return None, None

  handlers = []
  if url.startswith('https://'):
    # HTTPSHandler has to go first, because we don't want to send secure cookies
    # to a man in the middle.
    handlers.append(HTTPSHandler())

  cookie_file = os.environ.get('COOKIE_FILE')
  if cookie_file and os.path.exists(cookie_file):
    handlers.append(
        urllib2.HTTPCookieProcessor(cookielib.MozillaCookieJar(cookie_file)))

  url_opener = urllib2.build_opener(*handlers)

  status_code = None
  content = None

  try:
    response = url_opener.open(url, timeout=timeout)
    try:
      status_code = response.code
      content = response.read()
    finally:
      # Always release the underlying connection, even if reading fails.
      response.close()
  except urllib2.HTTPError as e:
    status_code = e.code
    content = None
  except (ssl.SSLError, httplib.BadStatusLine, IOError):
    status_code = -1
    content = None

  return status_code, content


class HttpClientLocal(http_client.HttpClient):
  """This http client is used locally in a workstation, GCE VMs, etc."""

  @staticmethod
  def Get(url, params=None, timeout=120, retries=5, retry_interval=0.5,
          retry_if_not=None):
    """Sends a GET request to the url, retrying on failure.

    Args:
      url: The url to send the request to.
      params: An optional dict of query parameters, url-encoded and appended
          to the url.
      timeout: The timeout of each single request.
      retries: The maximum number of attempts; at least one request is always
          sent.
      retry_interval: The number of seconds to sleep between two attempts.
      retry_if_not: An optional http status code; a response with this status
          code is returned immediately without any retry.

    Returns:
      A tuple (status_code, content) of the last attempt, as returned by
      _SendRequest.
    """
    if params:
      url = '%s?%s' % (url, urllib.urlencode(params))

    count = 0
    while True:
      count += 1

      status_code, content = _SendRequest(url, timeout=timeout)
      if status_code == 200:
        return status_code, content
      if retry_if_not and status_code == retry_if_not:
        return status_code, content

      if count >= retries:
        # Out of attempts: hand back whatever the last request produced.
        return status_code, content
      time.sleep(retry_interval)