Home | History | Annotate | Download | only in common
      1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """
An HTTP client with support for HTTPS connections with certificate verification.
      7 
      8 The verification is based on http://tools.ietf.org/html/rfc6125#section-6.4.3
      9 and the code is from Lib/ssl.py in python3:
     10   http://hg.python.org/cpython/file/4dac45f88d45/Lib/ssl.py
     11 
One use case is to download the Chromium DEPS file in a secure way:
     13   https://src.chromium.org/chrome/trunk/src/DEPS
     14 
Note: Python 2.7 or newer is required.
     16 """
     17 
     18 import cookielib
     19 import httplib
     20 import os
     21 import re
     22 import socket
     23 import ssl
     24 import time
     25 import urllib
     26 import urllib2
     27 
     28 import http_client
     29 
     30 
     31 _SCRIPT_DIR = os.path.dirname(__file__)
     32 _TRUSTED_ROOT_CERTS = os.path.join(_SCRIPT_DIR, 'cacert.pem')
     33 
     34 
     35 class CertificateError(ValueError):
     36   pass
     37 
     38 
     39 def _DNSNameMatch(dn, hostname, max_wildcards=1):
     40   """Matching according to RFC 6125, section 6.4.3
     41 
     42   http://tools.ietf.org/html/rfc6125#section-6.4.3
     43   """
     44   pats = []
     45   if not dn:
     46     return False
     47 
     48   parts = dn.split(r'.')
     49   leftmost = parts[0]
     50   remainder = parts[1:]
     51 
     52   wildcards = leftmost.count('*')
     53   if wildcards > max_wildcards:
     54     # Issue #17980: avoid denials of service by refusing more
     55     # than one wildcard per fragment.  A survery of established
     56     # policy among SSL implementations showed it to be a
     57     # reasonable choice.
     58     raise CertificateError(
     59         'too many wildcards in certificate DNS name: ' + repr(dn))
     60 
     61   # speed up common case w/o wildcards
     62   if not wildcards:
     63     return dn.lower() == hostname.lower()
     64 
     65   # RFC 6125, section 6.4.3, subitem 1.
     66   # The client SHOULD NOT attempt to match a presented identifier in which
     67   # the wildcard character comprises a label other than the left-most label.
     68   if leftmost == '*':
     69     # When '*' is a fragment by itself, it matches a non-empty dotless
     70     # fragment.
     71     pats.append('[^.]+')
     72   elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
     73     # RFC 6125, section 6.4.3, subitem 3.
     74     # The client SHOULD NOT attempt to match a presented identifier
     75     # where the wildcard character is embedded within an A-label or
     76     # U-label of an internationalized domain name.
     77     pats.append(re.escape(leftmost))
     78   else:
     79     # Otherwise, '*' matches any dotless string, e.g. www*
     80     pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
     81 
     82   # add the remaining fragments, ignore any wildcards
     83   for frag in remainder:
     84     pats.append(re.escape(frag))
     85 
     86   pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
     87   return pat.match(hostname)
     88 
     89 
     90 def _MatchHostname(cert, hostname):
     91   """Verify that *cert* (in decoded format as returned by
     92   SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
     93   rules are followed, but IP addresses are not accepted for *hostname*.
     94 
     95   CertificateError is raised on failure. On success, the function
     96   returns nothing.
     97   """
     98   if not cert:
     99     raise ValueError('empty or no certificate, match_hostname needs a '
    100                      'SSL socket or SSL context with either '
    101                      'CERT_OPTIONAL or CERT_REQUIRED')
    102   dnsnames = []
    103   san = cert.get('subjectAltName', ())
    104   for key, value in san:
    105     if key == 'DNS':
    106       if _DNSNameMatch(value, hostname):
    107         return
    108       dnsnames.append(value)
    109   if not dnsnames:
    110     # The subject is only checked when there is no dNSName entry
    111     # in subjectAltName
    112     for sub in cert.get('subject', ()):
    113       for key, value in sub:
    114         # XXX according to RFC 2818, the most specific Common Name
    115         # must be used.
    116         if key == 'commonName':
    117           if _DNSNameMatch(value, hostname):
    118             return
    119           dnsnames.append(value)
    120   if len(dnsnames) > 1:
    121     raise CertificateError('hostname %r doesn\'t match either of %s'
    122                            % (hostname, ', '.join(map(repr, dnsnames))))
    123   elif len(dnsnames) == 1:
    124     raise CertificateError('hostname %r doesn\'t match %r'
    125                            % (hostname, dnsnames[0]))
    126   else:
    127     raise CertificateError('no appropriate commonName or '
    128                            'subjectAltName fields were found')
    129 
    130 
    131 class HTTPSConnection(httplib.HTTPSConnection):
    132 
    133   def __init__(self, host, root_certs=_TRUSTED_ROOT_CERTS, **kwargs):
    134     self.root_certs = root_certs
    135     httplib.HTTPSConnection.__init__(self, host, **kwargs)
    136 
    137   def connect(self):
    138     # Overrides for certificate verification.
    139     args = [(self.host, self.port), self.timeout,]
    140     if self.source_address:
    141       args.append(self.source_address)
    142     sock = socket.create_connection(*args)
    143 
    144     if self._tunnel_host:
    145       self.sock = sock
    146       self._tunnel()
    147 
    148     # Wrap the socket for verification with the root certs.
    149     kwargs = {}
    150     if self.root_certs is not None:
    151       kwargs.update(cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.root_certs)
    152     self.sock = ssl.wrap_socket(sock, **kwargs)
    153 
    154     # Check hostname.
    155     try:
    156       _MatchHostname(self.sock.getpeercert(), self.host)
    157     except CertificateError:
    158       self.sock.shutdown(socket.SHUT_RDWR)
    159       self.sock.close()
    160       raise
    161 
    162 
    163 class HTTPSHandler(urllib2.HTTPSHandler):
    164 
    165   def __init__(self, root_certs=_TRUSTED_ROOT_CERTS):
    166     urllib2.HTTPSHandler.__init__(self)
    167     self.root_certs = root_certs
    168 
    169   def https_open(self, req):
    170     # Pass a reference to the function below so that verification against
    171     # trusted root certs could be injected.
    172     return self.do_open(self.GetConnection, req)
    173 
    174   def GetConnection(self, host, **kwargs):
    175     params = dict(root_certs=self.root_certs)
    176     params.update(kwargs)
    177     return HTTPSConnection(host, **params)
    178 
    179 
    180 def _SendRequest(url, timeout=None):
    181   """Send request to the given https url, and return the server response.
    182 
    183   Args:
    184     url: The https url to send request to.
    185 
    186   Returns:
    187     An integer: http code of the response.
    188     A string: content of the response.
    189 
    190   Raises:
    191     CertificateError: Certificate verification fails.
    192   """
    193   if not url:
    194     return None, None
    195 
    196   handlers = []
    197   if url.startswith('https://'):
    198     # HTTPSHandler has to go first, because we don't want to send secure cookies
    199     # to a man in the middle.
    200     handlers.append(HTTPSHandler())
    201 
    202 
    203   cookie_file = os.environ.get('COOKIE_FILE')
    204   if cookie_file and os.path.exists(cookie_file):
    205     handlers.append(
    206         urllib2.HTTPCookieProcessor(cookielib.MozillaCookieJar(cookie_file)))
    207 
    208   url_opener = urllib2.build_opener(*handlers)
    209 
    210   status_code = None
    211   content = None
    212 
    213   try:
    214     response = url_opener.open(url, timeout=timeout)
    215 
    216     status_code = response.code
    217     content = response.read()
    218   except urllib2.HTTPError as e:
    219     status_code = e.code
    220     content = None
    221   except (ssl.SSLError, httplib.BadStatusLine, IOError):
    222     status_code = -1
    223     content = None
    224 
    225   return status_code, content
    226 
    227 
    228 class HttpClientLocal(http_client.HttpClient):
    229   """This http client is used locally in a workstation, GCE VMs, etc."""
    230 
    231   @staticmethod
    232   def Get(url, params={}, timeout=120, retries=5, retry_interval=0.5,
    233           retry_if_not=None):
    234     if params:
    235       url = '%s?%s' % (url, urllib.urlencode(params))
    236 
    237     count = 0
    238     while True:
    239       count += 1
    240 
    241       status_code, content = _SendRequest(url, timeout=timeout)
    242       if status_code == 200:
    243         return status_code, content
    244       if retry_if_not and status_code == retry_if_not:
    245         return status_code, content
    246 
    247       if count < retries:
    248         time.sleep(retry_interval)
    249       else:
    250         return status_code, content
    251 
    252     # Should never be reached.
    253     return status_code, content
    254