Home | History | Annotate | Download | only in paste
      1 # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
      2 # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
      3 """
      4 An application that proxies WSGI requests to a remote server.
      5 
      6 TODO:
      7 
      8 * Send ``Via`` header?  It's not clear to me this is a Via in the
      9   style of a typical proxy.
     10 
     11 * Other headers or metadata?  I put in X-Forwarded-For, but that's it.
     12 
     13 * Signed data of non-HTTP keys?  This would be for things like
     14   REMOTE_USER.
     15 
     16 * Something to indicate what the original URL was?  The original host,
     17   scheme, and base path.
     18 
     19 * Rewriting ``Location`` headers?  mod_proxy does this.
     20 
     21 * Rewriting body?  (Probably not on this one -- that can be done with
     22   a different middleware that wraps this middleware)
     23 
     24 * Example::
     25 
     26     use = egg:Paste#proxy
     27     address = http://server3:8680/exist/rest/db/orgs/sch/config/
     28     allowed_request_methods = GET
     29 
     30 """
     31 
     32 from six.moves import http_client as httplib
     33 from six.moves.urllib import parse as urlparse
     34 from six.moves.urllib.parse import quote
     35 import six
     36 
     37 from paste import httpexceptions
     38 from paste.util.converters import aslist
     39 
     40 # Remove these headers from response (specify lower case header
     41 # names):
     42 filtered_headers = (
     43     'transfer-encoding',
     44     'connection',
     45     'keep-alive',
     46     'proxy-authenticate',
     47     'proxy-authorization',
     48     'te',
     49     'trailers',
     50     'upgrade',
     51 )
     52 
     53 class Proxy(object):
     54 
     55     def __init__(self, address, allowed_request_methods=(),
     56                  suppress_http_headers=()):
     57         self.address = address
     58         self.parsed = urlparse.urlsplit(address)
     59         self.scheme = self.parsed[0].lower()
     60         self.host = self.parsed[1]
     61         self.path = self.parsed[2]
     62         self.allowed_request_methods = [
     63             x.lower() for x in allowed_request_methods if x]
     64 
     65         self.suppress_http_headers = [
     66             x.lower() for x in suppress_http_headers if x]
     67 
     68     def __call__(self, environ, start_response):
     69         if (self.allowed_request_methods and
     70             environ['REQUEST_METHOD'].lower() not in self.allowed_request_methods):
     71             return httpexceptions.HTTPBadRequest("Disallowed")(environ, start_response)
     72 
     73         if self.scheme == 'http':
     74             ConnClass = httplib.HTTPConnection
     75         elif self.scheme == 'https':
     76             ConnClass = httplib.HTTPSConnection
     77         else:
     78             raise ValueError(
     79                 "Unknown scheme for %r: %r" % (self.address, self.scheme))
     80         conn = ConnClass(self.host)
     81         headers = {}
     82         for key, value in environ.items():
     83             if key.startswith('HTTP_'):
     84                 key = key[5:].lower().replace('_', '-')
     85                 if key == 'host' or key in self.suppress_http_headers:
     86                     continue
     87                 headers[key] = value
     88         headers['host'] = self.host
     89         if 'REMOTE_ADDR' in environ:
     90             headers['x-forwarded-for'] = environ['REMOTE_ADDR']
     91         if environ.get('CONTENT_TYPE'):
     92             headers['content-type'] = environ['CONTENT_TYPE']
     93         if environ.get('CONTENT_LENGTH'):
     94             if environ['CONTENT_LENGTH'] == '-1':
     95                 # This is a special case, where the content length is basically undetermined
     96                 body = environ['wsgi.input'].read(-1)
     97                 headers['content-length'] = str(len(body))
     98             else:
     99                 headers['content-length'] = environ['CONTENT_LENGTH']
    100                 length = int(environ['CONTENT_LENGTH'])
    101                 body = environ['wsgi.input'].read(length)
    102         else:
    103             body = ''
    104 
    105         path_info = quote(environ['PATH_INFO'])
    106         if self.path:
    107             request_path = path_info
    108             if request_path and request_path[0] == '/':
    109                 request_path = request_path[1:]
    110 
    111             path = urlparse.urljoin(self.path, request_path)
    112         else:
    113             path = path_info
    114         if environ.get('QUERY_STRING'):
    115             path += '?' + environ['QUERY_STRING']
    116 
    117         conn.request(environ['REQUEST_METHOD'],
    118                      path,
    119                      body, headers)
    120         res = conn.getresponse()
    121         headers_out = parse_headers(res.msg)
    122 
    123         status = '%s %s' % (res.status, res.reason)
    124         start_response(status, headers_out)
    125         # @@: Default?
    126         length = res.getheader('content-length')
    127         if length is not None:
    128             body = res.read(int(length))
    129         else:
    130             body = res.read()
    131         conn.close()
    132         return [body]
    133 
    134 def make_proxy(global_conf, address, allowed_request_methods="",
    135                suppress_http_headers=""):
    136     """
    137     Make a WSGI application that proxies to another address:
    138 
    139     ``address``
    140         the full URL ending with a trailing ``/``
    141 
    142     ``allowed_request_methods``:
    143         a space seperated list of request methods (e.g., ``GET POST``)
    144 
    145     ``suppress_http_headers``
    146         a space seperated list of http headers (lower case, without
    147         the leading ``http_``) that should not be passed on to target
    148         host
    149     """
    150     allowed_request_methods = aslist(allowed_request_methods)
    151     suppress_http_headers = aslist(suppress_http_headers)
    152     return Proxy(
    153         address,
    154         allowed_request_methods=allowed_request_methods,
    155         suppress_http_headers=suppress_http_headers)
    156 
    157 
    158 class TransparentProxy(object):
    159 
    160     """
    161     A proxy that sends the request just as it was given, including
    162     respecting HTTP_HOST, wsgi.url_scheme, etc.
    163 
    164     This is a way of translating WSGI requests directly to real HTTP
    165     requests.  All information goes in the environment; modify it to
    166     modify the way the request is made.
    167 
    168     If you specify ``force_host`` (and optionally ``force_scheme``)
    169     then HTTP_HOST won't be used to determine where to connect to;
    170     instead a specific host will be connected to, but the ``Host``
    171     header in the request will remain intact.
    172     """
    173 
    174     def __init__(self, force_host=None,
    175                  force_scheme='http'):
    176         self.force_host = force_host
    177         self.force_scheme = force_scheme
    178 
    179     def __repr__(self):
    180         return '<%s %s force_host=%r force_scheme=%r>' % (
    181             self.__class__.__name__,
    182             hex(id(self)),
    183             self.force_host, self.force_scheme)
    184 
    185     def __call__(self, environ, start_response):
    186         scheme = environ['wsgi.url_scheme']
    187         if self.force_host is None:
    188             conn_scheme = scheme
    189         else:
    190             conn_scheme = self.force_scheme
    191         if conn_scheme == 'http':
    192             ConnClass = httplib.HTTPConnection
    193         elif conn_scheme == 'https':
    194             ConnClass = httplib.HTTPSConnection
    195         else:
    196             raise ValueError(
    197                 "Unknown scheme %r" % scheme)
    198         if 'HTTP_HOST' not in environ:
    199             raise ValueError(
    200                 "WSGI environ must contain an HTTP_HOST key")
    201         host = environ['HTTP_HOST']
    202         if self.force_host is None:
    203             conn_host = host
    204         else:
    205             conn_host = self.force_host
    206         conn = ConnClass(conn_host)
    207         headers = {}
    208         for key, value in environ.items():
    209             if key.startswith('HTTP_'):
    210                 key = key[5:].lower().replace('_', '-')
    211                 headers[key] = value
    212         headers['host'] = host
    213         if 'REMOTE_ADDR' in environ and 'HTTP_X_FORWARDED_FOR' not in environ:
    214             headers['x-forwarded-for'] = environ['REMOTE_ADDR']
    215         if environ.get('CONTENT_TYPE'):
    216             headers['content-type'] = environ['CONTENT_TYPE']
    217         if environ.get('CONTENT_LENGTH'):
    218             length = int(environ['CONTENT_LENGTH'])
    219             body = environ['wsgi.input'].read(length)
    220             if length == -1:
    221                 environ['CONTENT_LENGTH'] = str(len(body))
    222         elif 'CONTENT_LENGTH' not in environ:
    223             body = ''
    224             length = 0
    225         else:
    226             body = ''
    227             length = 0
    228 
    229         path = (environ.get('SCRIPT_NAME', '')
    230                 + environ.get('PATH_INFO', ''))
    231         path = quote(path)
    232         if 'QUERY_STRING' in environ:
    233             path += '?' + environ['QUERY_STRING']
    234         conn.request(environ['REQUEST_METHOD'],
    235                      path, body, headers)
    236         res = conn.getresponse()
    237         headers_out = parse_headers(res.msg)
    238 
    239         status = '%s %s' % (res.status, res.reason)
    240         start_response(status, headers_out)
    241         # @@: Default?
    242         length = res.getheader('content-length')
    243         if length is not None:
    244             body = res.read(int(length))
    245         else:
    246             body = res.read()
    247         conn.close()
    248         return [body]
    249 
    250 def parse_headers(message):
    251     """
    252     Turn a Message object into a list of WSGI-style headers.
    253     """
    254     headers_out = []
    255     if six.PY3:
    256         for header, value in message.items():
    257             if header.lower() not in filtered_headers:
    258                 headers_out.append((header, value))
    259     else:
    260         for full_header in message.headers:
    261             if not full_header:
    262                 # Shouldn't happen, but we'll just ignore
    263                 continue
    264             if full_header[0].isspace():
    265                 # Continuation line, add to the last header
    266                 if not headers_out:
    267                     raise ValueError(
    268                         "First header starts with a space (%r)" % full_header)
    269                 last_header, last_value = headers_out.pop()
    270                 value = last_value + ' ' + full_header.strip()
    271                 headers_out.append((last_header, value))
    272                 continue
    273             try:
    274                 header, value = full_header.split(':', 1)
    275             except:
    276                 raise ValueError("Invalid header: %r" % full_header)
    277             value = value.strip()
    278             if header.lower() not in filtered_headers:
    279                 headers_out.append((header, value))
    280     return headers_out
    281 
    282 def make_transparent_proxy(
    283     global_conf, force_host=None, force_scheme='http'):
    284     """
    285     Create a proxy that connects to a specific host, but does
    286     absolutely no other filtering, including the Host header.
    287     """
    288     return TransparentProxy(force_host=force_host,
    289                             force_scheme=force_scheme)
    290