Home | History | Annotate | Download | only in paste
      1 # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
      2 # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
      3 # (c) 2005 Ian Bicking and contributors
      4 # This module is part of the Python Paste Project and is released under
      5 # the MIT License: http://www.opensource.org/licenses/mit-license.php
      6 """
This module provides helper routines that work directly on a WSGI
environment to solve common requirements.
      9 
     10    * get_cookies(environ)
     11    * parse_querystring(environ)
     12    * parse_formvars(environ, include_get_vars=True)
     13    * construct_url(environ, with_query_string=True, with_path_info=True,
     14                    script_name=None, path_info=None, querystring=None)
     15    * path_info_split(path_info)
     16    * path_info_pop(environ)
     17    * resolve_relative_url(url, environ)
     18 
     19 """
     20 import cgi
     21 from six.moves.urllib import parse as urlparse
     22 from six.moves.urllib.parse import quote, parse_qsl
     23 try:
     24     # Python 3
     25     from http.cookies import SimpleCookie, CookieError
     26 except ImportError:
     27     # Python 2
     28     from Cookie import SimpleCookie, CookieError
     29 
     30 try:
     31     from UserDict import DictMixin
     32 except ImportError:
     33     from collections import MutableMapping as DictMixin
     34 import six
     35 
     36 from paste.util.multidict import MultiDict
     37 
# Names exported by a star-import of this module.
# NOTE(review): ``parse_dict_querystring`` and ``parse_headers`` are defined
# in this module but are not listed here -- confirm whether that is intended.
__all__ = ['get_cookies', 'get_cookie_dict', 'parse_querystring',
           'parse_formvars', 'construct_url', 'path_info_split',
           'path_info_pop', 'resolve_relative_url', 'EnvironHeaders']
     41 
     42 def get_cookies(environ):
     43     """
     44     Gets a cookie object (which is a dictionary-like object) from the
     45     request environment; caches this value in case get_cookies is
     46     called again for the same request.
     47 
     48     """
     49     header = environ.get('HTTP_COOKIE', '')
     50     if 'paste.cookies' in environ:
     51         cookies, check_header = environ['paste.cookies']
     52         if check_header == header:
     53             return cookies
     54     cookies = SimpleCookie()
     55     try:
     56         cookies.load(header)
     57     except CookieError:
     58         pass
     59     environ['paste.cookies'] = (cookies, header)
     60     return cookies
     61 
     62 def get_cookie_dict(environ):
     63     """Return a *plain* dictionary of cookies as found in the request.
     64 
     65     Unlike ``get_cookies`` this returns a dictionary, not a
     66     ``SimpleCookie`` object.  For incoming cookies a dictionary fully
     67     represents the information.  Like ``get_cookies`` this caches and
     68     checks the cache.
     69     """
     70     header = environ.get('HTTP_COOKIE')
     71     if not header:
     72         return {}
     73     if 'paste.cookies.dict' in environ:
     74         cookies, check_header = environ['paste.cookies.dict']
     75         if check_header == header:
     76             return cookies
     77     cookies = SimpleCookie()
     78     try:
     79         cookies.load(header)
     80     except CookieError:
     81         pass
     82     result = {}
     83     for name in cookies:
     84         result[name] = cookies[name].value
     85     environ['paste.cookies.dict'] = (result, header)
     86     return result
     87 
     88 def parse_querystring(environ):
     89     """
     90     Parses a query string into a list like ``[(name, value)]``.
     91     Caches this value in case parse_querystring is called again
     92     for the same request.
     93 
     94     You can pass the result to ``dict()``, but be aware that keys that
     95     appear multiple times will be lost (only the last value will be
     96     preserved).
     97 
     98     """
     99     source = environ.get('QUERY_STRING', '')
    100     if not source:
    101         return []
    102     if 'paste.parsed_querystring' in environ:
    103         parsed, check_source = environ['paste.parsed_querystring']
    104         if check_source == source:
    105             return parsed
    106     parsed = parse_qsl(source, keep_blank_values=True,
    107                        strict_parsing=False)
    108     environ['paste.parsed_querystring'] = (parsed, source)
    109     return parsed
    110 
    111 def parse_dict_querystring(environ):
    112     """Parses a query string like parse_querystring, but returns a MultiDict
    113 
    114     Caches this value in case parse_dict_querystring is called again
    115     for the same request.
    116 
    117     Example::
    118 
    119         >>> environ = {'QUERY_STRING': 'day=Monday&user=fred&user=jane'}
    120         >>> parsed = parse_dict_querystring(environ)
    121 
    122         >>> parsed['day']
    123         'Monday'
    124         >>> parsed['user']
    125         'fred'
    126         >>> parsed.getall('user')
    127         ['fred', 'jane']
    128 
    129     """
    130     source = environ.get('QUERY_STRING', '')
    131     if not source:
    132         return MultiDict()
    133     if 'paste.parsed_dict_querystring' in environ:
    134         parsed, check_source = environ['paste.parsed_dict_querystring']
    135         if check_source == source:
    136             return parsed
    137     parsed = parse_qsl(source, keep_blank_values=True,
    138                        strict_parsing=False)
    139     multi = MultiDict(parsed)
    140     environ['paste.parsed_dict_querystring'] = (multi, source)
    141     return multi
    142 
    143 def parse_formvars(environ, include_get_vars=True):
    144     """Parses the request, returning a MultiDict of form variables.
    145 
    146     If ``include_get_vars`` is true then GET (query string) variables
    147     will also be folded into the MultiDict.
    148 
    149     All values should be strings, except for file uploads which are
    150     left as ``FieldStorage`` instances.
    151 
    152     If the request was not a normal form request (e.g., a POST with an
    153     XML body) then ``environ['wsgi.input']`` won't be read.
    154     """
    155     source = environ['wsgi.input']
    156     if 'paste.parsed_formvars' in environ:
    157         parsed, check_source = environ['paste.parsed_formvars']
    158         if check_source == source:
    159             if include_get_vars:
    160                 parsed.update(parse_querystring(environ))
    161             return parsed
    162     # @@: Shouldn't bother FieldStorage parsing during GET/HEAD and
    163     # fake_out_cgi requests
    164     type = environ.get('CONTENT_TYPE', '').lower()
    165     if ';' in type:
    166         type = type.split(';', 1)[0]
    167     fake_out_cgi = type not in ('', 'application/x-www-form-urlencoded',
    168                                 'multipart/form-data')
    169     # FieldStorage assumes a default CONTENT_LENGTH of -1, but a
    170     # default of 0 is better:
    171     if not environ.get('CONTENT_LENGTH'):
    172         environ['CONTENT_LENGTH'] = '0'
    173     # Prevent FieldStorage from parsing QUERY_STRING during GET/HEAD
    174     # requests
    175     old_query_string = environ.get('QUERY_STRING','')
    176     environ['QUERY_STRING'] = ''
    177     if fake_out_cgi:
    178         input = six.BytesIO(b'')
    179         old_content_type = environ.get('CONTENT_TYPE')
    180         old_content_length = environ.get('CONTENT_LENGTH')
    181         environ['CONTENT_LENGTH'] = '0'
    182         environ['CONTENT_TYPE'] = ''
    183     else:
    184         input = environ['wsgi.input']
    185     fs = cgi.FieldStorage(fp=input,
    186                           environ=environ,
    187                           keep_blank_values=1)
    188     environ['QUERY_STRING'] = old_query_string
    189     if fake_out_cgi:
    190         environ['CONTENT_TYPE'] = old_content_type
    191         environ['CONTENT_LENGTH'] = old_content_length
    192     formvars = MultiDict()
    193     if isinstance(fs.value, list):
    194         for name in fs.keys():
    195             values = fs[name]
    196             if not isinstance(values, list):
    197                 values = [values]
    198             for value in values:
    199                 if not value.filename:
    200                     value = value.value
    201                 formvars.add(name, value)
    202     environ['paste.parsed_formvars'] = (formvars, source)
    203     if include_get_vars:
    204         formvars.update(parse_querystring(environ))
    205     return formvars
    206 
    207 def construct_url(environ, with_query_string=True, with_path_info=True,
    208                   script_name=None, path_info=None, querystring=None):
    209     """Reconstructs the URL from the WSGI environment.
    210 
    211     You may override SCRIPT_NAME, PATH_INFO, and QUERYSTRING with
    212     the keyword arguments.
    213 
    214     """
    215     url = environ['wsgi.url_scheme']+'://'
    216 
    217     if environ.get('HTTP_HOST'):
    218         host = environ['HTTP_HOST']
    219         port = None
    220         if ':' in host:
    221             host, port = host.split(':', 1)
    222             if environ['wsgi.url_scheme'] == 'https':
    223                 if port == '443':
    224                     port = None
    225             elif environ['wsgi.url_scheme'] == 'http':
    226                 if port == '80':
    227                     port = None
    228         url += host
    229         if port:
    230             url += ':%s' % port
    231     else:
    232         url += environ['SERVER_NAME']
    233         if environ['wsgi.url_scheme'] == 'https':
    234             if environ['SERVER_PORT'] != '443':
    235                 url += ':' + environ['SERVER_PORT']
    236         else:
    237             if environ['SERVER_PORT'] != '80':
    238                 url += ':' + environ['SERVER_PORT']
    239 
    240     if script_name is None:
    241         url += quote(environ.get('SCRIPT_NAME',''))
    242     else:
    243         url += quote(script_name)
    244     if with_path_info:
    245         if path_info is None:
    246             url += quote(environ.get('PATH_INFO',''))
    247         else:
    248             url += quote(path_info)
    249     if with_query_string:
    250         if querystring is None:
    251             if environ.get('QUERY_STRING'):
    252                 url += '?' + environ['QUERY_STRING']
    253         elif querystring:
    254             url += '?' + querystring
    255     return url
    256 
    257 def resolve_relative_url(url, environ):
    258     """
    259     Resolve the given relative URL as being relative to the
    260     location represented by the environment.  This can be used
    261     for redirecting to a relative path.  Note: if url is already
    262     absolute, this function will (intentionally) have no effect
    263     on it.
    264 
    265     """
    266     cur_url = construct_url(environ, with_query_string=False)
    267     return urlparse.urljoin(cur_url, url)
    268 
    269 def path_info_split(path_info):
    270     """
    271     Splits off the first segment of the path.  Returns (first_part,
    272     rest_of_path).  first_part can be None (if PATH_INFO is empty), ''
    273     (if PATH_INFO is '/'), or a name without any /'s.  rest_of_path
    274     can be '' or a string starting with /.
    275 
    276     """
    277     if not path_info:
    278         return None, ''
    279     assert path_info.startswith('/'), (
    280         "PATH_INFO should start with /: %r" % path_info)
    281     path_info = path_info.lstrip('/')
    282     if '/' in path_info:
    283         first, rest = path_info.split('/', 1)
    284         return first, '/' + rest
    285     else:
    286         return path_info, ''
    287 
    288 def path_info_pop(environ):
    289     """
    290     'Pops' off the next segment of PATH_INFO, pushing it onto
    291     SCRIPT_NAME, and returning that segment.
    292 
    293     For instance::
    294 
    295         >>> def call_it(script_name, path_info):
    296         ...     env = {'SCRIPT_NAME': script_name, 'PATH_INFO': path_info}
    297         ...     result = path_info_pop(env)
    298         ...     print('SCRIPT_NAME=%r; PATH_INFO=%r; returns=%r' % (
    299         ...         env['SCRIPT_NAME'], env['PATH_INFO'], result))
    300         >>> call_it('/foo', '/bar')
    301         SCRIPT_NAME='/foo/bar'; PATH_INFO=''; returns='bar'
    302         >>> call_it('/foo/bar', '')
    303         SCRIPT_NAME='/foo/bar'; PATH_INFO=''; returns=None
    304         >>> call_it('/foo/bar', '/')
    305         SCRIPT_NAME='/foo/bar/'; PATH_INFO=''; returns=''
    306         >>> call_it('', '/1/2/3')
    307         SCRIPT_NAME='/1'; PATH_INFO='/2/3'; returns='1'
    308         >>> call_it('', '//1/2')
    309         SCRIPT_NAME='//1'; PATH_INFO='/2'; returns='1'
    310 
    311     """
    312     path = environ.get('PATH_INFO', '')
    313     if not path:
    314         return None
    315     while path.startswith('/'):
    316         environ['SCRIPT_NAME'] += '/'
    317         path = path[1:]
    318     if '/' not in path:
    319         environ['SCRIPT_NAME'] += path
    320         environ['PATH_INFO'] = ''
    321         return path
    322     else:
    323         segment, path = path.split('/', 1)
    324         environ['PATH_INFO'] = '/' + path
    325         environ['SCRIPT_NAME'] += segment
    326         return segment
    327 
    328 _parse_headers_special = {
    329     # This is a Zope convention, but we'll allow it here:
    330     'HTTP_CGI_AUTHORIZATION': 'Authorization',
    331     'CONTENT_LENGTH': 'Content-Length',
    332     'CONTENT_TYPE': 'Content-Type',
    333     }
    334 
    335 def parse_headers(environ):
    336     """
    337     Parse the headers in the environment (like ``HTTP_HOST``) and
    338     yield a sequence of those (header_name, value) tuples.
    339     """
    340     # @@: Maybe should parse out comma-separated headers?
    341     for cgi_var, value in environ.iteritems():
    342         if cgi_var in _parse_headers_special:
    343             yield _parse_headers_special[cgi_var], value
    344         elif cgi_var.startswith('HTTP_'):
    345             yield cgi_var[5:].title().replace('_', '-'), value
    346 
    347 class EnvironHeaders(DictMixin):
    348     """An object that represents the headers as present in a
    349     WSGI environment.
    350 
    351     This object is a wrapper (with no internal state) for a WSGI
    352     request object, representing the CGI-style HTTP_* keys as a
    353     dictionary.  Because a CGI environment can only hold one value for
    354     each key, this dictionary is single-valued (unlike outgoing
    355     headers).
    356     """
    357 
    358     def __init__(self, environ):
    359         self.environ = environ
    360 
    361     def _trans_name(self, name):
    362         key = 'HTTP_'+name.replace('-', '_').upper()
    363         if key == 'HTTP_CONTENT_LENGTH':
    364             key = 'CONTENT_LENGTH'
    365         elif key == 'HTTP_CONTENT_TYPE':
    366             key = 'CONTENT_TYPE'
    367         return key
    368 
    369     def _trans_key(self, key):
    370         if key == 'CONTENT_TYPE':
    371             return 'Content-Type'
    372         elif key == 'CONTENT_LENGTH':
    373             return 'Content-Length'
    374         elif key.startswith('HTTP_'):
    375             return key[5:].replace('_', '-').title()
    376         else:
    377             return None
    378 
    379     def __len__(self):
    380         return len(self.environ)
    381 
    382     def __getitem__(self, item):
    383         return self.environ[self._trans_name(item)]
    384 
    385     def __setitem__(self, item, value):
    386         # @@: Should this dictionary be writable at all?
    387         self.environ[self._trans_name(item)] = value
    388 
    389     def __delitem__(self, item):
    390         del self.environ[self._trans_name(item)]
    391 
    392     def __iter__(self):
    393         for key in self.environ:
    394             name = self._trans_key(key)
    395             if name is not None:
    396                 yield name
    397 
    398     def keys(self):
    399         return list(iter(self))
    400 
    401     def __contains__(self, item):
    402         return self._trans_name(item) in self.environ
    403 
    404 def _cgi_FieldStorage__repr__patch(self):
    405     """ monkey patch for FieldStorage.__repr__
    406 
    407     Unbelievely, the default __repr__ on FieldStorage reads
    408     the entire file content instead of being sane about it.
    409     This is a simple replacement that doesn't do that
    410     """
    411     if self.file:
    412         return "FieldStorage(%r, %r)" % (
    413                 self.name, self.filename)
    414     return "FieldStorage(%r, %r, %r)" % (
    415              self.name, self.filename, self.value)
    416 
    417 cgi.FieldStorage.__repr__ = _cgi_FieldStorage__repr__patch
    418 
    419 if __name__ == '__main__':
    420     import doctest
    421     doctest.testmod()
    422