Home | History | Annotate | Download | only in paste
      1 # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
      2 # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
      3 # Also licenced under the Apache License, 2.0: http://opensource.org/licenses/apache2.0.php
      4 # Licensed to PSF under a Contributor Agreement
      5 """
      6 Middleware to check for obedience to the WSGI specification.
      7 
      8 Some of the things this checks:
      9 
     10 * Signature of the application and start_response (including that
     11   keyword arguments are not used).
     12 
     13 * Environment checks:
     14 
     15   - Environment is a dictionary (and not a subclass).
     16 
     17   - That all the required keys are in the environment: REQUEST_METHOD,
     18     SERVER_NAME, SERVER_PORT, wsgi.version, wsgi.input, wsgi.errors,
     19     wsgi.multithread, wsgi.multiprocess, wsgi.run_once
     20 
     21   - That HTTP_CONTENT_TYPE and HTTP_CONTENT_LENGTH are not in the
     22     environment (these headers should appear as CONTENT_LENGTH and
     23     CONTENT_TYPE).
     24 
     25   - Warns if QUERY_STRING is missing, as the cgi module acts
     26     unpredictably in that case.
     27 
     28   - That CGI-style variables (that don't contain a .) have
     29     (non-unicode) string values
     30 
     31   - That wsgi.version is a tuple
     32 
     33   - That wsgi.url_scheme is 'http' or 'https' (@@: is this too
     34     restrictive?)
     35 
     36   - Warns if the REQUEST_METHOD is not known (@@: probably too
     37     restrictive).
     38 
     39   - That SCRIPT_NAME and PATH_INFO are empty or start with /
     40 
     41   - That at least one of SCRIPT_NAME or PATH_INFO are set.
     42 
     43   - That CONTENT_LENGTH is a non-negative integer.
     44 
     45   - That SCRIPT_NAME is not '/' (it should be '', and PATH_INFO should
     46     be '/').
     47 
     48   - That wsgi.input has the methods read, readline, readlines, and
     49     __iter__
     50 
     51   - That wsgi.errors has the methods flush, write, writelines
     52 
     53 * The status is a string, contains a space, starts with an integer,
     54   and that integer is in range (>= 100).
     55 
     56 * That the headers is a list (not a subclass, not another kind of
     57   sequence).
     58 
     59 * That the items of the headers are tuples of strings.
     60 
     61 * That there is no 'status' header (that is used in CGI, but not in
     62   WSGI).
     63 
     64 * That header names don't contain newlines or colons or end in _ or -,
     65   and header values have no character codes at or below octal 037.
     66 
     67 * That Content-Type is given if there is content (CGI often has a
     68   default content type, but WSGI does not).
     69 
     70 * That no Content-Type is given when there is no content (@@: is this
     71   too restrictive?)
     72 
     73 * That the exc_info argument to start_response is a tuple or None.
     74 
     75 * That all calls to the writer are with strings, and no other methods
     76   on the writer are accessed.
     77 
     78 * That wsgi.input is used properly:
     79 
     80   - .read() is called with zero or one argument
     81 
     82   - That it returns a string
     83 
     84   - That readline, readlines, and __iter__ return strings
     85 
     86   - That .close() is not called
     87 
     88   - No other methods are provided
     89 
     90 * That wsgi.errors is used properly:
     91 
     92   - .write() and .writelines() is called with a string
     93 
     94   - That .close() is not called, and no other methods are provided.
     95 
     96 * The response iterator:
     97 
     98   - That it is not a string (it should be a list of a single string; a
     99     string will work, but perform horribly).
    100 
    101   - That .next() returns a string
    102 
    103   - That the iterator is not iterated over until start_response has
    104     been called (that can signal either a server or application
    105     error).
    106 
    107   - That .close() is called (doesn't raise exception, only prints to
    108     sys.stderr, because we only know it isn't called when the object
    109     is garbage collected).
    110 """
    111 
    112 import re
    113 import six
    114 import sys
    115 import warnings
    116 
    117 header_re = re.compile(r'^[a-zA-Z][a-zA-Z0-9\-_]*$')
    118 bad_header_value_re = re.compile(r'[\000-\037]')
    119 
    120 class WSGIWarning(Warning):
    121     """
    122     Raised in response to WSGI-spec-related warnings
    123     """
    124 
    125 def middleware(application, global_conf=None):
    126 
    127     """
    128     When applied between a WSGI server and a WSGI application, this
    129     middleware will check for WSGI compliancy on a number of levels.
    130     This middleware does not modify the request or response in any
    131     way, but will throw an AssertionError if anything seems off
    132     (except for a failure to close the application iterator, which
    133     will be printed to stderr -- there's no way to throw an exception
    134     at that point).
    135     """
    136 
    137     def lint_app(*args, **kw):
    138         assert len(args) == 2, "Two arguments required"
    139         assert not kw, "No keyword arguments allowed"
    140         environ, start_response = args
    141 
    142         check_environ(environ)
    143 
    144         # We use this to check if the application returns without
    145         # calling start_response:
    146         start_response_started = []
    147 
    148         def start_response_wrapper(*args, **kw):
    149             assert len(args) == 2 or len(args) == 3, (
    150                 "Invalid number of arguments: %s" % args)
    151             assert not kw, "No keyword arguments allowed"
    152             status = args[0]
    153             headers = args[1]
    154             if len(args) == 3:
    155                 exc_info = args[2]
    156             else:
    157                 exc_info = None
    158 
    159             check_status(status)
    160             check_headers(headers)
    161             check_content_type(status, headers)
    162             check_exc_info(exc_info)
    163 
    164             start_response_started.append(None)
    165             return WriteWrapper(start_response(*args))
    166 
    167         environ['wsgi.input'] = InputWrapper(environ['wsgi.input'])
    168         environ['wsgi.errors'] = ErrorWrapper(environ['wsgi.errors'])
    169 
    170         iterator = application(environ, start_response_wrapper)
    171         assert iterator is not None and iterator != False, (
    172             "The application must return an iterator, if only an empty list")
    173 
    174         check_iterator(iterator)
    175 
    176         return IteratorWrapper(iterator, start_response_started)
    177 
    178     return lint_app
    179 
    180 class InputWrapper(object):
    181 
    182     def __init__(self, wsgi_input):
    183         self.input = wsgi_input
    184 
    185     def read(self, *args):
    186         assert len(args) <= 1
    187         v = self.input.read(*args)
    188         assert isinstance(v, six.binary_type)
    189         return v
    190 
    191     def readline(self, *args):
    192         v = self.input.readline(*args)
    193         assert isinstance(v, six.binary_type)
    194         return v
    195 
    196     def readlines(self, *args):
    197         assert len(args) <= 1
    198         lines = self.input.readlines(*args)
    199         assert isinstance(lines, list)
    200         for line in lines:
    201             assert isinstance(line, six.binary_type)
    202         return lines
    203 
    204     def __iter__(self):
    205         while 1:
    206             line = self.readline()
    207             if not line:
    208                 return
    209             yield line
    210 
    211     def close(self):
    212         assert 0, "input.close() must not be called"
    213 
    214 class ErrorWrapper(object):
    215 
    216     def __init__(self, wsgi_errors):
    217         self.errors = wsgi_errors
    218 
    219     def write(self, s):
    220         assert isinstance(s, bytes)
    221         self.errors.write(s)
    222 
    223     def flush(self):
    224         self.errors.flush()
    225 
    226     def writelines(self, seq):
    227         for line in seq:
    228             self.write(line)
    229 
    230     def close(self):
    231         assert 0, "errors.close() must not be called"
    232 
    233 class WriteWrapper(object):
    234 
    235     def __init__(self, wsgi_writer):
    236         self.writer = wsgi_writer
    237 
    238     def __call__(self, s):
    239         assert isinstance(s, six.binary_type)
    240         self.writer(s)
    241 
    242 class PartialIteratorWrapper(object):
    243 
    244     def __init__(self, wsgi_iterator):
    245         self.iterator = wsgi_iterator
    246 
    247     def __iter__(self):
    248         # We want to make sure __iter__ is called
    249         return IteratorWrapper(self.iterator)
    250 
    251 class IteratorWrapper(object):
    252 
    253     def __init__(self, wsgi_iterator, check_start_response):
    254         self.original_iterator = wsgi_iterator
    255         self.iterator = iter(wsgi_iterator)
    256         self.closed = False
    257         self.check_start_response = check_start_response
    258 
    259     def __iter__(self):
    260         return self
    261 
    262     def next(self):
    263         assert not self.closed, (
    264             "Iterator read after closed")
    265         v = six.next(self.iterator)
    266         if self.check_start_response is not None:
    267             assert self.check_start_response, (
    268                 "The application returns and we started iterating over its body, but start_response has not yet been called")
    269             self.check_start_response = None
    270         return v
    271 
    272     __next__ = next
    273 
    274     def close(self):
    275         self.closed = True
    276         if hasattr(self.original_iterator, 'close'):
    277             self.original_iterator.close()
    278 
    279     def __del__(self):
    280         if not self.closed:
    281             sys.stderr.write(
    282                 "Iterator garbage collected without being closed")
    283         assert self.closed, (
    284             "Iterator garbage collected without being closed")
    285 
    286 def check_environ(environ):
    287     assert isinstance(environ,dict), (
    288         "Environment is not of the right type: %r (environment: %r)"
    289         % (type(environ), environ))
    290 
    291     for key in ['REQUEST_METHOD', 'SERVER_NAME', 'SERVER_PORT',
    292                 'wsgi.version', 'wsgi.input', 'wsgi.errors',
    293                 'wsgi.multithread', 'wsgi.multiprocess',
    294                 'wsgi.run_once']:
    295         assert key in environ, (
    296             "Environment missing required key: %r" % key)
    297 
    298     for key in ['HTTP_CONTENT_TYPE', 'HTTP_CONTENT_LENGTH']:
    299         assert key not in environ, (
    300             "Environment should not have the key: %s "
    301             "(use %s instead)" % (key, key[5:]))
    302 
    303     if 'QUERY_STRING' not in environ:
    304         warnings.warn(
    305             'QUERY_STRING is not in the WSGI environment; the cgi '
    306             'module will use sys.argv when this variable is missing, '
    307             'so application errors are more likely',
    308             WSGIWarning)
    309 
    310     for key in environ.keys():
    311         if '.' in key:
    312             # Extension, we don't care about its type
    313             continue
    314         assert isinstance(environ[key], str), (
    315             "Environmental variable %s is not a string: %r (value: %r)"
    316             % (key, type(environ[key]), environ[key]))
    317 
    318     assert isinstance(environ['wsgi.version'], tuple), (
    319         "wsgi.version should be a tuple (%r)" % environ['wsgi.version'])
    320     assert environ['wsgi.url_scheme'] in ('http', 'https'), (
    321         "wsgi.url_scheme unknown: %r" % environ['wsgi.url_scheme'])
    322 
    323     check_input(environ['wsgi.input'])
    324     check_errors(environ['wsgi.errors'])
    325 
    326     # @@: these need filling out:
    327     if environ['REQUEST_METHOD'] not in (
    328         'GET', 'HEAD', 'POST', 'OPTIONS','PUT','DELETE','TRACE'):
    329         warnings.warn(
    330             "Unknown REQUEST_METHOD: %r" % environ['REQUEST_METHOD'],
    331             WSGIWarning)
    332 
    333     assert (not environ.get('SCRIPT_NAME')
    334             or environ['SCRIPT_NAME'].startswith('/')), (
    335         "SCRIPT_NAME doesn't start with /: %r" % environ['SCRIPT_NAME'])
    336     assert (not environ.get('PATH_INFO')
    337             or environ['PATH_INFO'].startswith('/')), (
    338         "PATH_INFO doesn't start with /: %r" % environ['PATH_INFO'])
    339     if environ.get('CONTENT_LENGTH'):
    340         assert int(environ['CONTENT_LENGTH']) >= 0, (
    341             "Invalid CONTENT_LENGTH: %r" % environ['CONTENT_LENGTH'])
    342 
    343     if not environ.get('SCRIPT_NAME'):
    344         assert 'PATH_INFO' in environ, (
    345             "One of SCRIPT_NAME or PATH_INFO are required (PATH_INFO "
    346             "should at least be '/' if SCRIPT_NAME is empty)")
    347     assert environ.get('SCRIPT_NAME') != '/', (
    348         "SCRIPT_NAME cannot be '/'; it should instead be '', and "
    349         "PATH_INFO should be '/'")
    350 
    351 def check_input(wsgi_input):
    352     for attr in ['read', 'readline', 'readlines', '__iter__']:
    353         assert hasattr(wsgi_input, attr), (
    354             "wsgi.input (%r) doesn't have the attribute %s"
    355             % (wsgi_input, attr))
    356 
    357 def check_errors(wsgi_errors):
    358     for attr in ['flush', 'write', 'writelines']:
    359         assert hasattr(wsgi_errors, attr), (
    360             "wsgi.errors (%r) doesn't have the attribute %s"
    361             % (wsgi_errors, attr))
    362 
    363 def check_status(status):
    364     assert isinstance(status, str), (
    365         "Status must be a string (not %r)" % status)
    366     # Implicitly check that we can turn it into an integer:
    367     status_code = status.split(None, 1)[0]
    368     assert len(status_code) == 3, (
    369         "Status codes must be three characters: %r" % status_code)
    370     status_int = int(status_code)
    371     assert status_int >= 100, "Status code is invalid: %r" % status_int
    372     if len(status) < 4 or status[3] != ' ':
    373         warnings.warn(
    374             "The status string (%r) should be a three-digit integer "
    375             "followed by a single space and a status explanation"
    376             % status, WSGIWarning)
    377 
    378 def check_headers(headers):
    379     assert isinstance(headers,list), (
    380         "Headers (%r) must be of type list: %r"
    381         % (headers, type(headers)))
    382     header_names = {}
    383     for item in headers:
    384         assert isinstance(item, tuple), (
    385             "Individual headers (%r) must be of type tuple: %r"
    386             % (item, type(item)))
    387         assert len(item) == 2
    388         name, value = item
    389         assert name.lower() != 'status', (
    390             "The Status header cannot be used; it conflicts with CGI "
    391             "script, and HTTP status is not given through headers "
    392             "(value: %r)." % value)
    393         header_names[name.lower()] = None
    394         assert '\n' not in name and ':' not in name, (
    395             "Header names may not contain ':' or '\\n': %r" % name)
    396         assert header_re.search(name), "Bad header name: %r" % name
    397         assert not name.endswith('-') and not name.endswith('_'), (
    398             "Names may not end in '-' or '_': %r" % name)
    399         assert not bad_header_value_re.search(value), (
    400             "Bad header value: %r (bad char: %r)"
    401             % (value, bad_header_value_re.search(value).group(0)))
    402 
    403 def check_content_type(status, headers):
    404     code = int(status.split(None, 1)[0])
    405     # @@: need one more person to verify this interpretation of RFC 2616
    406     #     http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
    407     NO_MESSAGE_BODY = (204, 304)
    408     NO_MESSAGE_TYPE = (204, 304)
    409     for name, value in headers:
    410         if name.lower() == 'content-type':
    411             if code not in NO_MESSAGE_TYPE:
    412                 return
    413             assert 0, (("Content-Type header found in a %s response, "
    414                         "which must not return content.") % code)
    415     if code not in NO_MESSAGE_BODY:
    416         assert 0, "No Content-Type header found in headers (%s)" % headers
    417 
    418 def check_exc_info(exc_info):
    419     assert exc_info is None or type(exc_info) is type(()), (
    420         "exc_info (%r) is not a tuple: %r" % (exc_info, type(exc_info)))
    421     # More exc_info checks?
    422 
    423 def check_iterator(iterator):
    424     # Technically a string is legal, which is why it's a really bad
    425     # idea, because it may cause the response to be returned
    426     # character-by-character
    427     assert not isinstance(iterator, str), (
    428         "You should not return a string as your application iterator, "
    429         "instead return a single-item list containing that string.")
    430 
    431 def make_middleware(application, global_conf):
    432     # @@: global_conf should be taken out of the middleware function,
    433     # and isolated here
    434     return middleware(application)
    435 
    436 make_middleware.__doc__ = __doc__
    437 
    438 __all__ = ['middleware', 'make_middleware']
    439