# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
# Also licenced under the Apache License, 2.0: http://opensource.org/licenses/apache2.0.php
# Licensed to PSF under a Contributor Agreement
"""
Middleware to check for obedience to the WSGI specification.

Some of the things this checks:

* Signature of the application and start_response (including that
  keyword arguments are not used).

* Environment checks:

  - Environment is a dictionary.

  - That all the required keys are in the environment: REQUEST_METHOD,
    SERVER_NAME, SERVER_PORT, wsgi.version, wsgi.url_scheme,
    wsgi.input, wsgi.errors, wsgi.multithread, wsgi.multiprocess,
    wsgi.run_once

  - That HTTP_CONTENT_TYPE and HTTP_CONTENT_LENGTH are not in the
    environment (these headers should appear as CONTENT_LENGTH and
    CONTENT_TYPE).

  - Warns if QUERY_STRING is missing, as the cgi module acts
    unpredictably in that case.

  - That CGI-style variables (that don't contain a .) have
    (non-unicode) string values

  - That wsgi.version is a tuple

  - That wsgi.url_scheme is 'http' or 'https' (@@: is this too
    restrictive?)

  - Warns if the REQUEST_METHOD is not known (@@: probably too
    restrictive).

  - That SCRIPT_NAME and PATH_INFO are empty or start with /

  - That at least one of SCRIPT_NAME or PATH_INFO are set.

  - That CONTENT_LENGTH is a non-negative integer.

  - That SCRIPT_NAME is not '/' (it should be '', and PATH_INFO should
    be '/').

  - That wsgi.input has the methods read, readline, readlines, and
    __iter__

  - That wsgi.errors has the methods flush, write, writelines

* The status is a string, contains a space, starts with an integer,
  and that integer is in range (>= 100).

* That the headers is a list (not another kind of sequence).

* That the items of the headers are two-item tuples.

* That there is no 'status' header (that is used in CGI, but not in
  WSGI).

* That the header names don't contain newlines or colons and don't
  end in _ or -, and that header values don't contain control
  characters (octal character codes 000 through 037).

* That Content-Type is given if there is content (CGI often has a
  default content type, but WSGI does not).

* That no Content-Type is given when there is no content (@@: is this
  too restrictive?)

* That the exc_info argument to start_response is a tuple or None.

* That all calls to the writer are with byte strings, and no other
  methods on the writer are accessed.

* That wsgi.input is used properly:

  - .read() is called with zero or one argument

  - That it returns a string

  - That readline, readlines, and __iter__ return strings

  - That .close() is not called

  - No other methods are provided

* That wsgi.errors is used properly:

  - .write() and .writelines() is called with a string

  - That .close() is not called, and no other methods are provided.

* The response iterator:

  - That it is not a string (it should be a list of a single string; a
    string will work, but perform horribly).

  - That .next() returns a string

  - That the iterator is not iterated over until start_response has
    been called (that can signal either a server or application
    error).

  - That .close() is called (doesn't raise exception, only prints to
    sys.stderr, because we only know it isn't called when the object
    is garbage collected).
"""

import re
import six
import sys
import warnings

# A header name must start with a letter and may then contain letters,
# digits, '-' and '_'.
header_re = re.compile(r'^[a-zA-Z][a-zA-Z0-9\-_]*$')
# Matches any control character (octal 000-037) in a header value.
bad_header_value_re = re.compile(r'[\000-\037]')


class WSGIWarning(Warning):
    """
    Raised in response to WSGI-spec-related warnings
    """


def middleware(application, global_conf=None):

    """
    When applied between a WSGI server and a WSGI application, this
    middleware will check for WSGI compliancy on a number of levels.
    This middleware does not modify the request or response in any
    way, but will throw an AssertionError if anything seems off
    (except for a failure to close the application iterator, which
    will be printed to stderr -- there's no way to throw an exception
    at that point).

    ``application`` is the WSGI application to wrap; ``global_conf``
    is accepted but not used.  Returns the wrapping WSGI application.

    Note that every check is an ``assert`` statement, so the checks
    are skipped when Python runs with ``-O``.
    """

    def lint_app(*args, **kw):
        assert len(args) == 2, "Two arguments required"
        assert not kw, "No keyword arguments allowed"
        environ, start_response = args

        check_environ(environ)

        # We use this to check if the application returns without
        # calling start_response:
        start_response_started = []

        def start_response_wrapper(*args, **kw):
            # ``args`` is wrapped in a one-item tuple so that the
            # message formats for any argument count (a bare tuple
            # would raise TypeError instead of AssertionError).
            assert len(args) == 2 or len(args) == 3, (
                "Invalid number of arguments: %s" % (args,))
            assert not kw, "No keyword arguments allowed"
            status = args[0]
            headers = args[1]
            if len(args) == 3:
                exc_info = args[2]
            else:
                exc_info = None

            check_status(status)
            check_headers(headers)
            check_content_type(status, headers)
            check_exc_info(exc_info)

            start_response_started.append(None)
            return WriteWrapper(start_response(*args))

        environ['wsgi.input'] = InputWrapper(environ['wsgi.input'])
        environ['wsgi.errors'] = ErrorWrapper(environ['wsgi.errors'])

        iterator = application(environ, start_response_wrapper)
        assert iterator is not None and iterator != False, (
            "The application must return an iterator, if only an empty list")

        check_iterator(iterator)

        return IteratorWrapper(iterator, start_response_started)

    return lint_app


class InputWrapper(object):
    """
    Wraps ``wsgi.input`` and asserts that it is called with valid
    arguments and returns byte strings.
    """

    def __init__(self, wsgi_input):
        self.input = wsgi_input

    def read(self, *args):
        """Read from the wrapped input; at most one argument is allowed."""
        assert len(args) <= 1
        v = self.input.read(*args)
        assert isinstance(v, six.binary_type)
        return v

    def readline(self, *args):
        """Read one line; at most one argument is allowed."""
        # Same argument-count check as read() and readlines().
        assert len(args) <= 1
        v = self.input.readline(*args)
        assert isinstance(v, six.binary_type)
        return v

    def readlines(self, *args):
        """Read all lines as a list of byte strings."""
        assert len(args) <= 1
        lines = self.input.readlines(*args)
        assert isinstance(lines, list)
        for line in lines:
            assert isinstance(line, six.binary_type)
        return lines

    def __iter__(self):
        """Yield lines via readline() until an empty line is returned."""
        while 1:
            line = self.readline()
            if not line:
                return
            yield line

    def close(self):
        """Always fails: the application must not close wsgi.input."""
        assert 0, "input.close() must not be called"


class ErrorWrapper(object):
    """
    Wraps ``wsgi.errors`` and asserts that only strings are written to
    it and that it is never closed.
    """

    def __init__(self, wsgi_errors):
        self.errors = wsgi_errors

    def write(self, s):
        """Write the native string ``s`` to the wrapped error stream."""
        # wsgi.errors is a text-mode stream, so it takes native
        # strings (``str`` is the byte string type on Python 2).
        assert isinstance(s, str)
        self.errors.write(s)

    def flush(self):
        """Flush the wrapped error stream."""
        self.errors.flush()

    def writelines(self, seq):
        """Write each item of ``seq`` through write(), checking each one."""
        for line in seq:
            self.write(line)

    def close(self):
        """Always fails: the application must not close wsgi.errors."""
        assert 0, "errors.close() must not be called"


class WriteWrapper(object):
    """
    Wraps the ``write`` callable returned by ``start_response`` and
    asserts that it is only called with byte strings.
    """

    def __init__(self, wsgi_writer):
        self.writer = wsgi_writer

    def __call__(self, s):
        assert isinstance(s, six.binary_type)
        self.writer(s)


class PartialIteratorWrapper(object):
    """
    Wraps an iterable so that iterating it produces an
    ``IteratorWrapper``.
    """

    def __init__(self, wsgi_iterator):
        self.iterator = wsgi_iterator

    def __iter__(self):
        # We want to make sure __iter__ is called
        # There is no start_response record here, so pass None to
        # disable that check (the argument is required).
        return IteratorWrapper(self.iterator, None)


class IteratorWrapper(object):
    """
    Wraps the application's response iterator.

    ``check_start_response`` is either ``None`` (no check) or a list
    that is non-empty once ``start_response`` has been called; it is
    examined on the first call to ``next()``.
    """

    def __init__(self, wsgi_iterator, check_start_response):
        self.original_iterator = wsgi_iterator
        self.iterator = iter(wsgi_iterator)
        self.closed = False
        self.check_start_response = check_start_response

    def __iter__(self):
        return self

    def next(self):
        """Return the next item of the wrapped iterator."""
        assert not self.closed, (
            "Iterator read after closed")
        v = six.next(self.iterator)
        if self.check_start_response is not None:
            assert self.check_start_response, (
                "The application returns and we started iterating over "
                "its body, but start_response has not yet been called")
            # Only check on the first item.
            self.check_start_response = None
        return v

    __next__ = next

    def close(self):
        """Mark as closed and close the original iterator if it can be."""
        self.closed = True
        if hasattr(self.original_iterator, 'close'):
            self.original_iterator.close()

    def __del__(self):
        if not self.closed:
            sys.stderr.write(
                "Iterator garbage collected without being closed")
        assert self.closed, (
            "Iterator garbage collected without being closed")


def check_environ(environ):
    """
    Assert that ``environ`` looks like a valid WSGI environment.

    Issues a ``WSGIWarning`` if QUERY_STRING is missing or
    REQUEST_METHOD is not one of the known methods.
    """
    assert isinstance(environ, dict), (
        "Environment is not of the right type: %r (environment: %r)"
        % (type(environ), environ))

    # wsgi.url_scheme is listed because it is read unconditionally
    # below; a missing key should be an AssertionError, not a KeyError.
    for key in ['REQUEST_METHOD', 'SERVER_NAME', 'SERVER_PORT',
                'wsgi.version', 'wsgi.url_scheme', 'wsgi.input',
                'wsgi.errors', 'wsgi.multithread', 'wsgi.multiprocess',
                'wsgi.run_once']:
        assert key in environ, (
            "Environment missing required key: %r" % key)

    for key in ['HTTP_CONTENT_TYPE', 'HTTP_CONTENT_LENGTH']:
        assert key not in environ, (
            "Environment should not have the key: %s "
            "(use %s instead)" % (key, key[5:]))

    if 'QUERY_STRING' not in environ:
        warnings.warn(
            'QUERY_STRING is not in the WSGI environment; the cgi '
            'module will use sys.argv when this variable is missing, '
            'so application errors are more likely',
            WSGIWarning)

    for key in environ:
        if '.' in key:
            # Extension, we don't care about its type
            continue
        assert isinstance(environ[key], str), (
            "Environmental variable %s is not a string: %r (value: %r)"
            % (key, type(environ[key]), environ[key]))

    assert isinstance(environ['wsgi.version'], tuple), (
        "wsgi.version should be a tuple (%r)" % environ['wsgi.version'])
    assert environ['wsgi.url_scheme'] in ('http', 'https'), (
        "wsgi.url_scheme unknown: %r" % environ['wsgi.url_scheme'])

    check_input(environ['wsgi.input'])
    check_errors(environ['wsgi.errors'])

    # @@: these need filling out:
    if environ['REQUEST_METHOD'] not in (
            'GET', 'HEAD', 'POST', 'OPTIONS', 'PUT', 'DELETE', 'TRACE'):
        warnings.warn(
            "Unknown REQUEST_METHOD: %r" % environ['REQUEST_METHOD'],
            WSGIWarning)

    assert (not environ.get('SCRIPT_NAME')
            or environ['SCRIPT_NAME'].startswith('/')), (
        "SCRIPT_NAME doesn't start with /: %r" % environ['SCRIPT_NAME'])
    assert (not environ.get('PATH_INFO')
            or environ['PATH_INFO'].startswith('/')), (
        "PATH_INFO doesn't start with /: %r" % environ['PATH_INFO'])
    if environ.get('CONTENT_LENGTH'):
        assert int(environ['CONTENT_LENGTH']) >= 0, (
            "Invalid CONTENT_LENGTH: %r" % environ['CONTENT_LENGTH'])

    if not environ.get('SCRIPT_NAME'):
        assert 'PATH_INFO' in environ, (
            "One of SCRIPT_NAME or PATH_INFO are required (PATH_INFO "
            "should at least be '/' if SCRIPT_NAME is empty)")
    assert environ.get('SCRIPT_NAME') != '/', (
        "SCRIPT_NAME cannot be '/'; it should instead be '', and "
        "PATH_INFO should be '/'")


def check_input(wsgi_input):
    """Assert that ``wsgi_input`` has the required input attributes."""
    for attr in ['read', 'readline', 'readlines', '__iter__']:
        assert hasattr(wsgi_input, attr), (
            "wsgi.input (%r) doesn't have the attribute %s"
            % (wsgi_input, attr))


def check_errors(wsgi_errors):
    """Assert that ``wsgi_errors`` has the required output attributes."""
    for attr in ['flush', 'write', 'writelines']:
        assert hasattr(wsgi_errors, attr), (
            "wsgi.errors (%r) doesn't have the attribute %s"
            % (wsgi_errors, attr))


def check_status(status):
    """
    Assert that ``status`` is a string starting with a three-character
    integer code of at least 100.

    Issues a ``WSGIWarning`` if the code is not followed by a space.
    """
    # One-item tuple so a tuple-valued status still formats cleanly.
    assert isinstance(status, str), (
        "Status must be a string (not %r)" % (status,))
    # Implicitly check that we can turn it into an integer:
    status_code = status.split(None, 1)[0]
    assert len(status_code) == 3, (
        "Status codes must be three characters: %r" % status_code)
    status_int = int(status_code)
    assert status_int >= 100, "Status code is invalid: %r" % status_int
    if len(status) < 4 or status[3] != ' ':
        warnings.warn(
            "The status string (%r) should be a three-digit integer "
            "followed by a single space and a status explanation"
            % status, WSGIWarning)


def check_headers(headers):
    """
    Assert that ``headers`` is a list of two-item tuples with valid
    header names and values.
    """
    assert isinstance(headers, list), (
        "Headers (%r) must be of type list: %r"
        % (headers, type(headers)))
    for item in headers:
        assert isinstance(item, tuple), (
            "Individual headers (%r) must be of type tuple: %r"
            % (item, type(item)))
        assert len(item) == 2
        name, value = item
        # One-item tuple so a tuple-valued ``value`` still formats.
        assert name.lower() != 'status', (
            "The Status header cannot be used; it conflicts with CGI "
            "script, and HTTP status is not given through headers "
            "(value: %r)." % (value,))
        assert '\n' not in name and ':' not in name, (
            "Header names may not contain ':' or '\\n': %r" % name)
        assert header_re.search(name), "Bad header name: %r" % name
        assert not name.endswith('-') and not name.endswith('_'), (
            "Names may not end in '-' or '_': %r" % name)
        assert not bad_header_value_re.search(value), (
            "Bad header value: %r (bad char: %r)"
            % (value, bad_header_value_re.search(value).group(0)))


def check_content_type(status, headers):
    """
    Assert that a Content-Type header is present, except for 204 and
    304 responses, where it must be absent.
    """
    code = int(status.split(None, 1)[0])
    # @@: need one more person to verify this interpretation of RFC 2616
    #     http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
    NO_MESSAGE_BODY = (204, 304)
    NO_MESSAGE_TYPE = (204, 304)
    for name, value in headers:
        if name.lower() == 'content-type':
            if code not in NO_MESSAGE_TYPE:
                return
            assert 0, (("Content-Type header found in a %s response, "
                        "which must not return content.") % code)
    if code not in NO_MESSAGE_BODY:
        assert 0, "No Content-Type header found in headers (%s)" % headers


def check_exc_info(exc_info):
    """Assert that ``exc_info`` is ``None`` or exactly a tuple."""
    assert exc_info is None or type(exc_info) is tuple, (
        "exc_info (%r) is not a tuple: %r" % (exc_info, type(exc_info)))
    # More exc_info checks?


def check_iterator(iterator):
    """Assert that the application did not return a bare string."""
    # Technically a string is legal, which is why it's a really bad
    # idea, because it may cause the response to be returned
    # character-by-character
    # ``bytes`` is rejected too: on Python 3 iterating it yields
    # integers rather than byte strings.
    assert not isinstance(iterator, (str, bytes)), (
        "You should not return a string as your application iterator, "
        "instead return a single-item list containing that string.")


def make_middleware(application, global_conf):
    # @@: global_conf should be taken out of the middleware function,
    # and isolated here
    return middleware(application)

make_middleware.__doc__ = __doc__

__all__ = ['middleware', 'make_middleware']