Home | History | Annotate | Download | only in wsgiref
      1 """Base classes for server/gateway implementations"""
      2 
      3 from .util import FileWrapper, guess_scheme, is_hop_by_hop
      4 from .headers import Headers
      5 
      6 import sys, os, time
      7 
      8 __all__ = [
      9     'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
     10     'IISCGIHandler', 'read_environ'
     11 ]
     12 
     13 # Weekday and month names for HTTP date/time formatting; always English!
     14 _weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
     15 _monthname = [None, # Dummy so we can use 1-based month numbers
     16               "Jan", "Feb", "Mar", "Apr", "May", "Jun",
     17               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
     18 
     19 def format_date_time(timestamp):
     20     year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
     21     return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
     22         _weekdayname[wd], day, _monthname[month], year, hh, mm, ss
     23     )
     24 
     25 _is_request = {
     26     'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
     27     'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
     28 }.__contains__
     29 
     30 def _needs_transcode(k):
     31     return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \
     32         or (k.startswith('REDIRECT_') and _needs_transcode(k[9:]))
     33 
     34 def read_environ():
     35     """Read environment, fixing HTTP variables"""
     36     enc = sys.getfilesystemencoding()
     37     esc = 'surrogateescape'
     38     try:
     39         ''.encode('utf-8', esc)
     40     except LookupError:
     41         esc = 'replace'
     42     environ = {}
     43 
     44     # Take the basic environment from native-unicode os.environ. Attempt to
     45     # fix up the variables that come from the HTTP request to compensate for
     46     # the bytes->unicode decoding step that will already have taken place.
     47     for k, v in os.environ.items():
     48         if _needs_transcode(k):
     49 
     50             # On win32, the os.environ is natively Unicode. Different servers
     51             # decode the request bytes using different encodings.
     52             if sys.platform == 'win32':
     53                 software = os.environ.get('SERVER_SOFTWARE', '').lower()
     54 
     55                 # On IIS, the HTTP request will be decoded as UTF-8 as long
     56                 # as the input is a valid UTF-8 sequence. Otherwise it is
     57                 # decoded using the system code page (mbcs), with no way to
     58                 # detect this has happened. Because UTF-8 is the more likely
     59                 # encoding, and mbcs is inherently unreliable (an mbcs string
     60                 # that happens to be valid UTF-8 will not be decoded as mbcs)
     61                 # always recreate the original bytes as UTF-8.
     62                 if software.startswith('microsoft-iis/'):
     63                     v = v.encode('utf-8').decode('iso-8859-1')
     64 
     65                 # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct
     66                 # to the Unicode environ. No modification needed.
     67                 elif software.startswith('apache/'):
     68                     pass
     69 
     70                 # Python 3's http.server.CGIHTTPRequestHandler decodes
     71                 # using the urllib.unquote default of UTF-8, amongst other
     72                 # issues.
     73                 elif (
     74                     software.startswith('simplehttp/')
     75                     and 'python/3' in software
     76                 ):
     77                     v = v.encode('utf-8').decode('iso-8859-1')
     78 
     79                 # For other servers, guess that they have written bytes to
     80                 # the environ using stdio byte-oriented interfaces, ending up
     81                 # with the system code page.
     82                 else:
     83                     v = v.encode(enc, 'replace').decode('iso-8859-1')
     84 
     85             # Recover bytes from unicode environ, using surrogate escapes
     86             # where available (Python 3.1+).
     87             else:
     88                 v = v.encode(enc, esc).decode('iso-8859-1')
     89 
     90         environ[k] = v
     91     return environ
     92 
     93 
     94 class BaseHandler:
     95     """Manage the invocation of a WSGI application"""
     96 
     97     # Configuration parameters; can override per-subclass or per-instance
     98     wsgi_version = (1,0)
     99     wsgi_multithread = True
    100     wsgi_multiprocess = True
    101     wsgi_run_once = False
    102 
    103     origin_server = True    # We are transmitting direct to client
    104     http_version  = "1.0"   # Version that should be used for response
    105     server_software = None  # String name of server software, if any
    106 
    107     # os_environ is used to supply configuration from the OS environment:
    108     # by default it's a copy of 'os.environ' as of import time, but you can
    109     # override this in e.g. your __init__ method.
    110     os_environ= read_environ()
    111 
    112     # Collaborator classes
    113     wsgi_file_wrapper = FileWrapper     # set to None to disable
    114     headers_class = Headers             # must be a Headers-like class
    115 
    116     # Error handling (also per-subclass or per-instance)
    117     traceback_limit = None  # Print entire traceback to self.get_stderr()
    118     error_status = "500 Internal Server Error"
    119     error_headers = [('Content-Type','text/plain')]
    120     error_body = b"A server error occurred.  Please contact the administrator."
    121 
    122     # State variables (don't mess with these)
    123     status = result = None
    124     headers_sent = False
    125     headers = None
    126     bytes_sent = 0
    127 
    128     def run(self, application):
    129         """Invoke the application"""
    130         # Note to self: don't move the close()!  Asynchronous servers shouldn't
    131         # call close() from finish_response(), so if you close() anywhere but
    132         # the double-error branch here, you'll break asynchronous servers by
    133         # prematurely closing.  Async servers must return from 'run()' without
    134         # closing if there might still be output to iterate over.
    135         try:
    136             self.setup_environ()
    137             self.result = application(self.environ, self.start_response)
    138             self.finish_response()
    139         except:
    140             try:
    141                 self.handle_error()
    142             except:
    143                 # If we get an error handling an error, just give up already!
    144                 self.close()
    145                 raise   # ...and let the actual server figure it out.
    146 
    147 
    148     def setup_environ(self):
    149         """Set up the environment for one request"""
    150 
    151         env = self.environ = self.os_environ.copy()
    152         self.add_cgi_vars()
    153 
    154         env['wsgi.input']        = self.get_stdin()
    155         env['wsgi.errors']       = self.get_stderr()
    156         env['wsgi.version']      = self.wsgi_version
    157         env['wsgi.run_once']     = self.wsgi_run_once
    158         env['wsgi.url_scheme']   = self.get_scheme()
    159         env['wsgi.multithread']  = self.wsgi_multithread
    160         env['wsgi.multiprocess'] = self.wsgi_multiprocess
    161 
    162         if self.wsgi_file_wrapper is not None:
    163             env['wsgi.file_wrapper'] = self.wsgi_file_wrapper
    164 
    165         if self.origin_server and self.server_software:
    166             env.setdefault('SERVER_SOFTWARE',self.server_software)
    167 
    168 
    169     def finish_response(self):
    170         """Send any iterable data, then close self and the iterable
    171 
    172         Subclasses intended for use in asynchronous servers will
    173         want to redefine this method, such that it sets up callbacks
    174         in the event loop to iterate over the data, and to call
    175         'self.close()' once the response is finished.
    176         """
    177         try:
    178             if not self.result_is_file() or not self.sendfile():
    179                 for data in self.result:
    180                     self.write(data)
    181                 self.finish_content()
    182         finally:
    183             self.close()
    184 
    185 
    186     def get_scheme(self):
    187         """Return the URL scheme being used"""
    188         return guess_scheme(self.environ)
    189 
    190 
    191     def set_content_length(self):
    192         """Compute Content-Length or switch to chunked encoding if possible"""
    193         try:
    194             blocks = len(self.result)
    195         except (TypeError,AttributeError,NotImplementedError):
    196             pass
    197         else:
    198             if blocks==1:
    199                 self.headers['Content-Length'] = str(self.bytes_sent)
    200                 return
    201         # XXX Try for chunked encoding if origin server and client is 1.1
    202 
    203 
    204     def cleanup_headers(self):
    205         """Make any necessary header changes or defaults
    206 
    207         Subclasses can extend this to add other defaults.
    208         """
    209         if 'Content-Length' not in self.headers:
    210             self.set_content_length()
    211 
    212     def start_response(self, status, headers,exc_info=None):
    213         """'start_response()' callable as specified by PEP 3333"""
    214 
    215         if exc_info:
    216             try:
    217                 if self.headers_sent:
    218                     # Re-raise original exception if headers sent
    219                     raise exc_info[0](exc_info[1]).with_traceback(exc_info[2])
    220             finally:
    221                 exc_info = None        # avoid dangling circular ref
    222         elif self.headers is not None:
    223             raise AssertionError("Headers already set!")
    224 
    225         self.status = status
    226         self.headers = self.headers_class(headers)
    227         status = self._convert_string_type(status, "Status")
    228         assert len(status)>=4,"Status must be at least 4 characters"
    229         assert status[:3].isdigit(), "Status message must begin w/3-digit code"
    230         assert status[3]==" ", "Status message must have a space after code"
    231 
    232         if __debug__:
    233             for name, val in headers:
    234                 name = self._convert_string_type(name, "Header name")
    235                 val = self._convert_string_type(val, "Header value")
    236                 assert not is_hop_by_hop(name),"Hop-by-hop headers not allowed"
    237 
    238         return self.write
    239 
    240     def _convert_string_type(self, value, title):
    241         """Convert/check value type."""
    242         if type(value) is str:
    243             return value
    244         raise AssertionError(
    245             "{0} must be of type str (got {1})".format(title, repr(value))
    246         )
    247 
    248     def send_preamble(self):
    249         """Transmit version/status/date/server, via self._write()"""
    250         if self.origin_server:
    251             if self.client_is_modern():
    252                 self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1'))
    253                 if 'Date' not in self.headers:
    254                     self._write(
    255                         ('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1')
    256                     )
    257                 if self.server_software and 'Server' not in self.headers:
    258                     self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1'))
    259         else:
    260             self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1'))
    261 
    262     def write(self, data):
    263         """'write()' callable as specified by PEP 3333"""
    264 
    265         assert type(data) is bytes, \
    266             "write() argument must be a bytes instance"
    267 
    268         if not self.status:
    269             raise AssertionError("write() before start_response()")
    270 
    271         elif not self.headers_sent:
    272             # Before the first output, send the stored headers
    273             self.bytes_sent = len(data)    # make sure we know content-length
    274             self.send_headers()
    275         else:
    276             self.bytes_sent += len(data)
    277 
    278         # XXX check Content-Length and truncate if too many bytes written?
    279         self._write(data)
    280         self._flush()
    281 
    282 
    283     def sendfile(self):
    284         """Platform-specific file transmission
    285 
    286         Override this method in subclasses to support platform-specific
    287         file transmission.  It is only called if the application's
    288         return iterable ('self.result') is an instance of
    289         'self.wsgi_file_wrapper'.
    290 
    291         This method should return a true value if it was able to actually
    292         transmit the wrapped file-like object using a platform-specific
    293         approach.  It should return a false value if normal iteration
    294         should be used instead.  An exception can be raised to indicate
    295         that transmission was attempted, but failed.
    296 
    297         NOTE: this method should call 'self.send_headers()' if
    298         'self.headers_sent' is false and it is going to attempt direct
    299         transmission of the file.
    300         """
    301         return False   # No platform-specific transmission by default
    302 
    303 
    304     def finish_content(self):
    305         """Ensure headers and content have both been sent"""
    306         if not self.headers_sent:
    307             # Only zero Content-Length if not set by the application (so
    308             # that HEAD requests can be satisfied properly, see #3839)
    309             self.headers.setdefault('Content-Length', "0")
    310             self.send_headers()
    311         else:
    312             pass # XXX check if content-length was too short?
    313 
    314     def close(self):
    315         """Close the iterable (if needed) and reset all instance vars
    316 
    317         Subclasses may want to also drop the client connection.
    318         """
    319         try:
    320             if hasattr(self.result,'close'):
    321                 self.result.close()
    322         finally:
    323             self.result = self.headers = self.status = self.environ = None
    324             self.bytes_sent = 0; self.headers_sent = False
    325 
    326 
    327     def send_headers(self):
    328         """Transmit headers to the client, via self._write()"""
    329         self.cleanup_headers()
    330         self.headers_sent = True
    331         if not self.origin_server or self.client_is_modern():
    332             self.send_preamble()
    333             self._write(bytes(self.headers))
    334 
    335 
    336     def result_is_file(self):
    337         """True if 'self.result' is an instance of 'self.wsgi_file_wrapper'"""
    338         wrapper = self.wsgi_file_wrapper
    339         return wrapper is not None and isinstance(self.result,wrapper)
    340 
    341 
    342     def client_is_modern(self):
    343         """True if client can accept status and headers"""
    344         return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9'
    345 
    346 
    347     def log_exception(self,exc_info):
    348         """Log the 'exc_info' tuple in the server log
    349 
    350         Subclasses may override to retarget the output or change its format.
    351         """
    352         try:
    353             from traceback import print_exception
    354             stderr = self.get_stderr()
    355             print_exception(
    356                 exc_info[0], exc_info[1], exc_info[2],
    357                 self.traceback_limit, stderr
    358             )
    359             stderr.flush()
    360         finally:
    361             exc_info = None
    362 
    363     def handle_error(self):
    364         """Log current error, and send error output to client if possible"""
    365         self.log_exception(sys.exc_info())
    366         if not self.headers_sent:
    367             self.result = self.error_output(self.environ, self.start_response)
    368             self.finish_response()
    369         # XXX else: attempt advanced recovery techniques for HTML or text?
    370 
    371     def error_output(self, environ, start_response):
    372         """WSGI mini-app to create error output
    373 
    374         By default, this just uses the 'error_status', 'error_headers',
    375         and 'error_body' attributes to generate an output page.  It can
    376         be overridden in a subclass to dynamically generate diagnostics,
    377         choose an appropriate message for the user's preferred language, etc.
    378 
    379         Note, however, that it's not recommended from a security perspective to
    380         spit out diagnostics to any old user; ideally, you should have to do
    381         something special to enable diagnostic output, which is why we don't
    382         include any here!
    383         """
    384         start_response(self.error_status,self.error_headers[:],sys.exc_info())
    385         return [self.error_body]
    386 
    387 
    388     # Pure abstract methods; *must* be overridden in subclasses
    389 
    390     def _write(self,data):
    391         """Override in subclass to buffer data for send to client
    392 
    393         It's okay if this method actually transmits the data; BaseHandler
    394         just separates write and flush operations for greater efficiency
    395         when the underlying system actually has such a distinction.
    396         """
    397         raise NotImplementedError
    398 
    399     def _flush(self):
    400         """Override in subclass to force sending of recent '_write()' calls
    401 
    402         It's okay if this method is a no-op (i.e., if '_write()' actually
    403         sends the data.
    404         """
    405         raise NotImplementedError
    406 
    407     def get_stdin(self):
    408         """Override in subclass to return suitable 'wsgi.input'"""
    409         raise NotImplementedError
    410 
    411     def get_stderr(self):
    412         """Override in subclass to return suitable 'wsgi.errors'"""
    413         raise NotImplementedError
    414 
    415     def add_cgi_vars(self):
    416         """Override in subclass to insert CGI variables in 'self.environ'"""
    417         raise NotImplementedError
    418 
    419 
    420 class SimpleHandler(BaseHandler):
    421     """Handler that's just initialized with streams, environment, etc.
    422 
    423     This handler subclass is intended for synchronous HTTP/1.0 origin servers,
    424     and handles sending the entire response output, given the correct inputs.
    425 
    426     Usage::
    427 
    428         handler = SimpleHandler(
    429             inp,out,err,env, multithread=False, multiprocess=True
    430         )
    431         handler.run(app)"""
    432 
    433     def __init__(self,stdin,stdout,stderr,environ,
    434         multithread=True, multiprocess=False
    435     ):
    436         self.stdin = stdin
    437         self.stdout = stdout
    438         self.stderr = stderr
    439         self.base_env = environ
    440         self.wsgi_multithread = multithread
    441         self.wsgi_multiprocess = multiprocess
    442 
    443     def get_stdin(self):
    444         return self.stdin
    445 
    446     def get_stderr(self):
    447         return self.stderr
    448 
    449     def add_cgi_vars(self):
    450         self.environ.update(self.base_env)
    451 
    452     def _write(self,data):
    453         result = self.stdout.write(data)
    454         if result is None or result == len(data):
    455             return
    456         from warnings import warn
    457         warn("SimpleHandler.stdout.write() should not do partial writes",
    458             DeprecationWarning)
    459         while True:
    460             data = data[result:]
    461             if not data:
    462                 break
    463             result = self.stdout.write(data)
    464 
    465     def _flush(self):
    466         self.stdout.flush()
    467         self._flush = self.stdout.flush
    468 
    469 
    470 class BaseCGIHandler(SimpleHandler):
    471 
    472     """CGI-like systems using input/output/error streams and environ mapping
    473 
    474     Usage::
    475 
    476         handler = BaseCGIHandler(inp,out,err,env)
    477         handler.run(app)
    478 
    479     This handler class is useful for gateway protocols like ReadyExec and
    480     FastCGI, that have usable input/output/error streams and an environment
    481     mapping.  It's also the base class for CGIHandler, which just uses
    482     sys.stdin, os.environ, and so on.
    483 
    484     The constructor also takes keyword arguments 'multithread' and
    485     'multiprocess' (defaulting to 'True' and 'False' respectively) to control
    486     the configuration sent to the application.  It sets 'origin_server' to
    487     False (to enable CGI-like output), and assumes that 'wsgi.run_once' is
    488     False.
    489     """
    490 
    491     origin_server = False
    492 
    493 
    494 class CGIHandler(BaseCGIHandler):
    495 
    496     """CGI-based invocation via sys.stdin/stdout/stderr and os.environ
    497 
    498     Usage::
    499 
    500         CGIHandler().run(app)
    501 
    502     The difference between this class and BaseCGIHandler is that it always
    503     uses 'wsgi.run_once' of 'True', 'wsgi.multithread' of 'False', and
    504     'wsgi.multiprocess' of 'True'.  It does not take any initialization
    505     parameters, but always uses 'sys.stdin', 'os.environ', and friends.
    506 
    507     If you need to override any of these parameters, use BaseCGIHandler
    508     instead.
    509     """
    510 
    511     wsgi_run_once = True
    512     # Do not allow os.environ to leak between requests in Google App Engine
    513     # and other multi-run CGI use cases.  This is not easily testable.
    514     # See http://bugs.python.org/issue7250
    515     os_environ = {}
    516 
    517     def __init__(self):
    518         BaseCGIHandler.__init__(
    519             self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
    520             read_environ(), multithread=False, multiprocess=True
    521         )
    522 
    523 
    524 class IISCGIHandler(BaseCGIHandler):
    525     """CGI-based invocation with workaround for IIS path bug
    526 
    527     This handler should be used in preference to CGIHandler when deploying on
    528     Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
    529     or metabase allowPathInfoForScriptMappings (IIS<7).
    530     """
    531     wsgi_run_once = True
    532     os_environ = {}
    533 
    534     # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
    535     # the front, causing problems for WSGI applications that wish to implement
    536     # routing. This handler strips any such duplicated path.
    537 
    538     # IIS can be configured to pass the correct PATH_INFO, but this causes
    539     # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
    540     # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
    541     # setting can only be made on a vhost level, affecting all other script
    542     # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
    543     # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
    544     # rarely uses it because there is still no UI for it.)
    545 
    546     # There is no way for CGI code to tell whether the option was set, so a
    547     # separate handler class is provided.
    548     def __init__(self):
    549         environ= read_environ()
    550         path = environ.get('PATH_INFO', '')
    551         script = environ.get('SCRIPT_NAME', '')
    552         if (path+'/').startswith(script+'/'):
    553             environ['PATH_INFO'] = path[len(script):]
    554         BaseCGIHandler.__init__(
    555             self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
    556             environ, multithread=False, multiprocess=True
    557         )
    558