Home | History | Annotate | Download | only in paste
      1 # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
      2 # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
      3 # (c) 2005 Ian Bicking, Clark C. Evans and contributors
      4 # This module is part of the Python Paste Project and is released under
      5 # the MIT License: http://www.opensource.org/licenses/mit-license.php
      6 """
      7 This module handles sending static content such as in-memory data or
      8 files.  At this time it has cache helpers and understands the
      9 if-modified-since request header.
     10 """
     11 
     12 import os, time, mimetypes, zipfile, tarfile
     13 from paste.httpexceptions import *
     14 from paste.httpheaders import *
     15 
# Files smaller than this many bytes are read fully into memory by
# ``FileApp.update()``; larger files are streamed from disk on each request.
CACHE_SIZE = 4096
# Default number of bytes read per chunk when streaming a file (used by
# ``_FileIter`` and handed to ``wsgi.file_wrapper`` by ``FileApp.get()``).
BLOCK_SIZE = 4096 * 16

# Names exported by ``from <this module> import *``.
__all__ = ['DataApp', 'FileApp', 'DirectoryApp', 'ArchiveStore']
     20 
     21 class DataApp(object):
     22     """
     23     Returns an application that will send content in a single chunk,
     24     this application has support for setting cache-control and for
     25     responding to conditional (or HEAD) requests.
     26 
     27     Constructor Arguments:
     28 
     29         ``content``     the content being sent to the client
     30 
     31         ``headers``     the headers to send /w the response
     32 
     33         The remaining ``kwargs`` correspond to headers, where the
     34         underscore is replaced with a dash.  These values are only
     35         added to the headers if they are not already provided; thus,
     36         they can be used for default values.  Examples include, but
     37         are not limited to:
     38 
     39             ``content_type``
     40             ``content_encoding``
     41             ``content_location``
     42 
     43     ``cache_control()``
     44 
     45         This method provides validated construction of the ``Cache-Control``
     46         header as well as providing for automated filling out of the
     47         ``EXPIRES`` header for HTTP/1.0 clients.
     48 
     49     ``set_content()``
     50 
     51         This method provides a mechanism to set the content after the
     52         application has been constructed.  This method does things
     53         like changing ``Last-Modified`` and ``Content-Length`` headers.
     54 
     55     """
     56 
     57     allowed_methods = ('GET', 'HEAD')
     58 
     59     def __init__(self, content, headers=None, allowed_methods=None,
     60                  **kwargs):
     61         assert isinstance(headers, (type(None), list))
     62         self.expires = None
     63         self.content = None
     64         self.content_length = None
     65         self.last_modified = 0
     66         if allowed_methods is not None:
     67             self.allowed_methods = allowed_methods
     68         self.headers = headers or []
     69         for (k, v) in kwargs.items():
     70             header = get_header(k)
     71             header.update(self.headers, v)
     72         ACCEPT_RANGES.update(self.headers, bytes=True)
     73         if not CONTENT_TYPE(self.headers):
     74             CONTENT_TYPE.update(self.headers)
     75         if content is not None:
     76             self.set_content(content)
     77 
     78     def cache_control(self, **kwargs):
     79         self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
     80         return self
     81 
     82     def set_content(self, content, last_modified=None):
     83         assert content is not None
     84         if last_modified is None:
     85             self.last_modified = time.time()
     86         else:
     87             self.last_modified = last_modified
     88         self.content = content
     89         self.content_length = len(content)
     90         LAST_MODIFIED.update(self.headers, time=self.last_modified)
     91         return self
     92 
     93     def content_disposition(self, **kwargs):
     94         CONTENT_DISPOSITION.apply(self.headers, **kwargs)
     95         return self
     96 
     97     def __call__(self, environ, start_response):
     98         method = environ['REQUEST_METHOD'].upper()
     99         if method not in self.allowed_methods:
    100             exc = HTTPMethodNotAllowed(
    101                 'You cannot %s a file' % method,
    102                 headers=[('Allow', ','.join(self.allowed_methods))])
    103             return exc(environ, start_response)
    104         return self.get(environ, start_response)
    105 
    106     def calculate_etag(self):
    107         return '"%s-%s"' % (self.last_modified, self.content_length)
    108 
    109     def get(self, environ, start_response):
    110         headers = self.headers[:]
    111         current_etag = self.calculate_etag()
    112         ETAG.update(headers, current_etag)
    113         if self.expires is not None:
    114             EXPIRES.update(headers, delta=self.expires)
    115 
    116         try:
    117             client_etags = IF_NONE_MATCH.parse(environ)
    118             if client_etags:
    119                 for etag in client_etags:
    120                     if etag == current_etag or etag == '*':
    121                         # horribly inefficient, n^2 performance, yuck!
    122                         for head in list_headers(entity=True):
    123                             head.delete(headers)
    124                         start_response('304 Not Modified', headers)
    125                         return [b'']
    126         except HTTPBadRequest as exce:
    127             return exce.wsgi_application(environ, start_response)
    128 
    129         # If we get If-None-Match and If-Modified-Since, and
    130         # If-None-Match doesn't match, then we should not try to
    131         # figure out If-Modified-Since (which has 1-second granularity
    132         # and just isn't as accurate)
    133         if not client_etags:
    134             try:
    135                 client_clock = IF_MODIFIED_SINCE.parse(environ)
    136                 if (client_clock is not None
    137                     and client_clock >= int(self.last_modified)):
    138                     # horribly inefficient, n^2 performance, yuck!
    139                     for head in list_headers(entity=True):
    140                         head.delete(headers)
    141                     start_response('304 Not Modified', headers)
    142                     return [b''] # empty body
    143             except HTTPBadRequest as exce:
    144                 return exce.wsgi_application(environ, start_response)
    145 
    146         (lower, upper) = (0, self.content_length - 1)
    147         range = RANGE.parse(environ)
    148         if range and 'bytes' == range[0] and 1 == len(range[1]):
    149             (lower, upper) = range[1][0]
    150             upper = upper or (self.content_length - 1)
    151             if upper >= self.content_length or lower > upper:
    152                 return HTTPRequestRangeNotSatisfiable((
    153                   "Range request was made beyond the end of the content,\r\n"
    154                   "which is %s long.\r\n  Range: %s\r\n") % (
    155                      self.content_length, RANGE(environ))
    156                 ).wsgi_application(environ, start_response)
    157 
    158         content_length = upper - lower + 1
    159         CONTENT_RANGE.update(headers, first_byte=lower, last_byte=upper,
    160                             total_length = self.content_length)
    161         CONTENT_LENGTH.update(headers, content_length)
    162         if range or content_length != self.content_length:
    163             start_response('206 Partial Content', headers)
    164         else:
    165             start_response('200 OK', headers)
    166         if self.content is not None:
    167             return [self.content[lower:upper+1]]
    168         return (lower, content_length)
    169 
    170 class FileApp(DataApp):
    171     """
    172     Returns an application that will send the file at the given
    173     filename.  Adds a mime type based on ``mimetypes.guess_type()``.
    174     See DataApp for the arguments beyond ``filename``.
    175     """
    176 
    177     def __init__(self, filename, headers=None, **kwargs):
    178         self.filename = filename
    179         content_type, content_encoding = self.guess_type()
    180         if content_type and 'content_type' not in kwargs:
    181             kwargs['content_type'] = content_type
    182         if content_encoding and 'content_encoding' not in kwargs:
    183             kwargs['content_encoding'] = content_encoding
    184         DataApp.__init__(self, None, headers, **kwargs)
    185 
    186     def guess_type(self):
    187         return mimetypes.guess_type(self.filename)
    188 
    189     def update(self, force=False):
    190         stat = os.stat(self.filename)
    191         if not force and stat.st_mtime == self.last_modified:
    192             return
    193         self.last_modified = stat.st_mtime
    194         if stat.st_size < CACHE_SIZE:
    195             fh = open(self.filename,"rb")
    196             self.set_content(fh.read(), stat.st_mtime)
    197             fh.close()
    198         else:
    199             self.content = None
    200             self.content_length = stat.st_size
    201             # This is updated automatically if self.set_content() is
    202             # called
    203             LAST_MODIFIED.update(self.headers, time=self.last_modified)
    204 
    205     def get(self, environ, start_response):
    206         is_head = environ['REQUEST_METHOD'].upper() == 'HEAD'
    207         if 'max-age=0' in CACHE_CONTROL(environ).lower():
    208             self.update(force=True) # RFC 2616 13.2.6
    209         else:
    210             self.update()
    211         if not self.content:
    212             if not os.path.exists(self.filename):
    213                 exc = HTTPNotFound(
    214                     'The resource does not exist',
    215                     comment="No file at %r" % self.filename)
    216                 return exc(environ, start_response)
    217             try:
    218                 file = open(self.filename, 'rb')
    219             except (IOError, OSError) as e:
    220                 exc = HTTPForbidden(
    221                     'You are not permitted to view this file (%s)' % e)
    222                 return exc.wsgi_application(
    223                     environ, start_response)
    224         retval = DataApp.get(self, environ, start_response)
    225         if isinstance(retval, list):
    226             # cached content, exception, or not-modified
    227             if is_head:
    228                 return [b'']
    229             return retval
    230         (lower, content_length) = retval
    231         if is_head:
    232             return [b'']
    233         file.seek(lower)
    234         file_wrapper = environ.get('wsgi.file_wrapper', None)
    235         if file_wrapper:
    236             return file_wrapper(file, BLOCK_SIZE)
    237         else:
    238             return _FileIter(file, size=content_length)
    239 
    240 class _FileIter(object):
    241 
    242     def __init__(self, file, block_size=None, size=None):
    243         self.file = file
    244         self.size = size
    245         self.block_size = block_size or BLOCK_SIZE
    246 
    247     def __iter__(self):
    248         return self
    249 
    250     def next(self):
    251         chunk_size = self.block_size
    252         if self.size is not None:
    253             if chunk_size > self.size:
    254                 chunk_size = self.size
    255             self.size -= chunk_size
    256         data = self.file.read(chunk_size)
    257         if not data:
    258             raise StopIteration
    259         return data
    260     __next__ = next
    261 
    262     def close(self):
    263         self.file.close()
    264 
    265 
    266 class DirectoryApp(object):
    267     """
    268     Returns an application that dispatches requests to corresponding FileApps based on PATH_INFO.
    269     FileApp instances are cached. This app makes sure not to serve any files that are not in a subdirectory.
    270     To customize FileApp creation override ``DirectoryApp.make_fileapp``
    271     """
    272 
    273     def __init__(self, path):
    274         self.path = os.path.abspath(path)
    275         if not self.path.endswith(os.path.sep):
    276             self.path += os.path.sep
    277         assert os.path.isdir(self.path)
    278         self.cached_apps = {}
    279 
    280     make_fileapp = FileApp
    281 
    282     def __call__(self, environ, start_response):
    283         path_info = environ['PATH_INFO']
    284         app = self.cached_apps.get(path_info)
    285         if app is None:
    286             path = os.path.join(self.path, path_info.lstrip('/'))
    287             if not os.path.normpath(path).startswith(self.path):
    288                 app = HTTPForbidden()
    289             elif os.path.isfile(path):
    290                 app = self.make_fileapp(path)
    291                 self.cached_apps[path_info] = app
    292             else:
    293                 app = HTTPNotFound(comment=path)
    294         return app(environ, start_response)
    295 
    296 
    297 class ArchiveStore(object):
    298     """
    299     Returns an application that serves up a DataApp for items requested
    300     in a given zip or tar archive.
    301 
    302     Constructor Arguments:
    303 
    304         ``filepath``    the path to the archive being served
    305 
    306     ``cache_control()``
    307 
    308         This method provides validated construction of the ``Cache-Control``
    309         header as well as providing for automated filling out of the
    310         ``EXPIRES`` header for HTTP/1.0 clients.
    311     """
    312 
    313     def __init__(self, filepath):
    314         if zipfile.is_zipfile(filepath):
    315             self.archive = zipfile.ZipFile(filepath,"r")
    316         elif tarfile.is_tarfile(filepath):
    317             self.archive = tarfile.TarFileCompat(filepath,"r")
    318         else:
    319             raise AssertionError("filepath '%s' is not a zip or tar " % filepath)
    320         self.expires = None
    321         self.last_modified = time.time()
    322         self.cache = {}
    323 
    324     def cache_control(self, **kwargs):
    325         self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
    326         return self
    327 
    328     def __call__(self, environ, start_response):
    329         path = environ.get("PATH_INFO","")
    330         if path.startswith("/"):
    331             path = path[1:]
    332         application = self.cache.get(path)
    333         if application:
    334             return application(environ, start_response)
    335         try:
    336             info = self.archive.getinfo(path)
    337         except KeyError:
    338             exc = HTTPNotFound("The file requested, '%s', was not found." % path)
    339             return exc.wsgi_application(environ, start_response)
    340         if info.filename.endswith("/"):
    341             exc = HTTPNotFound("Path requested, '%s', is not a file." % path)
    342             return exc.wsgi_application(environ, start_response)
    343         content_type, content_encoding = mimetypes.guess_type(info.filename)
    344         # 'None' is not a valid content-encoding, so don't set the header if
    345         # mimetypes.guess_type returns None
    346         if content_encoding is not None:
    347             app = DataApp(None, content_type = content_type,
    348                                 content_encoding = content_encoding)
    349         else:
    350             app = DataApp(None, content_type = content_type)
    351         app.set_content(self.archive.read(path),
    352                 time.mktime(info.date_time + (0,0,0)))
    353         self.cache[path] = app
    354         app.expires = self.expires
    355         return app(environ, start_response)
    356 
    357