# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
# (c) 2005 Ian Bicking, Clark C. Evans and contributors
# This module is part of the Python Paste Project and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
"""
This module handles sending static content such as in-memory data or
files.  At this time it has cache helpers and understands the
if-modified-since request header.
"""

import os, time, mimetypes, zipfile, tarfile
from paste.httpexceptions import *
from paste.httpheaders import *

# Files smaller than this many bytes are read fully into memory by
# ``FileApp.update()``; larger files are streamed from disk per request.
CACHE_SIZE = 4096
# Chunk size used when streaming a file (``_FileIter`` and the
# ``wsgi.file_wrapper`` call in ``FileApp.get()``).
BLOCK_SIZE = 4096 * 16

__all__ = ['DataApp', 'FileApp', 'DirectoryApp', 'ArchiveStore']


class DataApp(object):
    """
    Returns an application that will send content in a single chunk,
    this application has support for setting cache-control and for
    responding to conditional (or HEAD) requests.

    Constructor Arguments:

        ``content``     the content being sent to the client

        ``headers``     the headers to send with the response

        The remaining ``kwargs`` correspond to headers, where the
        underscore is replaced with a dash.  These values are only
        added to the headers if they are not already provided; thus,
        they can be used for default values.  Examples include, but
        are not limited to:

            ``content_type``
            ``content_encoding``
            ``content_location``

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.

    ``set_content()``

        This method provides a mechanism to set the content after the
        application has been constructed.  This method does things
        like changing ``Last-Modified`` and ``Content-Length`` headers.

    """

    allowed_methods = ('GET', 'HEAD')

    def __init__(self, content, headers=None, allowed_methods=None,
                 **kwargs):
        """
        Store ``headers`` (used as-is, not copied), apply each keyword
        argument as a header via ``get_header(name).update(...)``, and
        call ``set_content()`` when ``content`` is not ``None``.

        ``allowed_methods``, when given, overrides the class-level
        tuple of accepted request methods for this instance.
        """
        assert isinstance(headers, (type(None), list))
        self.expires = None
        self.content = None
        self.content_length = None
        self.last_modified = 0
        if allowed_methods is not None:
            self.allowed_methods = allowed_methods
        self.headers = headers or []
        for (k, v) in kwargs.items():
            header = get_header(k)
            header.update(self.headers, v)
        ACCEPT_RANGES.update(self.headers, bytes=True)
        if not CONTENT_TYPE(self.headers):
            CONTENT_TYPE.update(self.headers)
        if content is not None:
            self.set_content(content)

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings to the stored headers.

        The value returned by ``CACHE_CONTROL.apply`` (or ``None`` when
        it is falsy) is kept in ``self.expires`` and later passed to
        ``EXPIRES.update`` as ``delta`` by ``get()``.  Returns ``self``
        so calls can be chained.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def set_content(self, content, last_modified=None):
        """
        Replace the served content and refresh the derived state.

        ``last_modified`` defaults to the current time.  Updates
        ``self.content``, ``self.content_length`` and the
        ``Last-Modified`` header.  Returns ``self``.
        """
        assert content is not None
        if last_modified is None:
            self.last_modified = time.time()
        else:
            self.last_modified = last_modified
        self.content = content
        self.content_length = len(content)
        LAST_MODIFIED.update(self.headers, time=self.last_modified)
        return self

    def content_disposition(self, **kwargs):
        """
        Apply ``Content-Disposition`` settings to the stored headers
        and return ``self``.
        """
        CONTENT_DISPOSITION.apply(self.headers, **kwargs)
        return self

    def __call__(self, environ, start_response):
        """
        WSGI entry point: reject methods outside ``allowed_methods``
        with ``HTTPMethodNotAllowed``, otherwise delegate to ``get()``.
        """
        method = environ['REQUEST_METHOD'].upper()
        if method not in self.allowed_methods:
            exc = HTTPMethodNotAllowed(
                'You cannot %s a file' % method,
                headers=[('Allow', ','.join(self.allowed_methods))])
            return exc(environ, start_response)
        return self.get(environ, start_response)

    def calculate_etag(self):
        """
        Return the quoted entity tag built from the last-modified time
        and the content length.
        """
        return '"%s-%s"' % (self.last_modified, self.content_length)

    def _not_modified(self, headers, start_response):
        """
        Strip entity headers from ``headers``, start a 304 response and
        return its empty body.
        """
        # horribly inefficient, n^2 performance, yuck!
        for head in list_headers(entity=True):
            head.delete(headers)
        start_response('304 Not Modified', headers)
        return [b'']  # empty body

    def get(self, environ, start_response):
        """
        Answer a request, honouring ``If-None-Match``,
        ``If-Modified-Since`` and single-range ``Range`` headers.

        Returns a ``list`` body whenever the response is complete
        (cached content, 304, or an error response).  When
        ``self.content`` is ``None`` it instead returns the tuple
        ``(lower, content_length)`` after calling ``start_response``,
        leaving the caller (see ``FileApp.get``) to supply the bytes.
        """
        headers = self.headers[:]
        current_etag = self.calculate_etag()
        ETAG.update(headers, current_etag)
        if self.expires is not None:
            EXPIRES.update(headers, delta=self.expires)

        try:
            client_etags = IF_NONE_MATCH.parse(environ)
        except HTTPBadRequest as exce:
            return exce.wsgi_application(environ, start_response)
        if client_etags and any(etag == current_etag or etag == '*'
                                for etag in client_etags):
            return self._not_modified(headers, start_response)

        # If we get If-None-Match and If-Modified-Since, and
        # If-None-Match doesn't match, then we should not try to
        # figure out If-Modified-Since (which has 1-second granularity
        # and just isn't as accurate)
        if not client_etags:
            try:
                client_clock = IF_MODIFIED_SINCE.parse(environ)
            except HTTPBadRequest as exce:
                return exce.wsgi_application(environ, start_response)
            if (client_clock is not None
                    and client_clock >= int(self.last_modified)):
                return self._not_modified(headers, start_response)

        (lower, upper) = (0, self.content_length - 1)
        range_spec = RANGE.parse(environ)
        if (range_spec and 'bytes' == range_spec[0]
                and 1 == len(range_spec[1])):
            (lower, upper) = range_spec[1][0]
            # NOTE(review): assumes RANGE.parse reports an omitted end
            # as None -- confirm against paste.httpheaders.  Testing
            # ``is None`` (rather than truthiness) keeps an explicit
            # end of 0, e.g. ``bytes=0-0``, from being widened to the
            # whole content.
            if upper is None:
                upper = self.content_length - 1
            if upper >= self.content_length or lower > upper:
                return HTTPRequestRangeNotSatisfiable((
                    "Range request was made beyond the end of the content,\r\n"
                    "which is %s long.\r\n  Range: %s\r\n") % (
                        self.content_length, RANGE(environ))
                ).wsgi_application(environ, start_response)

        content_length = upper - lower + 1
        CONTENT_RANGE.update(headers, first_byte=lower, last_byte=upper,
                             total_length=self.content_length)
        CONTENT_LENGTH.update(headers, content_length)
        if range_spec or content_length != self.content_length:
            start_response('206 Partial Content', headers)
        else:
            start_response('200 OK', headers)
        if self.content is not None:
            return [self.content[lower:upper+1]]
        return (lower, content_length)


class FileApp(DataApp):
    """
    Returns an application that will send the file at the given
    filename.  Adds a mime type based on ``mimetypes.guess_type()``.
    See DataApp for the arguments beyond ``filename``.
    """

    def __init__(self, filename, headers=None, **kwargs):
        """
        Remember ``filename`` and use the guessed content type and
        encoding as defaults for the corresponding header keyword
        arguments.  No content is loaded here; ``update()`` does that
        on each request.
        """
        self.filename = filename
        content_type, content_encoding = self.guess_type()
        if content_type and 'content_type' not in kwargs:
            kwargs['content_type'] = content_type
        if content_encoding and 'content_encoding' not in kwargs:
            kwargs['content_encoding'] = content_encoding
        DataApp.__init__(self, None, headers, **kwargs)

    def guess_type(self):
        """Return ``mimetypes.guess_type()`` for the served filename."""
        return mimetypes.guess_type(self.filename)

    def update(self, force=False):
        """
        Re-read file metadata (and, for small files, the content).

        Does nothing when the file's mtime equals ``self.last_modified``
        unless ``force`` is true.  Files smaller than ``CACHE_SIZE``
        are cached in ``self.content``; for larger ones ``self.content``
        is ``None`` and only the length is recorded.

        Raises whatever ``os.stat`` / ``open`` raise (``OSError`` /
        ``IOError``); on failure the previously recorded state is left
        untouched.
        """
        stat = os.stat(self.filename)
        if not force and stat.st_mtime == self.last_modified:
            return
        if stat.st_size < CACHE_SIZE:
            # Read first, record afterwards: a failed read must not
            # leave a fresh mtime paired with stale content.
            with open(self.filename, "rb") as fh:
                data = fh.read()
            self.set_content(data, stat.st_mtime)
        else:
            self.content = None
            self.content_length = stat.st_size
            self.last_modified = stat.st_mtime
        # This is updated automatically if self.set_content() is
        # called
        LAST_MODIFIED.update(self.headers, time=self.last_modified)

    def _not_found(self, environ, start_response):
        """Run the 404 response used when the file does not exist."""
        exc = HTTPNotFound(
            'The resource does not exist',
            comment="No file at %r" % self.filename)
        return exc(environ, start_response)

    def _forbidden(self, error, environ, start_response):
        """Run the 403 response used when the file cannot be read."""
        exc = HTTPForbidden(
            'You are not permitted to view this file (%s)' % error)
        return exc.wsgi_application(
            environ, start_response)

    def get(self, environ, start_response):
        """
        Serve the file, refreshing cached state first.

        A request carrying ``max-age=0`` in ``Cache-Control`` forces a
        refresh.  A missing file yields 404 and an unreadable one 403.
        HEAD requests get an empty body.  Large files are streamed via
        ``wsgi.file_wrapper`` or ``_FileIter``.
        """
        is_head = environ['REQUEST_METHOD'].upper() == 'HEAD'
        # RFC 2616 13.2.6
        force = 'max-age=0' in CACHE_CONTROL(environ).lower()
        try:
            self.update(force=force)
        except (IOError, OSError) as e:
            # os.stat()/open() failures would otherwise escape as an
            # unhandled exception instead of the 404/403 used below.
            if not os.path.exists(self.filename):
                return self._not_found(environ, start_response)
            return self._forbidden(e, environ, start_response)

        fh = None
        if not self.content:
            if not os.path.exists(self.filename):
                return self._not_found(environ, start_response)
            try:
                fh = open(self.filename, 'rb')
            except (IOError, OSError) as e:
                return self._forbidden(e, environ, start_response)

        # The handle must be closed on every path that does not hand
        # it to the response iterator (304, range error, HEAD, empty
        # file, or an exception).
        handed_off = False
        try:
            retval = DataApp.get(self, environ, start_response)
            if isinstance(retval, list):
                # cached content, exception, or not-modified
                if is_head:
                    return [b'']
                return retval
            (lower, content_length) = retval
            if is_head:
                return [b'']
            fh.seek(lower)
            file_wrapper = environ.get('wsgi.file_wrapper', None)
            # file_wrapper is only given a block size, not a byte
            # limit, so use it only when the requested range runs to
            # the end of the file; otherwise bound the read ourselves.
            if file_wrapper and lower + content_length >= self.content_length:
                body = file_wrapper(fh, BLOCK_SIZE)
            else:
                body = _FileIter(fh, size=content_length)
            handed_off = True
            return body
        finally:
            if fh is not None and not handed_off:
                fh.close()


class _FileIter(object):
    """
    Iterator yielding at most ``size`` bytes from ``file`` in chunks of
    ``block_size`` (default ``BLOCK_SIZE``).  ``size=None`` means read
    until the file is exhausted.  ``close()`` closes the file.
    """

    def __init__(self, file, block_size=None, size=None):
        self.file = file
        self.size = size
        self.block_size = block_size or BLOCK_SIZE

    def __iter__(self):
        return self

    def next(self):
        """Return the next chunk, or raise ``StopIteration`` when done."""
        chunk_size = self.block_size
        if self.size is not None:
            # Never request more than the remaining byte budget; once
            # it reaches zero the read below returns nothing.
            if chunk_size > self.size:
                chunk_size = self.size
            self.size -= chunk_size
        data = self.file.read(chunk_size)
        if not data:
            raise StopIteration
        return data
    __next__ = next

    def close(self):
        self.file.close()


class DirectoryApp(object):
    """
    Returns an application that dispatches requests to corresponding FileApps based on PATH_INFO.
    FileApp instances are cached. This app makes sure not to serve any files that are not in a subdirectory.
    To customize FileApp creation override ``DirectoryApp.make_fileapp``
    """

    def __init__(self, path):
        """
        Store the absolute form of ``path`` with a trailing separator;
        ``path`` must be an existing directory.
        """
        self.path = os.path.abspath(path)
        if not self.path.endswith(os.path.sep):
            self.path += os.path.sep
        assert os.path.isdir(self.path)
        # Maps PATH_INFO -> application; only file hits are stored.
        self.cached_apps = {}

    make_fileapp = FileApp

    def __call__(self, environ, start_response):
        """
        Serve the file named by ``PATH_INFO``: 403 when the normalised
        path leaves the served directory, 404 when it is not a file.
        """
        path_info = environ['PATH_INFO']
        app = self.cached_apps.get(path_info)
        if app is None:
            path = os.path.join(self.path, path_info.lstrip('/'))
            # self.path ends with a separator, so this prefix test
            # cannot be satisfied by a sibling directory sharing a
            # name prefix.
            if not os.path.normpath(path).startswith(self.path):
                app = HTTPForbidden()
            elif os.path.isfile(path):
                app = self.make_fileapp(path)
                self.cached_apps[path_info] = app
            else:
                app = HTTPNotFound(comment=path)
        return app(environ, start_response)


class ArchiveStore(object):
    """
    Returns an application that serves up a DataApp for items requested
    in a given zip or tar archive.

    Constructor Arguments:

        ``filepath``    the path to the archive being served

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.
    """

    def __init__(self, filepath):
        """
        Open ``filepath`` as a zip or tar archive.

        Raises ``AssertionError`` when it is neither.
        """
        if zipfile.is_zipfile(filepath):
            self.archive = zipfile.ZipFile(filepath,"r")
        elif tarfile.is_tarfile(filepath):
            # NOTE(review): tarfile.TarFileCompat is not available on
            # Python 3, so tar archives fail here with AttributeError
            # on that interpreter -- needs a replacement adapter.
            self.archive = tarfile.TarFileCompat(filepath,"r")
        else:
            raise AssertionError("filepath '%s' is not a zip or tar " % filepath)
        self.expires = None
        self.last_modified = time.time()
        # Headers collected by cache_control(); copied into every
        # DataApp created afterwards.
        self.headers = []
        # Maps archive member path -> DataApp.
        self.cache = {}

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings to ``self.headers`` and keep
        the value returned by ``CACHE_CONTROL.apply`` (or ``None``) in
        ``self.expires``.  Only applications created after this call
        pick the settings up.  Returns ``self``.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def __call__(self, environ, start_response):
        """
        Serve the archive member named by ``PATH_INFO`` (one leading
        slash removed).  Responds 404 for unknown members and for
        names ending in ``/``.  Each served member is wrapped in a
        ``DataApp`` and cached.
        """
        path = environ.get("PATH_INFO","")
        if path.startswith("/"):
            path = path[1:]
        application = self.cache.get(path)
        if application:
            return application(environ, start_response)
        try:
            info = self.archive.getinfo(path)
        except KeyError:
            exc = HTTPNotFound("The file requested, '%s', was not found." % path)
            return exc.wsgi_application(environ, start_response)
        if info.filename.endswith("/"):
            exc = HTTPNotFound("Path requested, '%s', is not a file." % path)
            return exc.wsgi_application(environ, start_response)
        content_type, content_encoding = mimetypes.guess_type(info.filename)
        header_kwargs = {'content_type': content_type}
        # 'None' is not a valid content-encoding, so don't set the header if
        # mimetypes.guess_type returns None
        if content_encoding is not None:
            header_kwargs['content_encoding'] = content_encoding
        # Each DataApp gets its own copy so per-app header updates do
        # not leak back into the shared list.
        app = DataApp(None, headers=list(self.headers), **header_kwargs)
        app.set_content(self.archive.read(path),
                        time.mktime(info.date_time + (0,0,0)))
        self.cache[path] = app
        app.expires = self.expires
        return app(environ, start_response)