#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2009 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A class to serve pages from zip files and use memcache for performance.

This contains a class and a function to create an anonymous instance of the
class to serve HTTP GET requests. Memcache is used to increase response speed
and lower processing cycles used in serving. Credit to Guido van Rossum and
his implementation of zipserve which served as a reference as I wrote this.

  MemcachedZipHandler: Class that serves requests
  create_handler: function to create a configured MemcachedZipHandler subclass
"""

__author__ = 'jmatt (at] google.com (Justin Mattson)'

import email.Utils
import logging
import mimetypes
import re
import sys
import time
import zipfile
from time import localtime, strftime

import yaml

from google.appengine.api import memcache
from google.appengine.ext import webapp
from google.appengine.ext.webapp import util


def create_handler(zip_files, max_age=None, public=None):
    """Factory function to create a configured MemcachedZipHandler subclass.

    Args:
      zip_files: A non-empty list of zip file names, or a list of lists whose
          first member is the zip file name and whose optional second member
          is the first file stored in that archive (see
          MemcachedZipHandler.MapFileToArchive for how that index is used).
      max_age: The maximum client-side cache lifetime, in seconds. None keeps
          the MemcachedZipHandler default.
      public: Whether responses should be declared public in the client-side
          cache. None keeps the MemcachedZipHandler default.

    Returns:
      A MemcachedZipHandler subclass bound to the given configuration.

    Raises:
      ValueError: if zip_files is empty or is not a list.
    """
    if not zip_files or not isinstance(zip_files, list):
        raise ValueError('File name arguments must be a list')

    # Normalise the plain-list format to the list-of-lists format. A new list
    # is built so the caller's argument is not modified behind its back.
    archives = [entry if isinstance(entry, list) else [entry]
                for entry in zip_files]

    class HandlerWrapper(MemcachedZipHandler):
        """MemcachedZipHandler bound to one create_handler() configuration.

        The configuration is carried by this closure/subclass rather than by
        constructor arguments.
        """
        # The overrides must be class attributes: SetCachingHeaders reads
        # self.MAX_AGE / self.PUBLIC, so assigning bare local names inside a
        # method would have no effect.
        if max_age is not None:
            MAX_AGE = max_age
        if public is not None:
            PUBLIC = public

        def get(self, name):
            self.zipfilenames = archives
            self.TrueGet(name)

    return HandlerWrapper


class MemcachedZipHandler(webapp.RequestHandler):
    """Handles get requests for a given URL.

    Serves a GET request from a series of zip files. As files are served they
    are put into memcache, which is much faster than retrieving them from the
    zip source file again. It also uses considerably fewer CPU cycles.

    Instances read self.zipfilenames (a list of [archive_name] or
    [archive_name, first_file] lists); it is assigned by the wrapper class
    that create_handler() builds.
    """
    zipfile_cache = {}                 # class cache of source zip files
    MAX_AGE = 600                      # max client-side cache lifetime
    PUBLIC = True                      # public cache setting
    CACHE_PREFIX = 'cache://'          # memcache key prefix for actual URLs
    NEG_CACHE_PREFIX = 'noncache://'   # memcache key prefix for non-existent
                                       # URLs
    REDIRECT_PREFIX = 'redirect://'    # memcache key prefix for redirect data
    REDIRECT_FILE = 'redirects.yaml'   # Name of file that contains the
                                       # redirect table
    REDIRECT_SRC = 'src'               # Name of the 'source' attribute for a
                                       # redirect table entry
    REDIRECT_DST = 'dst'               # Name of the 'destination' attribute
                                       # for a redirect table entry
    REDIRECT_TYPE = 'type'             # Name of the 'type' attribute for a
                                       # redirect table entry
    REDIRECT_TYPE_PERM = 'permanent'   # Redirect 'type' string indicating a
                                       # 301 redirect should be served
    REDIRECT_TYPE_TEMP = 'temporary'   # Redirect 'type' string indicating a
                                       # 302 redirect should be served
    intlString = 'intl/'
    validLangs = ['en', 'de', 'es', 'fr', 'it', 'ja', 'ko', 'ru',
                  'zh-CN', 'zh-cn', 'zh-TW', 'zh-tw']

    # Content types used when mimetypes.guess_type() does not recognise the
    # file: (file name suffix, content type) pairs, checked in order.
    EXTRA_CONTENT_TYPES = (
        ('.psd', 'application/octet-stream'),
        ('.svg', 'image/svg+xml'),
        ('.mp4', 'video/mp4'),
        ('.webm', 'video/webm'),
        ('.ogv', 'video/ogg'),
    )
    # Content type for files that nothing above recognises.
    DEFAULT_CONTENT_TYPE = 'application/octet-stream'

    def TrueGet(self, reqUri):
        """The top-level entry point to serving requests.

        Called 'True' get because it does the work when called from the
        wrapper class' get method. Some logic is applied to the request to
        serve files from an intl/<lang>/... directory or fall through to the
        default language.

        Args:
          reqUri: URL requested (relative, without leading slash)

        Returns:
          None
        """
        langName = 'en'
        resetLangCookie = False
        isValidIntl = False
        isStripped = False

        # Try to retrieve the user's lang pref from the cookie. If there is no
        # lang pref cookie in the request, add set-cookie to the response with
        # the default value of 'en'.
        try:
            langName = self.request.cookies['android_developer_pref_lang']
        except KeyError:
            resetLangCookie = True
        logging.info('==========================REQ INIT name [%s] '
                     'langName [%s] resetLangCookie [%s]',
                     reqUri, langName, resetLangCookie)

        # Do some prep for handling intl requests. Parse the url and validate
        # the intl/lang substring, extract the url lang code (urlLangName) and
        # the uri that follows the intl/lang substring (contentUri).
        sections = reqUri.split("/", 2)
        isIntl = len(sections) > 2 and (sections[0] == "intl")
        if isIntl:
            urlLangName = sections[1]
            contentUri = sections[2]
            isValidIntl = urlLangName in self.validLangs
            logging.info(' Content URI is [%s]...', contentUri)
            if isValidIntl and (langName != urlLangName or langName == 'en'):
                # The lang code in the request differs from the one in the
                # cookie, or the target lang is en: strip the intl/nn
                # substring. The request is later redirected to the user's
                # preferred language url.
                reqUri = contentUri
                isStripped = True
                isValidIntl = False
                isIntl = False
        else:
            contentUri = reqUri

        # Apply manual redirects from redirects.yaml. This occurs before any
        # other mutations are performed, to avoid odd redirect behavior
        # (For example, a user may want to redirect a directory without having
        # /index.html appended.)
        if self.ProcessManualRedirects(contentUri, langName, isIntl):
            return

        # Preprocess the req url. If it references a directory or the domain
        # itself, append '/index.html' to the url and 302 redirect.
        # Otherwise, continue processing the request below.
        if self.PreprocessUrl(reqUri, langName):
            return

        if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
            # Handle a 'clean' request: try to form a response using the
            # actual request url.
            if not self.CreateResponse(reqUri, langName, isValidIntl,
                                       resetLangCookie):
                # CreateResponse returned False: there was no such document in
                # the intl/lang tree. Before going to 404, see if there is an
                # English-language version of the doc in the default tree and
                # return it, else go to 404.
                self.CreateResponse(contentUri, langName, False,
                                    resetLangCookie)
        elif isIntl:
            # Pass an invalid intl request through for processing (so as to
            # get 404 as appropriate). This is needed because intl urls are
            # passed through clean and retried in English, if necessary.
            self.CreateResponse(reqUri, langName, isValidIntl,
                                resetLangCookie)
        else:
            # A non-clean url (usually a non-intl url) that must be
            # interpreted in the context of the lang pref that is set.
            # Prepend an intl/lang string to the request url and send it as a
            # 302 redirect. After the redirect, the subsequent request will be
            # handled as a clean url.
            self.RedirToIntl(reqUri, self.intlString, langName)

    def ProcessManualRedirects(self, contentUri, langName, isIntl):
        """Compute any manual redirects for a request and execute them.

        This allows content authors to manually define a set of regex rules
        which, when matched, will cause an HTTP redirect to be performed.

        Redirect rules are typically stored in a file named redirects.yaml.
        See the comments in that file for more information about formatting.

        Redirect computations are stored in memcache for performance.

        Note that international URIs are handled automatically, and are
        assumed to mirror redirects for non-intl requests.

        Args:
          contentUri: The relative URI (without leading slash) that was
              requested. This should NOT contain an intl-prefix, if otherwise
              present.
          langName: The requested language.
          isIntl: True if contentUri originally contained an intl prefix.

        Returns:
          boolean: True if a redirect has been set, False otherwise.

        Raises:
          ValueError: if the matching rule names an unknown redirect type.
        """
        # Redirect data is stored in memcache for performance
        memcache_key = self.REDIRECT_PREFIX + contentUri
        redirect_data = memcache.get(memcache_key)
        if redirect_data is None:
            logging.info('Redirect cache miss. Computing new redirect data.\n'
                         'Memcache Key: %s', memcache_key)
            redirect_data = self.ComputeManualRedirectUrl(contentUri)
            memcache.set(memcache_key, redirect_data)
        redirectUri = redirect_data[0]
        redirectType = redirect_data[1]

        if redirectType is None:
            # No redirect necessary
            return False

        if redirectType not in (self.REDIRECT_TYPE_PERM,
                                self.REDIRECT_TYPE_TEMP):
            logging.error('Invalid redirect type: %s', redirectType)
            raise ValueError('Invalid redirect type: %s' % redirectType)

        # If this is an international URL, prepend intl path to minimize
        # number of redirects
        if isIntl:
            redirectUri = '/%s%s%s' % (self.intlString, langName, redirectUri)

        permanent = redirectType == self.REDIRECT_TYPE_PERM
        if permanent:
            logging.info('Sending permanent redirect: %s', redirectUri)
        else:
            logging.info('Sending temporary redirect: %s', redirectUri)
        self.redirect(redirectUri, permanent=permanent)
        return True

    def ComputeManualRedirectUrl(self, uri):
        """Read redirects file and evaluate redirect rules for a given URI.

        Args:
          uri: The relative URI (without leading slash) for which redirect
              data should be computed. No special handling of intl URIs is
              performed at this level.

        Returns:
          tuple: The computed redirect data. This tuple has two parts:
            redirect_uri: The new URI that should be used. If no rule matches,
                this is the input uri (with a leading slash added, unless the
                redirect file could not be opened).
            redirect_type: Either 'permanent' for an HTTP 301 redirect,
                'temporary' for an HTTP 302 redirect, or None if no redirect
                should be performed.

        Raises:
          ValueError: if a redirect rule is malformed (missing src/dst, or an
              invalid regular expression).
        """
        # Redirects are defined in a file named redirects.yaml.
        try:
            with open(self.REDIRECT_FILE) as f:
                # NOTE(review): yaml.load can construct arbitrary Python
                # objects. That is acceptable only while the redirect file is
                # a trusted, app-bundled resource; consider yaml.safe_load.
                data = yaml.load(f)
        except IOError as e:
            logging.warning('Error opening redirect file (%s): %s',
                            self.REDIRECT_FILE, e.strerror)
            return (uri, None)

        # The incoming path is missing a leading slash. However, many parts of
        # the redirect system require leading slashes to distinguish between
        # relative and absolute redirects. So, to compensate for this, we'll
        # add a leading slash here as well.
        uri = '/' + uri

        # Check to make sure we actually got an iterable list out of the YAML
        # file
        if data is None:
            logging.warning('Redirect file (%s) not valid YAML.',
                            self.REDIRECT_FILE)
        elif 'redirects' not in data:
            logging.warning('Redirect file (%s) not properly formatted -- no '
                            '\'redirects:\' header.', self.REDIRECT_FILE)
        elif hasattr(data['redirects'], '__iter__'):
            # Iterate through redirect data, try to find a rule that matches.
            for redirect in data['redirects']:
                try:
                    pattern = redirect[self.REDIRECT_SRC]
                    # Note: re.match only matches at the beginning of the
                    # string, i.e. every rule carries an implied leading '^'.
                    if not re.match(pattern, uri):
                        continue
                    redirect_uri = re.sub('^' + pattern,
                                          redirect[self.REDIRECT_DST], uri)
                except (KeyError, TypeError, re.error) as e:
                    raise ValueError('Error while processing redirect rule.\n'
                                     'Rule: %s\n'
                                     'Error: %s' % (redirect, e))
                logging.info('Redirect rule matched.\n'
                             'Rule: %s\n'
                             'Src: %s\n'
                             'Dst: %s',
                             pattern, uri, redirect_uri)
                if self.REDIRECT_TYPE in redirect:
                    redirect_type = redirect[self.REDIRECT_TYPE]
                else:
                    # Default redirect type, if unspecified
                    redirect_type = self.REDIRECT_TYPE_PERM
                return (redirect_uri, redirect_type)

        # No redirect found, return URL unchanged
        return (uri, None)

    def isCleanUrl(self, name, langName, isValidIntl, isStripped):
        """Determine whether to pass an incoming url straight to processing.

        Args:
          name: The incoming URL
          langName: The user's preferred language code
          isValidIntl: Whether the URL carries a valid intl/<lang>/ prefix
          isStripped: Whether an intl/<lang>/ prefix was stripped from the URL

        Returns:
          boolean: Whether the URL should be sent straight to processing
        """
        return bool((langName == 'en' and not isStripped)
                    or isValidIntl
                    or '.html' not in name
                    or (not isValidIntl and not langName))

    def PreprocessUrl(self, name, langName):
        """Any preprocessing work on the URL when it comes in.

        Put any work related to interpreting the incoming URL here. For
        example, this is used to redirect requests for a directory to the
        index.html file in that directory. Subclasses should override this
        method to do different preprocessing.

        Args:
          name: The incoming URL
          langName: The user's preferred language code (unused here)

        Returns:
          True if the request was redirected to '/index.html'.
          Otherwise False.
        """
        # A final path segment without a '.' is taken to be a directory. The
        # empty name (the domain itself) is deliberately left untouched here
        # so that it redirects to '/index.html' rather than '//index.html'.
        if name and not name.endswith('/'):
            final_path_segment = name[name.rfind('/') + 1:]
            if '.' not in final_path_segment:
                name = name + '/'

        # if this is a directory or the domain itself, redirect to /index.html
        if not name or name.endswith('/'):
            uri = ''.join(['/', name, 'index.html'])
            self.redirect(uri, False)
            return True
        return False

    def RedirToIntl(self, name, intlString, langName):
        """Redirect an incoming request to the appropriate intl uri.

        For non-en langName, builds the intl/lang string from a base (en)
        string and redirects (302) the request to look for a version of the
        file in langName. For en langName, simply redirects a stripped uri
        string (intl/nn removed).

        Args:
          name: The incoming, preprocessed URL
          intlString: The intl path prefix (for example 'intl/')
          langName: The user's preferred language code

        Returns:
          The lang-specific URL
        """
        if langName != 'en':
            parts = [intlString, langName, '/', name]
            # Only carry a '?' when there is a query string to forward.
            query_string = self.request.query_string
            if query_string:
                parts.extend(['?', query_string])
            builtIntlLangUri = ''.join(parts)
        else:
            builtIntlLangUri = name
        uri = ''.join(['/', builtIntlLangUri])
        logging.info('-->REDIRECTING %s to %s', name, uri)
        self.redirect(uri, False)
        return uri

    def _GuessContentType(self, name):
        """Return the Content-Type header value to serve for file `name`."""
        content_type, _ = mimetypes.guess_type(name)
        if content_type:
            return content_type
        if name == 'favicon.ico':
            return 'image/x-icon'
        for suffix, extra_type in self.EXTRA_CONTENT_TYPES:
            if name.endswith(suffix):
                return extra_type
        return self.DEFAULT_CONTENT_TYPE

    def CreateResponse(self, name, langName, isValidIntl, resetLangCookie):
        """Process the url and form a response, if appropriate.

        Attempts to retrieve the requested file (name) from cache, negative
        cache, or store (zip) and form the response. For intl requests that
        are not found (in the localized tree), returns False rather than
        forming a response, so that the request can be retried with the base
        url (this is the fallthrough to default language).

        For requests that are found, forms the headers and adds the content
        to the response entity. Also resets the language cookie to langName
        when resetLangCookie is set, to ensure that the client language and
        response data remain harmonious.

        Args:
          name: The incoming, preprocessed URL
          langName: The language id. Used as necessary to reset the
              language cookie in the response.
          isValidIntl: Indicates whether the request is for a
              language-specific url
          resetLangCookie: Whether the response should reset the
              language cookie to 'langName'

        Returns:
          True: A response (content or 404) was created for the request
          False: No response was created.
        """
        logging.info(
            'PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]',
            name, langName, isValidIntl, resetLangCookie)

        # see if we have the page in the memcache
        resp_data = self.GetFromCache(name)
        if resp_data is None:
            logging.info(' Cache miss for %s', name)
            if self.GetFromNegativeCache(name) is not None:
                # found it in negative cache
                self.Write404Error()
                return True

            resp_data = self.GetFromStore(name)
            if resp_data is None:
                if isValidIntl:
                    # couldn't find the intl doc. Let the caller fall
                    # through to English.
                    return False
                logging.info(' Adding %s to negative cache, serving 404',
                             name)
                self.StoreInNegativeCache(name)
                self.Write404Error()
                return True
            self.StoreOrUpdateInCache(name, resp_data)

        # found content from cache or store
        logging.info('FOUND CLEAN')
        if resetLangCookie:
            logging.info(
                ' Resetting android_developer_pref_lang cookie to [%s]',
                langName)
            # Ten years from now.
            expireDate = time.mktime(localtime()) + 60 * 60 * 24 * 365 * 10
            self.response.headers.add_header(
                'Set-Cookie',
                'android_developer_pref_lang=%s; path=/; expires=%s' %
                (langName,
                 strftime("%a, %d %b %Y %H:%M:%S", localtime(expireDate))))

        # revalidate html files -- workaround for cache inconsistencies for
        # negotiated responses
        mustRevalidate = '.html' in name
        if mustRevalidate:
            self.response.headers.add_header('Vary', 'Cookie')

        # Always send the body: a file whose type is not recognised is served
        # as a generic binary instead of as an empty response.
        self.response.headers['Content-Type'] = self._GuessContentType(name)
        self.SetCachingHeaders(mustRevalidate)
        self.response.out.write(resp_data)
        return True

    def GetFromStore(self, file_path):
        """Retrieve file from zip files.

        Get the file from the source, it must not have been in the memcache.
        If possible, we'll use the zip file index to quickly locate where the
        file should be found. (See MapFileToArchive documentation for
        assumptions about file ordering.) If we don't have an index or don't
        find the file where the index says we should, look through all the
        zip files to find it.

        Args:
          file_path: the file that we're looking for

        Returns:
          The contents of the requested file, or None if no archive has it
        """
        resp_data = None
        file_itr = iter(self.zipfilenames)
        exhausted = (None,)  # sentinel entry: "no more archives"

        # decode any escape characters in the URI
        # Note: We are currently just looking for '@' (%40)
        file_path = file_path.replace('%40', '@')

        # check the index, if we have one, to see what archive the file is in
        archive_name = self.MapFileToArchive(file_path)
        if not archive_name:
            archive_name = next(file_itr, exhausted)[0]

        while resp_data is None and archive_name:
            zip_archive = self.LoadZipFile(archive_name)
            if zip_archive is not None:
                # we expect some lookups will fail, and that's okay, 404s
                # will deal with that
                try:
                    resp_data = zip_archive.read(file_path)
                except (KeyError, RuntimeError):
                    pass
                if resp_data is not None:
                    logging.info('%s read from %s', file_path, archive_name)

            archive_name = next(file_itr, exhausted)[0]

        return resp_data

    def LoadZipFile(self, zipfilename):
        """Convenience method to load zip file.

        Just a convenience method to load the zip file from the data store.
        This is useful if we ever want to change data stores and also as a
        means of dependency injection for testing. This method will look at
        our file cache first, and then load and cache the file if there's a
        cache miss.

        Args:
          zipfilename: the name of the zip file to load

        Returns:
          The zip file requested, or None if there is an I/O error
        """
        zip_archive = self.zipfile_cache.get(zipfilename)
        if zip_archive is None:
            try:
                zip_archive = zipfile.ZipFile(zipfilename)
                self.zipfile_cache[zipfilename] = zip_archive
            except (IOError, RuntimeError) as err:
                logging.error('Can\'t open zipfile %s, cause: %s',
                              zipfilename, err)
        return zip_archive

    def MapFileToArchive(self, file_path):
        """Given a file name, determine what archive it should be in.

        This method makes two critical assumptions.
        (1) The zip files passed as an argument to the handler, if
            concatenated in that same order, would result in a total ordering
            of all the files. See (2) for ordering type.
        (2) Upper case letters before lower case letters. The traversal of a
            directory tree is depth first. A parent directory's files are
            added before the files of any child directories.

        Args:
          file_path: the file to be mapped to an archive

        Returns:
          The name of the archive where we expect the file to be, or None if
          no indexed archive qualifies
        """
        # Walk the archives last-to-first: the file belongs to the last
        # archive whose first file is at or before it.
        for target in reversed(self.zipfilenames):
            if (len(target) > 1 and
                    self.CompareFilenames(target[1], file_path) >= 0):
                return target[0]
        return None

    def CompareFilenames(self, file1, file2):
        """Determines whether file1 is lexicographically 'before' file2.

        WARNING: This method assumes that paths are output in a depth-first
        order, with parent directories' files stored before children's.

        We say that file1 is lexicographically before file2 if the last
        non-matching path segment of file1 is alphabetically before file2.

        Args:
          file1: the first file path
          file2: the second file path

        Returns:
          A positive number if file1 is before file2
          A negative number if file2 is before file1
          0 if filenames are the same
        """
        f1_segments = file1.split('/')
        f2_segments = file2.split('/')
        f1_len = len(f1_segments)
        f2_len = len(f2_segments)

        # Skip the common leading segments.
        segment_ptr = 0
        while (segment_ptr < f1_len and
               segment_ptr < f2_len and
               f1_segments[segment_ptr] == f2_segments[segment_ptr]):
            segment_ptr += 1

        # The number of segments differs and we are at (or past) the last
        # segment of one of the paths: the one with fewer segments is first,
        # because files come before directories. Using '>=' also covers one
        # path being a strict prefix of the other, where there is no segment
        # left to compare.
        if f1_len != f2_len and (segment_ptr + 1 >= f1_len or
                                 segment_ptr + 1 >= f2_len):
            return f2_len - f1_len

        # Same number of segments and we fell off the end: identical paths.
        if segment_ptr == f1_len:
            return 0

        # Otherwise compare the first segments that differ.
        segment1 = f1_segments[segment_ptr]
        segment2 = f2_segments[segment_ptr]
        if segment1 < segment2:
            return 1
        if segment1 > segment2:
            return -1
        return 0

    def SetCachingHeaders(self, revalidate):
        """Set caching headers for the request.

        Args:
          revalidate: Whether to add 'must-revalidate' to Cache-Control
        """
        cache_control = []
        if self.PUBLIC:
            cache_control.append('public')
        cache_control.append('max-age=%d' % self.MAX_AGE)
        if revalidate:
            cache_control.append('must-revalidate')
        self.response.headers['Cache-Control'] = ', '.join(cache_control)

    def GetFromCache(self, filename):
        """Get file from memcache, if available.

        Args:
          filename: The URL of the file to return

        Returns:
          The content of the file, or None on a cache miss
        """
        return memcache.get('%s%s' % (self.CACHE_PREFIX, filename))

    def StoreOrUpdateInCache(self, filename, data):
        """Store data in the cache.

        Store a piece of data in the memcache. Memcache has a maximum item
        size of 1*10^6 bytes. If the data is too large, fail, but log the
        failure. Future work will consider compressing the data before
        storing or chunking it.

        Args:
          filename: the name of the file to store
          data: the data of the file

        Returns:
          None
        """
        cache_key = '%s%s' % (self.CACHE_PREFIX, filename)
        try:
            # add() reports failure when the key already exists; overwrite
            # the existing entry in that case.
            if not memcache.add(cache_key, data):
                memcache.replace(cache_key, data)
        except ValueError as err:
            logging.warning('Data size too large to cache\n%s', err)

    def Write404Error(self):
        """Output a simple 404 response."""
        self.error(404)
        self.response.out.write(
            ''.join(['<html><head><title>404: Not Found</title></head>',
                     '<body><b><h2>Error 404</h2><br/>',
                     'File not found</b></body></html>']))

    def StoreInNegativeCache(self, filename):
        """If a non-existent URL is accessed, cache this result as well.

        Future work should consider setting a maximum negative cache size to
        prevent it from negatively impacting the real cache.

        Args:
          filename: URL to add to negative cache

        Returns:
          None
        """
        memcache.add('%s%s' % (self.NEG_CACHE_PREFIX, filename), -1)

    def GetFromNegativeCache(self, filename):
        """Retrieve from negative cache.

        Args:
          filename: URL to retrieve

        Returns:
          The marker value stored by StoreInNegativeCache if the URL is in
          the negative cache, otherwise None.
        """
        return memcache.get('%s%s' % (self.NEG_CACHE_PREFIX, filename))


def main():
    """Run a WSGI application that routes requests to MemcachedZipHandler."""
    # NOTE(review): this maps the bare base class, which defines no get()
    # and has no zipfilenames; real deployments presumably route to a class
    # returned by create_handler() instead -- confirm before relying on it.
    application = webapp.WSGIApplication([('/([^/]+)/(.*)',
                                           MemcachedZipHandler)])
    util.run_wsgi_app(application)


if __name__ == '__main__':
    main()