Home | History | Annotate | Download | only in app_engine_server
      1 #!/usr/bin/env python
      2 # -*- coding: utf-8 -*-
      3 #
      4 # Copyright 2009 Google Inc.
      5 #
      6 # Licensed under the Apache License, Version 2.0 (the "License");
      7 # you may not use this file except in compliance with the License.
      8 # You may obtain a copy of the License at
      9 #
     10 #   http://www.apache.org/licenses/LICENSE-2.0
     11 #
     12 # Unless required by applicable law or agreed to in writing, software
     13 # distributed under the License is distributed on an "AS IS" BASIS,
     14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 # See the License for the specific language governing permissions and
     16 # limitations under the License.
     17 #
     18 
     19 """A class to serve pages from zip files and use memcache for performance.
     20 
     21 This contains a class and a function to create an anonymous instance of the
     22 class to serve HTTP GET requests. Memcache is used to increase response speed
     23 and lower processing cycles used in serving. Credit to Guido van Rossum and
     24 his implementation of zipserve which served as a reference as I wrote this.
     25 
     26   MemcachedZipHandler: Class that serves request
     27   create_handler: method to create instance of MemcachedZipHandler
     28 """
     29 
     30 __author__ = 'jmatt (at] google.com (Justin Mattson)'
     31 
     32 import email.Utils
     33 import logging
     34 import mimetypes
     35 import re
     36 import sys
     37 import time
     38 import yaml
     39 import zipfile
     40 
     41 from google.appengine.api import memcache
     42 from google.appengine.ext import webapp
     43 from google.appengine.ext.webapp import util
     44 from time import localtime, strftime
     45 
     46 def create_handler(zip_files, max_age=None, public=None):
     47   """Factory method to create a MemcachedZipHandler instance.
     48 
     49   Args:
     50     zip_files: A list of file names, or a list of lists of file name, first
     51         member of file mappings. See MemcachedZipHandler documentation for
     52         more information about using the list of lists format
     53     max_age: The maximum client-side cache lifetime
     54     public: Whether this should be declared public in the client-side cache
     55   Returns:
     56     A MemcachedZipHandler wrapped in a pretty, anonymous bow for use with App
     57     Engine
     58 
     59   Raises:
     60     ValueError: if the zip_files argument is not a list
     61   """
     62   # verify argument integrity. If the argument is passed in list format,
     63   # convert it to list of lists format
     64   if zip_files and type(zip_files).__name__ == 'list':
     65     num_items = len(zip_files)
     66     while num_items > 0:
     67       if type(zip_files[num_items - 1]).__name__ != 'list':
     68         zip_files[num_items - 1] = [zip_files[num_items-1]]
     69       num_items -= 1
     70   else:
     71     raise ValueError('File name arguments must be a list')
     72 
     73   class HandlerWrapper(MemcachedZipHandler):
     74     """Simple wrapper for an instance of MemcachedZipHandler.
     75 
     76     I'm still not sure why this is needed
     77     """
     78     def get(self, name):
     79       self.zipfilenames = zip_files
     80       self.TrueGet(name)
     81       if max_age is not None:
     82         MAX_AGE = max_age
     83       if public is not None:
     84         PUBLIC = public
     85 
     86   return HandlerWrapper
     87 
     88 
class MemcachedZipHandler(webapp.RequestHandler):
  """Handles get requests for a given URL.

  Serves a GET request from a series of zip files. As files are served they are
  put into memcache, which is much faster than retrieving them from the zip
  source file again. It also uses considerably fewer CPU cycles.

  The methods below read self.zipfilenames (a list of [archive_name] or
  [archive_name, index_path] lists); it is not defined on this class and is
  assigned by the wrapper class built in create_handler().
  """
  zipfile_cache = {}                # class-level cache of opened zip archives,
                                    #   keyed by archive file name
  MAX_AGE = 43200                   # max client-side cache lifetime, in seconds
  PUBLIC = True                     # public cache setting
  CACHE_PREFIX = 'cache://'         # memcache key prefix for actual URLs
  NEG_CACHE_PREFIX = 'noncache://'  # memcache key prefix for non-existent URLs
  REDIRECT_PREFIX = 'redirect://'   # memcache key prefix for redirect data
  REDIRECT_FILE = 'redirects.yaml'  # Name of file that contains redirect table
  REDIRECT_SRC = 'src'              # Name of the 'source' attribute for a
                                    #   redirect table entry
  REDIRECT_DST = 'dst'              # Name of the 'destination' attribute for
                                    #   a redirect table entry
  REDIRECT_TYPE = 'type'            # Name of the 'type' attribute for a
                                    #   redirect table entry
  REDIRECT_TYPE_PERM = 'permanent'  # Redirect 'type' string indicating a 301
                                    #   redirect should be served
  REDIRECT_TYPE_TEMP = 'temporary'  # Redirect 'type' string indicating a 302
                                    #   redirect should be served
  # URL path prefix that marks a localized request: intl/<lang>/...
  intlString = 'intl/'
  # Language codes accepted after the intl/ prefix. The zh codes are listed in
  # both capitalisations.
  validLangs = ['en', 'de', 'es', 'fr','it','ja','ko','ru','zh-CN','zh-cn','zh-TW','zh-tw']
    115 
  def TrueGet(self, reqUri):
    """The top-level entry point to serving requests.

    Called 'True' get because it does the work when called from the wrapper
    class' get method. Some logic is applied to the request to serve files
    from an intl/<lang>/... directory or fall through to the default language.

    The request is handled in this order; the first step that produces a
    response or redirect ends processing:
      1. manual redirects from the redirect table (ProcessManualRedirects)
      2. directory -> index.html redirect (PreprocessUrl)
      3. a direct response for 'clean' URLs, retried against the non-intl
         path when the localized document is missing (CreateResponse)
      4. a direct response for intl URLs that were not stripped
      5. otherwise a redirect to the intl/<lang>/ form (RedirToIntl)

    Args:
      reqUri: URL requested (relative, as captured by the URL mapping)

    Returns:
      None
    """
    langName = 'en'
    resetLangCookie = False
    urlLangName = None
    retry = False  # NOTE(review): never read in this method
    isValidIntl = False
    isStripped = False

    # Try to retrieve the user's lang pref from the cookie. If there is no
    # lang pref cookie in the request, add set-cookie to the response with the
    # default value of 'en'.
    # NOTE(review): the cookie value is not checked against validLangs before
    # it is handed to RedirToIntl, which builds a redirect URL from it --
    # confirm whether that is intended.
    try:
      langName = self.request.cookies['android_developer_pref_lang']
    except KeyError:
      resetLangCookie = True
    logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)

    # Do some prep for handling intl requests. Parse the url and validate
    # the intl/lang substring, extract the url lang code (urlLangName) and
    # the uri that follows the intl/lang substring (contentUri).
    sections = reqUri.split("/", 2)
    isIntl = len(sections) > 2 and (sections[0] == "intl")
    if isIntl:
      isValidIntl = sections[1] in self.validLangs
      urlLangName = sections[1]
      contentUri = sections[2]
      logging.info('  Content URI is [%s]...', contentUri)
      if isValidIntl:
        if (langName != urlLangName) or (langName == 'en'):
          # if the lang code in the request is different from that in
          # the cookie, or if the target lang is en, strip the
          # intl/nn substring. It will later be redirected to
          # the user's preferred language url.
          reqUri = contentUri
          isStripped = True
          isValidIntl = False
          isIntl = False
    else:
      contentUri = reqUri

    # Apply manual redirects from redirects.yaml. This occurs before any
    # other mutations are performed, to avoid odd redirect behavior
    # (For example, a user may want to redirect a directory without having
    # /index.html appended.)
    did_redirect = self.ProcessManualRedirects(contentUri, langName, isIntl)
    if did_redirect:
      return

    # Preprocess the req url. If it references a directory or the domain itself,
    # append '/index.html' to the url and 302 redirect. Otherwise, continue
    # processing the request below.
    did_redirect = self.PreprocessUrl(reqUri, langName)
    if did_redirect:
      return

    # Send for processing
    if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
      # handle a 'clean' request.
      # Try to form a response using the actual request url.
      if not self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie):
        # If CreateResponse returns False, there was no such document
        # in the intl/lang tree. Before going to 404, see if there is an
        # English-language version of the doc in the default
        # tree and return it, else go to 404.
        self.CreateResponse(contentUri, langName, False, resetLangCookie)

    elif isIntl:
      # handle the case where we need to pass through an invalid intl req
      # for processing (so as to get 404 as appropriate). This is needed
      # because intl urls are passed through clean and retried in English,
      # if necessary.
      self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie)

    else:
      # handle the case where we have a non-clean url (usually a non-intl
      # url) that we need to interpret in the context of any lang pref
      # that is set. Prepend an intl/lang string to the request url and
      # send it as a 302 redirect. After the redirect, the subsequent
      # request will be handled as a clean url.
      self.RedirToIntl(reqUri, self.intlString, langName)
    215 
    216   def ProcessManualRedirects(self, contentUri, langName, isIntl):
    217     """Compute any manual redirects for a request and execute them.
    218 
    219     This allows content authors to manually define a set of regex rules which,
    220     when matched, will cause an HTTP redirect to be performed.
    221 
    222     Redirect rules are typically stored in a file named redirects.yaml. See the
    223     comments in that file for more information about formatting.
    224 
    225     Redirect computations are stored in memcache for performance.
    226 
    227     Note that international URIs are handled automatically, and are assumed to
    228     mirror redirects for non-intl requests.
    229 
    230     Args:
    231       contentUri: The relative URI (without leading slash) that was requested.
    232         This should NOT contain an intl-prefix, if otherwise present.
    233       langName: The requested language.
    234       isIntl: True if contentUri originally contained an intl prefix.
    235 
    236     Results:
    237       boolean: True if a redirect has been set, False otherwise.
    238     """
    239     # Redirect data is stored in memcache for performance
    240     memcache_key = self.REDIRECT_PREFIX + contentUri
    241     redirect_data = memcache.get(memcache_key)
    242     if redirect_data is None:
    243       logging.info('Redirect cache miss. Computing new redirect data.\n'
    244                    'Memcache Key: ' + memcache_key)
    245       redirect_data = self.ComputeManualRedirectUrl(contentUri)
    246       memcache.set(memcache_key, redirect_data)
    247     contentUri = redirect_data[0]
    248     redirectType = redirect_data[1]
    249 
    250     # If this is an international URL, prepend intl path to minimize
    251     # number of redirects
    252     if isIntl:
    253       contentUri = '/%s%s%s' % (self.intlString, langName, contentUri)
    254 
    255     if redirectType is None:
    256       # No redirect necessary
    257       return False
    258     elif redirectType == self.REDIRECT_TYPE_PERM:
    259       logging.info('Sending permanent redirect: ' + contentUri);
    260       self.redirect(contentUri, permanent=True)
    261       return True
    262     elif redirectType == self.REDIRECT_TYPE_TEMP:
    263       logging.info('Sending temporary redirect: ' + contentUri);
    264       self.redirect(contentUri, permanent=False)
    265       return True
    266     else:
    267       # Invalid redirect type
    268       logging.error('Invalid redirect type: %s', redirectType)
    269       raise ('Invalid redirect type: %s', redirectType)
    270 
    271   def ComputeManualRedirectUrl(self, uri):
    272     """Read redirects file and evaluate redirect rules for a given URI.
    273 
    274     Args:
    275       uri: The relative URI (without leading slash) for which redirect data
    276         should be computed. No special handling of intl URIs is pefromed
    277         at this level.
    278 
    279     Returns:
    280       tuple: The computed redirect data. This tuple has two parts:
    281         redirect_uri: The new URI that should be used. (If no redirect rule is
    282           found, the original input to 'uri' will be returned.
    283         redirect_type: Either 'permanent' for an HTTP 301 redirect, 'temporary'
    284           for an HTTP 302 redirect, or None if no redirect should be performed.
    285     """
    286     # Redircts are defined in a file named redirects.yaml.
    287     try:
    288       f = open(self.REDIRECT_FILE)
    289       data = yaml.load(f)
    290       f.close()
    291     except IOError, e:
    292       logging.warning('Error opening redirect file (' + self.REDIRECT_FILE +
    293                       '): ' + e.strerror)
    294       return (uri, None)
    295 
    296     # The incoming path is missing a leading slash. However, many parts of the
    297     # redirect system require leading slashes to distinguish between relative
    298     # and absolute redirects. So, to compensate for this, we'll add a leading
    299     # slash here as well.
    300     uri = '/' + uri
    301 
    302     # Check to make sure we actually got an iterable list out of the YAML file
    303     if data is None:
    304       logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not valid '
    305                       'YAML.')
    306     elif 'redirects' not in data:
    307       logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not '
    308                       'properly formatted -- no \'redirects:\' header.')
    309     elif hasattr(data['redirects'], '__iter__'):
    310       # Iterate through redirect data, try to find a redirect that matches.
    311       for redirect in data['redirects']:
    312           # Note: re.search adds an implied '^' to the beginning of the regex
    313           # This means that the regex must match from the beginning of the
    314           # string.
    315           try:
    316             if re.match(redirect[self.REDIRECT_SRC], uri):
    317               # Match found. Apply redirect rule.
    318               redirect_uri = re.sub('^' + redirect[self.REDIRECT_SRC],
    319                   redirect[self.REDIRECT_DST], uri)
    320               logging.info('Redirect rule matched.\n'
    321                              'Rule: %s\n'
    322                              'Src: %s\n'
    323                              'Dst: %s',
    324                            redirect[self.REDIRECT_SRC], uri, redirect_uri)
    325               if self.REDIRECT_TYPE in redirect:
    326                 redirect_type = redirect[self.REDIRECT_TYPE]
    327               else:
    328                 # Default redirect type, if unspecified
    329                 redirect_type = self.REDIRECT_TYPE_PERM
    330               return (redirect_uri, redirect_type)
    331           except:
    332             e = sys.exc_info()[1]
    333             raise ('Error while processing redirect rule.\n'
    334                      'Rule: %s\n'
    335                      'Error: %s' % (redirect[self.REDIRECT_SRC], e))
    336     # No redirect found, return URL unchanged
    337     return (uri, None)
    338 
    339   def isCleanUrl(self, name, langName, isValidIntl, isStripped):
    340     """Determine whether to pass an incoming url straight to processing.
    341 
    342        Args:
    343          name: The incoming URL
    344 
    345        Returns:
    346          boolean: Whether the URL should be sent straight to processing
    347     """
    348     # logging.info('  >>>> isCleanUrl name [%s] langName [%s] isValidIntl [%s]', name, langName, isValidIntl)
    349     if (langName == 'en' and not isStripped) or isValidIntl or not ('.html' in name) or (not isValidIntl and not langName):
    350       return True
    351 
    352   def PreprocessUrl(self, name, langName):
    353     """Any preprocessing work on the URL when it comes in.
    354 
    355     Put any work related to interpreting the incoming URL here. For example,
    356     this is used to redirect requests for a directory to the index.html file
    357     in that directory. Subclasses should override this method to do different
    358     preprocessing.
    359 
    360     Args:
    361       name: The incoming URL
    362 
    363     Returns:
    364       True if the request was redirected to '/index.html'.
    365       Otherewise False.
    366     """
    367 
    368     # determine if this is a request for a directory
    369     final_path_segment = name
    370     final_slash_offset = name.rfind('/')
    371     if final_slash_offset != len(name) - 1:
    372       final_path_segment = name[final_slash_offset + 1:]
    373       if final_path_segment.find('.') == -1:
    374         name = ''.join([name, '/'])
    375 
    376     # if this is a directory or the domain itself, redirect to /index.html
    377     if not name or (name[len(name) - 1:] == '/'):
    378       uri = ''.join(['/', name, 'index.html'])
    379       # logging.info('--->PREPROCESSING REDIRECT [%s] to [%s] with langName [%s]', name, uri, langName)
    380       self.redirect(uri, False)
    381       return True
    382     else:
    383       return False
    384 
    385   def RedirToIntl(self, name, intlString, langName):
    386     """Redirect an incoming request to the appropriate intl uri.
    387 
    388        For non-en langName, builds the intl/lang string from a
    389        base (en) string and redirects (302) the request to look for
    390        a version of the file in langName. For en langName, simply
    391        redirects a stripped uri string (intl/nn removed).
    392 
    393     Args:
    394       name: The incoming, preprocessed URL
    395 
    396     Returns:
    397       The lang-specific URL
    398     """
    399     if not (langName == 'en'):
    400       builtIntlLangUri = ''.join([intlString, langName, '/', name, '?', self.request.query_string])
    401     else:
    402       builtIntlLangUri = name
    403     uri = ''.join(['/', builtIntlLangUri])
    404     logging.info('-->REDIRECTING %s to  %s', name, uri)
    405     self.redirect(uri, False)
    406     return uri
    407 
    408   def CreateResponse(self, name, langName, isValidIntl, resetLangCookie):
    409     """Process the url and form a response, if appropriate.
    410 
    411        Attempts to retrieve the requested file (name) from cache,
    412        negative cache, or store (zip) and form the response.
    413        For intl requests that are not found (in the localized tree),
    414        returns False rather than forming a response, so that
    415        the request can be retried with the base url (this is the
    416        fallthrough to default language).
    417 
    418        For requests that are found, forms the headers and
    419        adds the content to the response entity. If the request was
    420        for an intl (localized) url, also resets the language cookie
    421        to the language specified in the url if needed, to ensure that
    422        the client language and response data remain harmonious.
    423 
    424     Args:
    425       name: The incoming, preprocessed URL
    426       langName: The language id. Used as necessary to reset the
    427                 language cookie in the response.
    428       isValidIntl: If present, indicates whether the request is
    429                    for a language-specific url
    430       resetLangCookie: Whether the response should reset the
    431                        language cookie to 'langName'
    432 
    433     Returns:
    434       True: A response was successfully created for the request
    435       False: No response was created.
    436     """
    437     # see if we have the page in the memcache
    438     logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]',
    439       name, langName, isValidIntl, resetLangCookie)
    440     resp_data = self.GetFromCache(name)
    441     if resp_data is None:
    442       logging.info('  Cache miss for %s', name)
    443       resp_data = self.GetFromNegativeCache(name)
    444       if resp_data is None:
    445         resp_data = self.GetFromStore(name)
    446 
    447         # IF we have the file, put it in the memcache
    448         # ELSE put it in the negative cache
    449         if resp_data is not None:
    450           self.StoreOrUpdateInCache(name, resp_data)
    451         elif isValidIntl:
    452           # couldn't find the intl doc. Try to fall through to English.
    453           #logging.info('  Retrying with base uri...')
    454           return False
    455         else:
    456           logging.info('  Adding %s to negative cache, serving 404', name)
    457           self.StoreInNegativeCache(name)
    458           self.Write404Error()
    459           return True
    460       else:
    461         # found it in negative cache
    462         self.Write404Error()
    463         return True
    464 
    465     # found content from cache or store
    466     logging.info('FOUND CLEAN')
    467     if resetLangCookie:
    468       logging.info('  Resetting android_developer_pref_lang cookie to [%s]',
    469       langName)
    470       expireDate = time.mktime(localtime()) + 60 * 60 * 24 * 365 * 10
    471       self.response.headers.add_header('Set-Cookie',
    472       'android_developer_pref_lang=%s; path=/; expires=%s' %
    473       (langName, strftime("%a, %d %b %Y %H:%M:%S", localtime(expireDate))))
    474     mustRevalidate = False
    475     if ('.html' in name):
    476       # revalidate html files -- workaround for cache inconsistencies for
    477       # negotiated responses
    478       mustRevalidate = True
    479       #logging.info('  Adding [Vary: Cookie] to response...')
    480       self.response.headers.add_header('Vary', 'Cookie')
    481     content_type, encoding = mimetypes.guess_type(name)
    482     if content_type:
    483       self.response.headers['Content-Type'] = content_type
    484       self.SetCachingHeaders(mustRevalidate)
    485       self.response.out.write(resp_data)
    486     elif (name == 'favicon.ico'):
    487       self.response.headers['Content-Type'] = 'image/x-icon'
    488       self.SetCachingHeaders(mustRevalidate)
    489       self.response.out.write(resp_data)
    490     elif name.endswith('.psd'):
    491       self.response.headers['Content-Type'] = 'application/octet-stream'
    492       self.SetCachingHeaders(mustRevalidate)
    493       self.response.out.write(resp_data)
    494     elif name.endswith('.svg'):
    495       self.response.headers['Content-Type'] = 'image/svg+xml'
    496       self.SetCachingHeaders(mustRevalidate)
    497       self.response.out.write(resp_data)
    498     elif name.endswith('.mp4'):
    499       self.response.headers['Content-Type'] = 'video/mp4'
    500       self.SetCachingHeaders(mustRevalidate)
    501       self.response.out.write(resp_data)
    502     elif name.endswith('.webm'):
    503       self.response.headers['Content-Type'] = 'video/webm'
    504       self.SetCachingHeaders(mustRevalidate)
    505       self.response.out.write(resp_data)
    506     elif name.endswith('.ogv'):
    507       self.response.headers['Content-Type'] = 'video/ogg'
    508       self.SetCachingHeaders(mustRevalidate)
    509       self.response.out.write(resp_data)
    510     return True
    511 
    512   def GetFromStore(self, file_path):
    513     """Retrieve file from zip files.
    514 
    515     Get the file from the source, it must not have been in the memcache. If
    516     possible, we'll use the zip file index to quickly locate where the file
    517     should be found. (See MapToFileArchive documentation for assumptions about
    518     file ordering.) If we don't have an index or don't find the file where the
    519     index says we should, look through all the zip files to find it.
    520 
    521     Args:
    522       file_path: the file that we're looking for
    523 
    524     Returns:
    525       The contents of the requested file
    526     """
    527     resp_data = None
    528     file_itr = iter(self.zipfilenames)
    529 
    530     # decode any escape characters in the URI
    531     # Note: We are currenty just looking for '@' (%40)
    532     file_path = file_path.replace('%40', '@')
    533 
    534     # check the index, if we have one, to see what archive the file is in
    535     archive_name = self.MapFileToArchive(file_path)
    536     if not archive_name:
    537       archive_name = file_itr.next()[0]
    538 
    539     while resp_data is None and archive_name:
    540       zip_archive = self.LoadZipFile(archive_name)
    541       if zip_archive:
    542 
    543         # we expect some lookups will fail, and that's okay, 404s will deal
    544         # with that
    545         try:
    546           resp_data = zip_archive.read(file_path)
    547         except (KeyError, RuntimeError), err:
    548           # no op
    549           x = False
    550         if resp_data is not None:
    551           logging.info('%s read from %s', file_path, archive_name)
    552 
    553       try:
    554         archive_name = file_itr.next()[0]
    555       except (StopIteration), err:
    556         archive_name = False
    557 
    558     return resp_data
    559 
    560   def LoadZipFile(self, zipfilename):
    561     """Convenience method to load zip file.
    562 
    563     Just a convenience method to load the zip file from the data store. This is
    564     useful if we ever want to change data stores and also as a means of
    565     dependency injection for testing. This method will look at our file cache
    566     first, and then load and cache the file if there's a cache miss
    567 
    568     Args:
    569       zipfilename: the name of the zip file to load
    570 
    571     Returns:
    572       The zip file requested, or None if there is an I/O error
    573     """
    574     zip_archive = None
    575     zip_archive = self.zipfile_cache.get(zipfilename)
    576     if zip_archive is None:
    577       try:
    578         zip_archive = zipfile.ZipFile(zipfilename)
    579         self.zipfile_cache[zipfilename] = zip_archive
    580       except (IOError, RuntimeError), err:
    581         logging.error('Can\'t open zipfile %s, cause: %s' % (zipfilename,
    582                                                              err))
    583     return zip_archive
    584 
    585   def MapFileToArchive(self, file_path):
    586     """Given a file name, determine what archive it should be in.
    587 
    588     This method makes two critical assumptions.
    589     (1) The zip files passed as an argument to the handler, if concatenated
    590         in that same order, would result in a total ordering
    591         of all the files. See (2) for ordering type.
    592     (2) Upper case letters before lower case letters. The traversal of a
    593         directory tree is depth first. A parent directory's files are added
    594         before the files of any child directories
    595 
    596     Args:
    597       file_path: the file to be mapped to an archive
    598 
    599     Returns:
    600       The name of the archive where we expect the file to be
    601     """
    602     num_archives = len(self.zipfilenames)
    603     while num_archives > 0:
    604       target = self.zipfilenames[num_archives - 1]
    605       if len(target) > 1:
    606         if self.CompareFilenames(target[1], file_path) >= 0:
    607           return target[0]
    608       num_archives -= 1
    609 
    610     return None
    611 
    612   def CompareFilenames(self, file1, file2):
    613     """Determines whether file1 is lexigraphically 'before' file2.
    614 
    615     WARNING: This method assumes that paths are output in a depth-first,
    616     with parent directories' files stored before childs'
    617 
    618     We say that file1 is lexigraphically before file2 if the last non-matching
    619     path segment of file1 is alphabetically before file2.
    620 
    621     Args:
    622       file1: the first file path
    623       file2: the second file path
    624 
    625     Returns:
    626       A positive number if file1 is before file2
    627       A negative number if file2 is before file1
    628       0 if filenames are the same
    629     """
    630     f1_segments = file1.split('/')
    631     f2_segments = file2.split('/')
    632 
    633     segment_ptr = 0
    634     while (segment_ptr < len(f1_segments) and
    635            segment_ptr < len(f2_segments) and
    636            f1_segments[segment_ptr] == f2_segments[segment_ptr]):
    637       segment_ptr += 1
    638 
    639     if len(f1_segments) == len(f2_segments):
    640 
    641       # we fell off the end, the paths much be the same
    642       if segment_ptr == len(f1_segments):
    643         return 0
    644 
    645       # we didn't fall of the end, compare the segments where they differ
    646       if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
    647         return 1
    648       elif f1_segments[segment_ptr] > f2_segments[segment_ptr]:
    649         return -1
    650       else:
    651         return 0
    652 
    653       # the number of segments differs, we either mismatched comparing
    654       # directories, or comparing a file to a directory
    655     else:
    656 
    657       # IF we were looking at the last segment of one of the paths,
    658       # the one with fewer segments is first because files come before
    659       # directories
    660       # ELSE we just need to compare directory names
    661       if (segment_ptr + 1 == len(f1_segments) or
    662           segment_ptr + 1 == len(f2_segments)):
    663         return len(f2_segments) - len(f1_segments)
    664       else:
    665         if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
    666           return 1
    667         elif f1_segments[segment_ptr] > f2_segments[segment_ptr]:
    668           return -1
    669         else:
    670           return 0
    671 
    672   def SetCachingHeaders(self, revalidate):
    673     """Set caching headers for the request."""
    674     max_age = self.MAX_AGE
    675     #self.response.headers['Expires'] = email.Utils.formatdate(
    676     #    time.time() + max_age, usegmt=True)
    677     cache_control = []
    678     if self.PUBLIC:
    679       cache_control.append('public')
    680     cache_control.append('max-age=%d' % max_age)
    681     if revalidate:
    682       cache_control.append('must-revalidate')
    683     self.response.headers['Cache-Control'] = ', '.join(cache_control)
    684 
    685   def GetFromCache(self, filename):
    686     """Get file from memcache, if available.
    687 
    688     Args:
    689       filename: The URL of the file to return
    690 
    691     Returns:
    692       The content of the file
    693     """
    694     return memcache.get('%s%s' % (self.CACHE_PREFIX, filename))
    695 
    696   def StoreOrUpdateInCache(self, filename, data):
    697     """Store data in the cache.
    698 
    699     Store a piece of data in the memcache. Memcache has a maximum item size of
    700     1*10^6 bytes. If the data is too large, fail, but log the failure. Future
    701     work will consider compressing the data before storing or chunking it
    702 
    703     Args:
    704       filename: the name of the file to store
    705       data: the data of the file
    706 
    707     Returns:
    708       None
    709     """
    710     try:
    711       if not memcache.add('%s%s' % (self.CACHE_PREFIX, filename), data):
    712         memcache.replace('%s%s' % (self.CACHE_PREFIX, filename), data)
    713     except (ValueError), err:
    714       logging.warning('Data size too large to cache\n%s' % err)
    715 
    716   def Write404Error(self):
    717     """Ouptut a simple 404 response."""
    718     self.error(404)
    719     self.response.out.write(
    720         ''.join(['<html><head><title>404: Not Found</title></head>',
    721                  '<body><b><h2>Error 404</h2><br/>',
    722                  'File not found</b></body></html>']))
    723 
    724   def StoreInNegativeCache(self, filename):
    725     """If a non-existant URL is accessed, cache this result as well.
    726 
    727     Future work should consider setting a maximum negative cache size to
    728     prevent it from from negatively impacting the real cache.
    729 
    730     Args:
    731       filename: URL to add ot negative cache
    732 
    733     Returns:
    734       None
    735     """
    736     memcache.add('%s%s' % (self.NEG_CACHE_PREFIX, filename), -1)
    737 
    738   def GetFromNegativeCache(self, filename):
    739     """Retrieve from negative cache.
    740 
    741     Args:
    742       filename: URL to retreive
    743 
    744     Returns:
    745       The file contents if present in the negative cache.
    746     """
    747     return memcache.get('%s%s' % (self.NEG_CACHE_PREFIX, filename))
    748 
    749 def main():
    750   application = webapp.WSGIApplication([('/([^/]+)/(.*)',
    751                                          MemcachedZipHandler)])
    752   util.run_wsgi_app(application)
    753 
    754 
    755 if __name__ == '__main__':
    756   main()
    757