Home | History | Annotate | Download | only in server2
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 from collections import defaultdict
      6 import posixpath
      7 
      8 from future import Future
      9 from path_util import SplitParent
     10 from special_paths import SITE_VERIFICATION_FILE
     11 
     12 def _Normalize(file_name, splittext=False):
     13   normalized = file_name
     14   if splittext:
     15     normalized = posixpath.splitext(file_name)[0]
     16   normalized = normalized.replace('.', '').replace('-', '').replace('_', '')
     17   return normalized.lower()
     18 
     19 def _CommonNormalizedPrefix(first_file, second_file):
     20   return posixpath.commonprefix((_Normalize(first_file),
     21                                  _Normalize(second_file)))
     22 
     23 
     24 class PathCanonicalizer(object):
     25   '''Transforms paths into their canonical forms. Since the docserver has had
     26   many incarnations - e.g. there didn't use to be apps/ - there may be old
     27   paths lying around the webs. We try to redirect those to where they are now.
     28   '''
     29   def __init__(self,
     30                file_system,
     31                object_store_creator,
     32                strip_extensions):
     33     # |strip_extensions| is a list of file extensions (e.g. .html) that should
     34     # be stripped for a path's canonical form.
     35     self._cache = object_store_creator.Create(
     36         PathCanonicalizer, category=file_system.GetIdentity())
     37     self._file_system = file_system
     38     self._strip_extensions = strip_extensions
     39 
     40   def _LoadCache(self):
     41     cached_future = self._cache.GetMulti(('canonical_paths',
     42                                           'simplified_paths_map'))
     43 
     44     def resolve():
     45       # |canonical_paths| is the pre-calculated set of canonical paths.
     46       # |simplified_paths_map| is a lazily populated mapping of simplified file
     47       # names to a list of full paths that contain them. For example,
     48       #  - browseraction: [extensions/browserAction.html]
     49       #  - storage: [apps/storage.html, extensions/storage.html]
     50       cached = cached_future.Get()
     51       canonical_paths, simplified_paths_map = (
     52           cached.get('canonical_paths'), cached.get('simplified_paths_map'))
     53 
     54       if canonical_paths is None:
     55         assert simplified_paths_map is None
     56         canonical_paths = set()
     57         simplified_paths_map = defaultdict(list)
     58         for base, dirs, files in self._file_system.Walk(''):
     59           for path in dirs + files:
     60             path_without_ext, ext = posixpath.splitext(path)
     61             canonical_path = posixpath.join(base, path_without_ext)
     62             if (ext not in self._strip_extensions or
     63                 path == SITE_VERIFICATION_FILE):
     64               canonical_path += ext
     65             canonical_paths.add(canonical_path)
     66             simplified_paths_map[_Normalize(path, splittext=True)].append(
     67                 canonical_path)
     68         # Store |simplified_paths_map| sorted. Ties in length are broken by
     69         # taking the shortest, lexicographically smallest path.
     70         for path_list in simplified_paths_map.itervalues():
     71           path_list.sort(key=lambda p: (len(p), p))
     72         self._cache.SetMulti({
     73           'canonical_paths': canonical_paths,
     74           'simplified_paths_map': simplified_paths_map,
     75         })
     76       else:
     77         assert simplified_paths_map is not None
     78 
     79       return canonical_paths, simplified_paths_map
     80 
     81     return Future(callback=resolve)
     82 
     83   def Canonicalize(self, path):
     84     '''Returns the canonical path for |path|.
     85     '''
     86     canonical_paths, simplified_paths_map = self._LoadCache().Get()
     87 
     88     # Path may already be the canonical path.
     89     if path in canonical_paths:
     90       return path
     91 
     92     # Path not found. Our single heuristic: find |base| in the directory
     93     # structure with the longest common prefix of |path|.
     94     _, base = SplitParent(path)
     95 
     96     # Paths with a non-extension dot separator lose information in
     97     # _SimplifyFileName, so we try paths both with and without the dot to
     98     # maximize the possibility of finding the right path.
     99     potential_paths = (
    100         simplified_paths_map.get(_Normalize(base), []) +
    101         simplified_paths_map.get(_Normalize(base, splittext=True), []))
    102 
    103     if potential_paths == []:
    104       # There is no file with anything close to that name.
    105       return path
    106 
    107     # The most likely canonical file is the one with the longest common prefix
    108     # with |path|. This is slightly weaker than it could be; |path| is
    109     # compared without symbols, not the simplified form of |path|,
    110     # which may matter.
    111     max_prefix = potential_paths[0]
    112     max_prefix_length = len(_CommonNormalizedPrefix(max_prefix, path))
    113     for path_for_file in potential_paths[1:]:
    114       prefix_length = len(_CommonNormalizedPrefix(path_for_file, path))
    115       if prefix_length > max_prefix_length:
    116         max_prefix, max_prefix_length = path_for_file, prefix_length
    117 
    118     return max_prefix
    119 
    120   def Cron(self):
    121     return self._LoadCache()
    122