Home | History | Annotate | Download | only in web-page-replay
      1 # Copyright 2015 Google Inc. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #      http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 
     15 r"""Rules parser.
     16 
     17 The input syntax is:
     18   [{"comment": ignored_value},
     19    {"rule_class_name1": {"arg1": value, "arg2": value, ...}},
     20    {"rule_class_name2": {"arg1": value, "arg2": value, ...}},
     21    ...]
     22 E.g.:
     23   [{"comment": "this text is ignored"},
     24    {"SendStatus": {"url": "example\\.com/ss.*", "status": 204}},
     25    {"ModifyUrl": {"url": "(example\\.com)(/.*)", "new_url": "{1}"}}
     26   ]
     27 """
     28 
     29 import json
     30 import re
     31 
     32 
     33 class Error(Exception):
     34   pass
     35 
     36 
     37 class Rules(object):
     38 
     39   """A parsed sequence of Rule objects."""
     40 
     41   def __init__(self, file_obj=None, allowed_imports=None):
     42     """Initializes from the given file object.
     43 
     44     Args:
     45       file_obj: A file object.
     46       allowed_imports: A set of strings, defaults to {'rules'}.
     47         Use {'*'} to allow any import path.
     48     """
     49     if allowed_imports is None:
     50       allowed_imports = {'rules'}
     51     self._rules = [] if file_obj is None else _Load(file_obj, allowed_imports)
     52 
     53   def Contains(self, rule_type_name):
     54     """Returns true if any rule matches the given type name.
     55 
     56     Args:
     57       rule_type_name: a string.
     58     Returns:
     59       True if any rule matches, else False.
     60     """
     61     return any(rule for rule in self._rules if rule.IsType(rule_type_name))
     62 
     63   def Find(self, rule_type_name):
     64     """Returns a _Rule object containing all rules with the given type name.
     65 
     66     Args:
     67       rule_type_name: a string.
     68     Returns:
     69       A callable object that expects two arguments:
     70         request: the httparchive ArchivedHttpRequest
     71         response: the httparchive ArchivedHttpResponse
     72       and returns the rule return_value of the first rule that returns
     73       should_stop == True, or the last rule's return_value if all rules returns
     74       should_stop == False.
     75     """
     76     matches = [rule for rule in self._rules if rule.IsType(rule_type_name)]
     77     return _Rule(matches)
     78 
     79   def __str__(self):
     80     return _ToString(self._rules)
     81 
     82   def __repr__(self):
     83     return str(self)
     84 
     85 
     86 class _Rule(object):
     87   """Calls a sequence of Rule objects until one returns should_stop."""
     88 
     89   def __init__(self, rules):
     90     self._rules = rules
     91 
     92   def __call__(self, request, response):
     93     """Calls the rules until one returns should_stop.
     94 
     95     Args:
     96       request: the httparchive ArchivedHttpRequest.
     97       response: the httparchive ArchivedHttpResponse, which may be None.
     98     Returns:
     99       The rule return_value of the first rule that returns should_stop == True,
    100       or the last rule's return_value if all rules return should_stop == False.
    101     """
    102     return_value = None
    103     for rule in self._rules:
    104       should_stop, return_value = rule.ApplyRule(
    105           return_value, request, response)
    106       if should_stop:
    107         break
    108     return return_value
    109 
    110   def __str__(self):
    111     return _ToString(self._rules)
    112 
    113   def __repr__(self):
    114     return str(self)
    115 
    116 
    117 def _ToString(rules):
    118   """Formats a sequence of Rule objects into a string."""
    119   return '[\n%s\n]' % '\n'.join('%s' % rule for rule in rules)
    120 
    121 
    122 def _Load(file_obj, allowed_imports):
    123   """Parses and evaluates all rules in the given file.
    124 
    125   Args:
    126     file_obj: a file object.
    127     allowed_imports: a sequence of strings, e.g.: {'rules'}.
    128   Returns:
    129     a list of rules.
    130   """
    131   rules = []
    132   entries = json.load(file_obj)
    133   if not isinstance(entries, list):
    134     raise Error('Expecting a list, not %s', type(entries))
    135   for i, entry in enumerate(entries):
    136     if not isinstance(entry, dict):
    137       raise Error('%s: Expecting a dict, not %s', i, type(entry))
    138     if len(entry) != 1:
    139       raise Error('%s: Expecting 1 item, not %d', i, len(entry))
    140     name, args = next(entry.iteritems())
    141     if not isinstance(name, basestring):
    142       raise Error('%s: Expecting a string TYPE, not %s', i, type(name))
    143     if not re.match(r'(\w+\.)*\w+$', name):
    144       raise Error('%s: Expecting a classname TYPE, not %s', i, name)
    145     if name == 'comment':
    146       continue
    147     if not isinstance(args, dict):
    148       raise Error('%s: Expecting a dict ARGS, not %s', i, type(args))
    149     fullname = str(name)
    150     if '.' not in fullname:
    151       fullname = 'rules.%s' % fullname
    152 
    153     modulename, classname = fullname.rsplit('.', 1)
    154     if '*' not in allowed_imports and modulename not in allowed_imports:
    155       raise Error('%s: Package %r is not in allowed_imports', i, modulename)
    156 
    157     module = __import__(modulename, fromlist=[classname])
    158     clazz = getattr(module, classname)
    159 
    160     missing = {s for s in ('IsType', 'ApplyRule') if not hasattr(clazz, s)}
    161     if missing:
    162       raise Error('%s: %s lacks %s', i, clazz.__name__, ' and '.join(missing))
    163 
    164     rule = clazz(**args)
    165 
    166     rules.append(rule)
    167   return rules
    168