Home | History | Annotate | Download | only in webpagereplay
      1 #!/usr/bin/env python
      2 # Copyright 2013 Google Inc. All Rights Reserved.
      3 #
      4 # Licensed under the Apache License, Version 2.0 (the "License");
      5 # you may not use this file except in compliance with the License.
      6 # You may obtain a copy of the License at
      7 #
      8 #      http://www.apache.org/licenses/LICENSE-2.0
      9 #
     10 # Unless required by applicable law or agreed to in writing, software
     11 # distributed under the License is distributed on an "AS IS" BASIS,
     12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 # See the License for the specific language governing permissions and
     14 # limitations under the License.
     15 
     16 """Inject javascript into html page source code."""
     17 
     18 import logging
     19 import os
     20 import re
     21 import util
     22 
     23 DOCTYPE_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<!doctype html>',
     24                         re.IGNORECASE | re.DOTALL)
     25 HTML_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<html.*?>',
     26                      re.IGNORECASE | re.DOTALL)
     27 HEAD_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<head.*?>',
     28                      re.IGNORECASE | re.DOTALL)
     29 
     30 
     31 def GetInjectScript(scripts):
     32   """Loads |scripts| from disk and returns a string of their content."""
     33   lines = []
     34   if scripts:
     35     if not isinstance(scripts, list):
     36       scripts = scripts.split(',')
     37     for script in scripts:
     38       if os.path.exists(script):
     39         with open(script) as f:
     40           lines.extend(f.read())
     41       elif util.resource_exists(script):
     42         lines.extend(util.resource_string(script))
     43       else:
     44         raise Exception('Script does not exist: %s', script)
     45 
     46   def MinifyScript(script):
     47     """Remove C-style comments and line breaks from script.
     48     Note: statements must be ';' terminated, and not depending on newline"""
     49     # Regex adapted from http://ostermiller.org/findcomment.html.
     50     MULTILINE_COMMENT_RE = re.compile(r'/\*.*?\*/', re.DOTALL | re.MULTILINE)
     51     SINGLELINE_COMMENT_RE = re.compile(r'//.*', re.MULTILINE)
     52     # Remove C-style comments from JS.
     53     script = re.sub(MULTILINE_COMMENT_RE, '', script)
     54     script = re.sub(SINGLELINE_COMMENT_RE, '', script)
     55     # Remove line breaks.
     56     script = script.translate(None, '\r\n')
     57     return script
     58 
     59   return MinifyScript(''.join(lines))
     60 
     61 
     62 def InjectScript(content, content_type, script_to_inject):
     63   """Inject |script_to_inject| into |content| if |content_type| is 'text/html'.
     64 
     65   Inject |script_to_inject| into |content| immediately after <head>, <html> or
     66   <!doctype html>, if one of them is found. Otherwise, inject at the beginning.
     67 
     68   Returns:
     69     content, already_injected
     70     |content| is the new content if script is injected, otherwise the original.
     71     |already_injected| indicates if |script_to_inject| is already in |content|.
     72   """
     73   already_injected = False
     74   if content_type and content_type == 'text/html':
     75     already_injected = not content or script_to_inject in content
     76     if not already_injected:
     77       def InsertScriptAfter(matchobj):
     78         return '%s<script>%s</script>' % (matchobj.group(0), script_to_inject)
     79 
     80       content, is_injected = HEAD_RE.subn(InsertScriptAfter, content, 1)
     81       if not is_injected:
     82         content, is_injected = HTML_RE.subn(InsertScriptAfter, content, 1)
     83       if not is_injected:
     84         content, is_injected = DOCTYPE_RE.subn(InsertScriptAfter, content, 1)
     85       if not is_injected:
     86         content = '<script>%s</script>%s' % (script_to_inject, content)
     87         logging.warning('Inject at the very beginning, because no tag of '
     88                         '<head>, <html> or <!doctype html> is found.')
     89   return content, already_injected
     90