Home | History | Annotate | Download | only in webpagereplay
      1 #!/usr/bin/env python
      2 # Copyright 2010 Google Inc. All Rights Reserved.
      3 #
      4 # Licensed under the Apache License, Version 2.0 (the "License");
      5 # you may not use this file except in compliance with the License.
      6 # You may obtain a copy of the License at
      7 #
      8 #      http://www.apache.org/licenses/LICENSE-2.0
      9 #
     10 # Unless required by applicable law or agreed to in writing, software
     11 # distributed under the License is distributed on an "AS IS" BASIS,
     12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 # See the License for the specific language governing permissions and
     14 # limitations under the License.
     15 
     16 """Replays web pages under simulated network conditions.
     17 
     18 Must be run as administrator (sudo).
     19 
     20 To record web pages:
     21   1. Start the program in record mode.
     22      $ sudo ./replay.py --record archive.wpr
     23   2. Load the web pages you want to record in a web browser. It is important to
     24      clear browser caches before this so that all subresources are requested
     25      from the network.
     26   3. Kill the process to stop recording.
     27 
     28 To replay web pages:
     29   1. Start the program in replay mode with a previously recorded archive.
     30      $ sudo ./replay.py archive.wpr
     31   2. Load recorded pages in a web browser. A 404 will be served for any pages or
     32      resources not in the recorded archive.
     33 
     34 Network simulation examples:
     35   # 128KByte/s uplink bandwidth, 4Mbit/s downlink bandwidth with 100ms RTT
     36   $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr
     37 
     38   # 1% packet loss rate
     39   $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
     40 """
     41 
     42 import json
     43 import logging
     44 import optparse
     45 import os
     46 import socket
     47 import sys
     48 import traceback
     49 
     50 import customhandlers
     51 import dnsproxy
     52 import httparchive
     53 import httpclient
     54 import httpproxy
     55 import net_configs
     56 import platformsettings
     57 import rules_parser
     58 import script_injector
     59 import servermanager
     60 import trafficshaper
     61 
     62 if sys.version < '2.6':
     63   print 'Need Python 2.6 or greater.'
     64   sys.exit(1)
     65 
     66 
     67 def configure_logging(log_level_name, log_file_name=None):
     68   """Configure logging level and format.
     69 
     70   Args:
     71     log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
     72     log_file_name: a file name
     73   """
     74   if logging.root.handlers:
     75     logging.critical('A logging method (e.g. "logging.warn(...)")'
     76                      ' was called before logging was configured.')
     77   log_level = getattr(logging, log_level_name.upper())
     78   log_format = (
     79     '(%(levelname)s) %(asctime)s %(module)s.%(funcName)s:%(lineno)d  '
     80     '%(message)s')
     81 
     82 
     83   logging.basicConfig(level=log_level, format=log_format)
     84   logger = logging.getLogger()
     85   if log_file_name:
     86     fh = logging.FileHandler(log_file_name)
     87     fh.setLevel(log_level)
     88     fh.setFormatter(logging.Formatter(log_format))
     89     logger.addHandler(fh)
     90   system_handler = platformsettings.get_system_logging_handler()
     91   if system_handler:
     92     logger.addHandler(system_handler)
     93 
     94 
     95 def AddDnsForward(server_manager, host):
     96   """Forward DNS traffic."""
     97   server_manager.Append(platformsettings.set_temporary_primary_nameserver, host)
     98 
     99 
    100 def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
    101                 http_archive):
    102   dns_filters = []
    103   if options.dns_private_passthrough:
    104     private_filter = dnsproxy.PrivateIpFilter(real_dns_lookup, http_archive)
    105     dns_filters.append(private_filter)
    106     server_manager.AppendRecordCallback(private_filter.InitializeArchiveHosts)
    107     server_manager.AppendReplayCallback(private_filter.InitializeArchiveHosts)
    108   if options.shaping_dns:
    109     delay_filter = dnsproxy.DelayFilter(options.record, **options.shaping_dns)
    110     dns_filters.append(delay_filter)
    111     server_manager.AppendRecordCallback(delay_filter.SetRecordMode)
    112     server_manager.AppendReplayCallback(delay_filter.SetReplayMode)
    113   server_manager.Append(dnsproxy.DnsProxyServer, host, port,
    114                         dns_lookup=dnsproxy.ReplayDnsLookup(host, dns_filters))
    115 
    116 
    117 def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive):
    118   if options.rules_path:
    119     with open(options.rules_path) as file_obj:
    120       allowed_imports = [
    121           name.strip() for name in options.allowed_rule_imports.split(',')]
    122       rules = rules_parser.Rules(file_obj, allowed_imports)
    123     logging.info('Parsed %s rules:\n%s', options.rules_path, rules)
    124   else:
    125     rules = rules_parser.Rules()
    126   inject_script = script_injector.GetInjectScript(options.inject_scripts)
    127   custom_handlers = customhandlers.CustomHandlers(options, http_archive)
    128   custom_handlers.add_server_manager_handler(server_manager)
    129   archive_fetch = httpclient.ControllableHttpArchiveFetch(
    130       http_archive, real_dns_lookup,
    131       inject_script,
    132       options.diff_unknown_requests, options.record,
    133       use_closest_match=options.use_closest_match,
    134       scramble_images=options.scramble_images)
    135   server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
    136   server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)
    137   server_manager.Append(
    138       httpproxy.HttpProxyServer,
    139       archive_fetch, custom_handlers, rules,
    140       host=host, port=options.port, use_delays=options.use_server_delay,
    141       **options.shaping_http)
    142   if options.ssl:
    143     if options.should_generate_certs:
    144       server_manager.Append(
    145           httpproxy.HttpsProxyServer, archive_fetch, custom_handlers, rules,
    146           options.https_root_ca_cert_path, host=host, port=options.ssl_port,
    147           use_delays=options.use_server_delay, **options.shaping_http)
    148     else:
    149       server_manager.Append(
    150           httpproxy.SingleCertHttpsProxyServer, archive_fetch,
    151           custom_handlers, rules, options.https_root_ca_cert_path, host=host,
    152           port=options.ssl_port, use_delays=options.use_server_delay,
    153           **options.shaping_http)
    154   if options.http_to_https_port:
    155     server_manager.Append(
    156         httpproxy.HttpToHttpsProxyServer,
    157         archive_fetch, custom_handlers, rules,
    158         host=host, port=options.http_to_https_port,
    159         use_delays=options.use_server_delay,
    160         **options.shaping_http)
    161 
    162 
    163 def AddTrafficShaper(server_manager, options, host):
    164   if options.shaping_dummynet:
    165     server_manager.AppendTrafficShaper(
    166         trafficshaper.TrafficShaper, host=host,
    167         use_loopback=not options.server_mode and host == '127.0.0.1',
    168         **options.shaping_dummynet)
    169 
    170 
    171 class OptionsWrapper(object):
    172   """Add checks, updates, and methods to option values.
    173 
    174   Example:
    175     options, args = option_parser.parse_args()
    176     options = OptionsWrapper(options, option_parser)  # run checks and updates
    177     if options.record and options.HasTrafficShaping():
    178        [...]
    179   """
    180   _TRAFFICSHAPING_OPTIONS = {
    181       'down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'}
    182   _CONFLICTING_OPTIONS = (
    183       ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
    184                   'spdy', 'use_server_delay')),
    185       ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
    186                   'use_server_delay')),  # same as --record
    187       ('net', ('down', 'up', 'delay_ms')),
    188       ('server', ('server_mode',)),
    189   )
    190 
    191   def __init__(self, options, parser):
    192     self._options = options
    193     self._parser = parser
    194     self._nondefaults = set([
    195         name for name, value in parser.defaults.items()
    196         if getattr(options, name) != value])
    197     self._CheckConflicts()
    198     self._CheckValidIp('host')
    199     self._CheckFeatureSupport()
    200     self._MassageValues()
    201 
    202   def _CheckConflicts(self):
    203     """Give an error if mutually exclusive options are used."""
    204     for option, bad_options in self._CONFLICTING_OPTIONS:
    205       if option in self._nondefaults:
    206         for bad_option in bad_options:
    207           if bad_option in self._nondefaults:
    208             self._parser.error('Option --%s cannot be used with --%s.' %
    209                                 (bad_option, option))
    210 
    211   def _CheckValidIp(self, name):
    212     """Give an error if option |name| is not a valid IPv4 address."""
    213     value = getattr(self._options, name)
    214     if value:
    215       try:
    216         socket.inet_aton(value)
    217       except Exception:
    218         self._parser.error('Option --%s must be a valid IPv4 address.' % name)
    219 
    220   def _CheckFeatureSupport(self):
    221     if (self._options.should_generate_certs and
    222         not platformsettings.HasSniSupport()):
    223       self._parser.error('Option --should_generate_certs requires pyOpenSSL '
    224                          '0.13 or greater for SNI support.')
    225 
    226   def _ShapingKeywordArgs(self, shaping_key):
    227     """Return the shaping keyword args for |shaping_key|.
    228 
    229     Args:
    230       shaping_key: one of 'dummynet', 'dns', 'http'.
    231     Returns:
    232       {}  # if shaping_key does not apply, or options have default values.
    233       {k: v, ...}
    234     """
    235     kwargs = {}
    236     def AddItemIfSet(d, kw_key, opt_key=None):
    237       opt_key = opt_key or kw_key
    238       if opt_key in self._nondefaults:
    239         d[kw_key] = getattr(self, opt_key)
    240     if ((self.shaping_type == 'proxy' and shaping_key in ('dns', 'http')) or
    241         self.shaping_type == shaping_key):
    242       AddItemIfSet(kwargs, 'delay_ms')
    243       if shaping_key in ('dummynet', 'http'):
    244         AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
    245         AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
    246         if shaping_key == 'dummynet':
    247           AddItemIfSet(kwargs, 'packet_loss_rate')
    248           AddItemIfSet(kwargs, 'init_cwnd')
    249         elif self.shaping_type != 'none':
    250           if 'packet_loss_rate' in self._nondefaults:
    251             logging.warn('Shaping type, %s, ignores --packet_loss_rate=%s',
    252                          self.shaping_type, self.packet_loss_rate)
    253           if 'init_cwnd' in self._nondefaults:
    254             logging.warn('Shaping type, %s, ignores --init_cwnd=%s',
    255                          self.shaping_type, self.init_cwnd)
    256     return kwargs
    257 
    258   def _MassageValues(self):
    259     """Set options that depend on the values of other options."""
    260     if self.append and not self.record:
    261       self._options.record = True
    262     if self.net:
    263       self._options.down, self._options.up, self._options.delay_ms = \
    264           net_configs.GetNetConfig(self.net)
    265       self._nondefaults.update(['down', 'up', 'delay_ms'])
    266     if not self.ssl:
    267       self._options.https_root_ca_cert_path = None
    268     self.shaping_dns = self._ShapingKeywordArgs('dns')
    269     self.shaping_http = self._ShapingKeywordArgs('http')
    270     self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')
    271 
    272   def __getattr__(self, name):
    273     """Make the original option values available."""
    274     return getattr(self._options, name)
    275 
    276   def __repr__(self):
    277     """Return a json representation of the original options dictionary."""
    278     return json.dumps(self._options.__dict__)
    279 
    280   def IsRootRequired(self):
    281     """Returns True iff the options require whole program root access."""
    282     if self.server:
    283       return True
    284 
    285     def IsPrivilegedPort(port):
    286       return port and port < 1024
    287 
    288     if IsPrivilegedPort(self.port) or (self.ssl and
    289                                        IsPrivilegedPort(self.ssl_port)):
    290       return True
    291 
    292     if self.dns_forwarding:
    293       if IsPrivilegedPort(self.dns_port):
    294         return True
    295       if not self.server_mode and self.host == '127.0.0.1':
    296         return True
    297 
    298     return False
    299 
    300 
    301 def replay(options, replay_filename):
    302   if options.admin_check and options.IsRootRequired():
    303     platformsettings.rerun_as_administrator()
    304   configure_logging(options.log_level, options.log_file)
    305   server_manager = servermanager.ServerManager(options.record)
    306   if options.server:
    307     AddDnsForward(server_manager, options.server)
    308   else:
    309     real_dns_lookup = dnsproxy.RealDnsLookup(
    310         name_servers=[platformsettings.get_original_primary_nameserver()])
    311     if options.record:
    312       httparchive.HttpArchive.AssertWritable(replay_filename)
    313       if options.append and os.path.exists(replay_filename):
    314         http_archive = httparchive.HttpArchive.Load(replay_filename)
    315         logging.info('Appending to %s (loaded %d existing responses)',
    316                      replay_filename, len(http_archive))
    317       else:
    318         http_archive = httparchive.HttpArchive()
    319     else:
    320       http_archive = httparchive.HttpArchive.Load(replay_filename)
    321       logging.info('Loaded %d responses from %s',
    322                    len(http_archive), replay_filename)
    323     server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)
    324     server_manager.AppendRecordCallback(http_archive.clear)
    325 
    326     ipfw_dns_host = None
    327     if options.dns_forwarding or options.shaping_dummynet:
    328       # compute the ip/host used for the DNS server and traffic shaping
    329       ipfw_dns_host = options.host
    330       if not ipfw_dns_host:
    331         ipfw_dns_host = platformsettings.get_server_ip_address(
    332             options.server_mode)
    333 
    334     if options.dns_forwarding:
    335       if not options.server_mode and ipfw_dns_host == '127.0.0.1':
    336         AddDnsForward(server_manager, ipfw_dns_host)
    337       AddDnsProxy(server_manager, options, ipfw_dns_host, options.dns_port,
    338                   real_dns_lookup, http_archive)
    339     if options.ssl and options.https_root_ca_cert_path is None:
    340       options.https_root_ca_cert_path = os.path.join(os.path.dirname(__file__),
    341                                                      'wpr_cert.pem')
    342     http_proxy_address = options.host
    343     if not http_proxy_address:
    344       http_proxy_address = platformsettings.get_httpproxy_ip_address(
    345           options.server_mode)
    346     AddWebProxy(server_manager, options, http_proxy_address, real_dns_lookup,
    347                 http_archive)
    348     AddTrafficShaper(server_manager, options, ipfw_dns_host)
    349 
    350   exit_status = 0
    351   try:
    352     server_manager.Run()
    353   except KeyboardInterrupt:
    354     logging.info('Shutting down.')
    355   except (dnsproxy.DnsProxyException,
    356           trafficshaper.TrafficShaperException,
    357           platformsettings.NotAdministratorError,
    358           platformsettings.DnsUpdateError) as e:
    359     logging.critical('%s: %s', e.__class__.__name__, e)
    360     exit_status = 1
    361   except Exception:
    362     logging.critical(traceback.format_exc())
    363     exit_status = 2
    364 
    365   if options.record:
    366     http_archive.Persist(replay_filename)
    367     logging.info('Saved %d responses to %s', len(http_archive), replay_filename)
    368   return exit_status
    369 
    370 
    371 def GetOptionParser():
    372   class PlainHelpFormatter(optparse.IndentedHelpFormatter):
    373     def format_description(self, description):
    374       if description:
    375         return description + '\n'
    376       else:
    377         return ''
    378   option_parser = optparse.OptionParser(
    379       usage='%prog [options] replay_file',
    380       formatter=PlainHelpFormatter(),
    381       description=__doc__,
    382       epilog='http://code.google.com/p/web-page-replay/')
    383 
    384   option_parser.add_option('-r', '--record', default=False,
    385       action='store_true',
    386       help='Download real responses and record them to replay_file')
    387   option_parser.add_option('--append', default=False,
    388       action='store_true',
    389       help='Append responses to replay_file.')
    390   option_parser.add_option('-l', '--log_level', default='debug',
    391       action='store',
    392       type='choice',
    393       choices=('debug', 'info', 'warning', 'error', 'critical'),
    394       help='Minimum verbosity level to log')
    395   option_parser.add_option('-f', '--log_file', default=None,
    396       action='store',
    397       type='string',
    398       help='Log file to use in addition to writting logs to stderr.')
    399 
    400   network_group = optparse.OptionGroup(option_parser,
    401       'Network Simulation Options',
    402       'These options configure the network simulation in replay mode')
    403   network_group.add_option('-u', '--up', default='0',
    404       action='store',
    405       type='string',
    406       help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
    407   network_group.add_option('-d', '--down', default='0',
    408       action='store',
    409       type='string',
    410       help='Download Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
    411   network_group.add_option('-m', '--delay_ms', default='0',
    412       action='store',
    413       type='string',
    414       help='Propagation delay (latency) in milliseconds. Zero means no delay.')
    415   network_group.add_option('-p', '--packet_loss_rate', default='0',
    416       action='store',
    417       type='string',
    418       help='Packet loss rate in range [0..1]. Zero means no loss.')
    419   network_group.add_option('-w', '--init_cwnd', default='0',
    420       action='store',
    421       type='string',
    422       help='Set initial cwnd (linux only, requires kernel patch)')
    423   network_group.add_option('--net', default=None,
    424       action='store',
    425       type='choice',
    426       choices=net_configs.NET_CONFIG_NAMES,
    427       help='Select a set of network options: %s.' % ', '.join(
    428           net_configs.NET_CONFIG_NAMES))
    429   network_group.add_option('--shaping_type', default='dummynet',
    430       action='store',
    431       choices=('dummynet', 'proxy'),
    432       help='When shaping is configured (i.e. --up, --down, etc.) decides '
    433            'whether to use |dummynet| (default), or |proxy| servers.')
    434   option_parser.add_option_group(network_group)
    435 
    436   harness_group = optparse.OptionGroup(option_parser,
    437       'Replay Harness Options',
    438       'These advanced options configure various aspects of the replay harness')
    439   harness_group.add_option('-S', '--server', default=None,
    440       action='store',
    441       type='string',
    442       help='IP address of host running "replay.py --server_mode". '
    443            'This only changes the primary DNS nameserver to use the given IP.')
    444   harness_group.add_option('-M', '--server_mode', default=False,
    445       action='store_true',
    446       help='Run replay DNS & http proxies, and trafficshaping on --port '
    447            'without changing the primary DNS nameserver. '
    448            'Other hosts may connect to this using "replay.py --server" '
    449            'or by pointing their DNS to this server.')
    450   harness_group.add_option('-i', '--inject_scripts', default='deterministic.js',
    451       action='store',
    452       dest='inject_scripts',
    453       help='A comma separated list of JavaScript sources to inject in all '
    454            'pages. By default a script is injected that eliminates sources '
    455            'of entropy such as Date() and Math.random() deterministic. '
    456            'CAUTION: Without deterministic.js, many pages will not replay.')
    457   harness_group.add_option('-D', '--no-diff_unknown_requests', default=True,
    458       action='store_false',
    459       dest='diff_unknown_requests',
    460       help='During replay, do not show a diff of unknown requests against '
    461            'their nearest match in the archive.')
    462   harness_group.add_option('-C', '--use_closest_match', default=False,
    463       action='store_true',
    464       dest='use_closest_match',
    465       help='During replay, if a request is not found, serve the closest match'
    466            'in the archive instead of giving a 404.')
    467   harness_group.add_option('-U', '--use_server_delay', default=False,
    468       action='store_true',
    469       dest='use_server_delay',
    470       help='During replay, simulate server delay by delaying response time to'
    471            'requests.')
    472   harness_group.add_option('-I', '--screenshot_dir', default=None,
    473       action='store',
    474       type='string',
    475       help='Save PNG images of the loaded page in the given directory.')
    476   harness_group.add_option('-P', '--no-dns_private_passthrough', default=True,
    477       action='store_false',
    478       dest='dns_private_passthrough',
    479       help='Don\'t forward DNS requests that resolve to private network '
    480            'addresses. CAUTION: With this option important services like '
    481            'Kerberos will resolve to the HTTP proxy address.')
    482   harness_group.add_option('-x', '--no-dns_forwarding', default=True,
    483       action='store_false',
    484       dest='dns_forwarding',
    485       help='Don\'t forward DNS requests to the local replay server. '
    486            'CAUTION: With this option an external mechanism must be used to '
    487            'forward traffic to the replay server.')
    488   harness_group.add_option('--host', default=None,
    489       action='store',
    490       type='str',
    491       help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
    492            '127.0.0.1, depending on --server_mode and platform.')
    493   harness_group.add_option('-o', '--port', default=80,
    494       action='store',
    495       type='int',
    496       help='Port number to listen on.')
    497   harness_group.add_option('--ssl_port', default=443,
    498       action='store',
    499       type='int',
    500       help='SSL port number to listen on.')
    501   harness_group.add_option('--http_to_https_port', default=None,
    502       action='store',
    503       type='int',
    504       help='Port on which WPR will listen for HTTP requests that it will send '
    505            'along as HTTPS requests.')
    506   harness_group.add_option('--dns_port', default=53,
    507       action='store',
    508       type='int',
    509       help='DNS port number to listen on.')
    510   harness_group.add_option('-c', '--https_root_ca_cert_path', default=None,
    511       action='store',
    512       type='string',
    513       help='Certificate file to use with SSL (gets auto-generated if needed).')
    514   harness_group.add_option('--no-ssl', default=True,
    515       action='store_false',
    516       dest='ssl',
    517       help='Do not setup an SSL proxy.')
    518   option_parser.add_option_group(harness_group)
    519   harness_group.add_option('--should_generate_certs', default=False,
    520       action='store_true',
    521       help='Use OpenSSL to generate certificate files for requested hosts.')
    522   harness_group.add_option('--no-admin-check', default=True,
    523       action='store_false',
    524       dest='admin_check',
    525       help='Do not check if administrator access is needed.')
    526   harness_group.add_option('--scramble_images', default=False,
    527       action='store_true',
    528       dest='scramble_images',
    529       help='Scramble image responses.')
    530   harness_group.add_option('--rules_path', default=None,
    531       action='store',
    532       help='Path of file containing Python rules.')
    533   harness_group.add_option('--allowed_rule_imports', default='rules',
    534       action='store',
    535       help='A comma-separate list of allowed rule imports, or \'*\' to allow'
    536            ' all packages.  Defaults to \'%default\'.')
    537   return option_parser
    538 
    539 
    540 def main():
    541   option_parser = GetOptionParser()
    542   options, args = option_parser.parse_args()
    543   options = OptionsWrapper(options, option_parser)
    544 
    545   if options.server:
    546     replay_filename = None
    547   elif len(args) != 1:
    548     option_parser.error('Must specify a replay_file')
    549   else:
    550     replay_filename = args[0]
    551 
    552   return replay(options, replay_filename)
    553 
    554 
    555 if __name__ == '__main__':
    556   sys.exit(main())
    557