Home | History | Annotate | Download | only in web-page-replay
      1 #!/usr/bin/env python
      2 # Copyright 2010 Google Inc. All Rights Reserved.
      3 #
      4 # Licensed under the Apache License, Version 2.0 (the "License");
      5 # you may not use this file except in compliance with the License.
      6 # You may obtain a copy of the License at
      7 #
      8 #      http://www.apache.org/licenses/LICENSE-2.0
      9 #
     10 # Unless required by applicable law or agreed to in writing, software
     11 # distributed under the License is distributed on an "AS IS" BASIS,
     12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 # See the License for the specific language governing permissions and
     14 # limitations under the License.
     15 
     16 """Replays web pages under simulated network conditions.
     17 
     18 Must be run as administrator (sudo).
     19 
     20 To record web pages:
     21   1. Start the program in record mode.
     22      $ sudo ./replay.py --record archive.wpr
     23   2. Load the web pages you want to record in a web browser. It is important to
     24      clear browser caches before this so that all subresources are requested
     25      from the network.
     26   3. Kill the process to stop recording.
     27 
     28 To replay web pages:
     29   1. Start the program in replay mode with a previously recorded archive.
     30      $ sudo ./replay.py archive.wpr
     31   2. Load recorded pages in a web browser. A 404 will be served for any pages or
     32      resources not in the recorded archive.
     33 
     34 Network simulation examples:
  # 128KByte/s uplink bandwidth, 4Mbit/s downlink bandwidth with 100ms RTT
     36   $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr
     37 
     38   # 1% packet loss rate
     39   $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
     40 """
     41 
     42 import argparse
     43 import json
     44 import logging
     45 import os
     46 import socket
     47 import sys
     48 import traceback
     49 
     50 import customhandlers
     51 import dnsproxy
     52 import httparchive
     53 import httpclient
     54 import httpproxy
     55 import net_configs
     56 import platformsettings
     57 import rules_parser
     58 import script_injector
     59 import servermanager
     60 import trafficshaper
     61 
     62 if sys.version < '2.6':
     63   print 'Need Python 2.6 or greater.'
     64   sys.exit(1)
     65 
     66 
     67 def configure_logging(log_level_name, log_file_name=None):
     68   """Configure logging level and format.
     69 
     70   Args:
     71     log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
     72     log_file_name: a file name
     73   """
     74   if logging.root.handlers:
     75     logging.critical('A logging method (e.g. "logging.warn(...)")'
     76                      ' was called before logging was configured.')
     77   log_level = getattr(logging, log_level_name.upper())
     78   log_format = (
     79     '(%(levelname)s) %(asctime)s %(module)s.%(funcName)s:%(lineno)d  '
     80     '%(message)s')
     81 
     82 
     83   logging.basicConfig(level=log_level, format=log_format)
     84   logger = logging.getLogger()
     85   if log_file_name:
     86     fh = logging.FileHandler(log_file_name)
     87     fh.setLevel(log_level)
     88     fh.setFormatter(logging.Formatter(log_format))
     89     logger.addHandler(fh)
     90   system_handler = platformsettings.get_system_logging_handler()
     91   if system_handler:
     92     logger.addHandler(system_handler)
     93 
     94 
     95 def AddDnsForward(server_manager, host):
     96   """Forward DNS traffic."""
     97   server_manager.Append(platformsettings.set_temporary_primary_nameserver, host)
     98 
     99 
    100 def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
    101                 http_archive):
    102   dns_filters = []
    103   if options.dns_private_passthrough:
    104     private_filter = dnsproxy.PrivateIpFilter(real_dns_lookup, http_archive)
    105     dns_filters.append(private_filter)
    106     server_manager.AppendRecordCallback(private_filter.InitializeArchiveHosts)
    107     server_manager.AppendReplayCallback(private_filter.InitializeArchiveHosts)
    108   if options.shaping_dns:
    109     delay_filter = dnsproxy.DelayFilter(options.record, **options.shaping_dns)
    110     dns_filters.append(delay_filter)
    111     server_manager.AppendRecordCallback(delay_filter.SetRecordMode)
    112     server_manager.AppendReplayCallback(delay_filter.SetReplayMode)
    113   server_manager.Append(dnsproxy.DnsProxyServer, host, port,
    114                         dns_lookup=dnsproxy.ReplayDnsLookup(host, dns_filters))
    115 
    116 
    117 def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive):
    118   if options.rules_path:
    119     with open(options.rules_path) as file_obj:
    120       allowed_imports = [
    121           name.strip() for name in options.allowed_rule_imports.split(',')]
    122       rules = rules_parser.Rules(file_obj, allowed_imports)
    123     logging.info('Parsed %s rules:\n%s', options.rules_path, rules)
    124   else:
    125     rules = rules_parser.Rules()
    126   inject_script = script_injector.GetInjectScript(options.inject_scripts)
    127   custom_handlers = customhandlers.CustomHandlers(options, http_archive)
    128   custom_handlers.add_server_manager_handler(server_manager)
    129   archive_fetch = httpclient.ControllableHttpArchiveFetch(
    130       http_archive, real_dns_lookup,
    131       inject_script,
    132       options.diff_unknown_requests, options.record,
    133       use_closest_match=options.use_closest_match,
    134       scramble_images=options.scramble_images)
    135   server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
    136   server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)
    137   allow_generate_304 = not options.record
    138   server_manager.Append(
    139       httpproxy.HttpProxyServer,
    140       archive_fetch, custom_handlers, rules,
    141       host=host, port=options.port, use_delays=options.use_server_delay,
    142       allow_generate_304=allow_generate_304,
    143       **options.shaping_http)
    144   if options.ssl:
    145     if options.should_generate_certs:
    146       server_manager.Append(
    147           httpproxy.HttpsProxyServer, archive_fetch, custom_handlers, rules,
    148           options.https_root_ca_cert_path, host=host, port=options.ssl_port,
    149           allow_generate_304=allow_generate_304,
    150           use_delays=options.use_server_delay, **options.shaping_http)
    151     else:
    152       server_manager.Append(
    153           httpproxy.SingleCertHttpsProxyServer, archive_fetch,
    154           custom_handlers, rules, options.https_root_ca_cert_path, host=host,
    155           port=options.ssl_port, use_delays=options.use_server_delay,
    156           allow_generate_304=allow_generate_304,
    157           **options.shaping_http)
    158   if options.http_to_https_port:
    159     server_manager.Append(
    160         httpproxy.HttpToHttpsProxyServer,
    161         archive_fetch, custom_handlers, rules,
    162         host=host, port=options.http_to_https_port,
    163         use_delays=options.use_server_delay,
    164         allow_generate_304=allow_generate_304,
    165         **options.shaping_http)
    166 
    167 
    168 def AddTrafficShaper(server_manager, options, host):
    169   if options.shaping_dummynet:
    170     server_manager.AppendTrafficShaper(
    171         trafficshaper.TrafficShaper, host=host,
    172         use_loopback=not options.server_mode and host == '127.0.0.1',
    173         **options.shaping_dummynet)
    174 
    175 
    176 class OptionsWrapper(object):
    177   """Add checks, updates, and methods to option values.
    178 
    179   Example:
    180     options, args = arg_parser.parse_args()
    181     options = OptionsWrapper(options, arg_parser)  # run checks and updates
    182     if options.record and options.HasTrafficShaping():
    183        [...]
    184   """
    185   _TRAFFICSHAPING_OPTIONS = {
    186       'down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'}
    187   _CONFLICTING_OPTIONS = (
    188       ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
    189                   'spdy', 'use_server_delay')),
    190       ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
    191                   'use_server_delay')),  # same as --record
    192       ('net', ('down', 'up', 'delay_ms')),
    193       ('server', ('server_mode',)),
    194   )
    195 
    196   def __init__(self, options, parser):
    197     self._options = options
    198     self._parser = parser
    199     self._nondefaults = set([
    200         action.dest for action in parser._optionals._actions
    201         if getattr(options, action.dest, action.default) is not action.default])
    202     self._CheckConflicts()
    203     self._CheckValidIp('host')
    204     self._CheckFeatureSupport()
    205     self._MassageValues()
    206 
    207   def _CheckConflicts(self):
    208     """Give an error if mutually exclusive options are used."""
    209     for option, bad_options in self._CONFLICTING_OPTIONS:
    210       if option in self._nondefaults:
    211         for bad_option in bad_options:
    212           if bad_option in self._nondefaults:
    213             self._parser.error('Option --%s cannot be used with --%s.' %
    214                                 (bad_option, option))
    215 
    216   def _CheckValidIp(self, name):
    217     """Give an error if option |name| is not a valid IPv4 address."""
    218     value = getattr(self._options, name)
    219     if value:
    220       try:
    221         socket.inet_aton(value)
    222       except Exception:
    223         self._parser.error('Option --%s must be a valid IPv4 address.' % name)
    224 
    225   def _CheckFeatureSupport(self):
    226     if (self._options.should_generate_certs and
    227         not platformsettings.HasSniSupport()):
    228       self._parser.error('Option --should_generate_certs requires pyOpenSSL '
    229                          '0.13 or greater for SNI support.')
    230 
    231   def _ShapingKeywordArgs(self, shaping_key):
    232     """Return the shaping keyword args for |shaping_key|.
    233 
    234     Args:
    235       shaping_key: one of 'dummynet', 'dns', 'http'.
    236     Returns:
    237       {}  # if shaping_key does not apply, or options have default values.
    238       {k: v, ...}
    239     """
    240     kwargs = {}
    241     def AddItemIfSet(d, kw_key, opt_key=None):
    242       opt_key = opt_key or kw_key
    243       if opt_key in self._nondefaults:
    244         d[kw_key] = getattr(self, opt_key)
    245     if ((self.shaping_type == 'proxy' and shaping_key in ('dns', 'http')) or
    246         self.shaping_type == shaping_key):
    247       AddItemIfSet(kwargs, 'delay_ms')
    248       if shaping_key in ('dummynet', 'http'):
    249         AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
    250         AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
    251         if shaping_key == 'dummynet':
    252           AddItemIfSet(kwargs, 'packet_loss_rate')
    253           AddItemIfSet(kwargs, 'init_cwnd')
    254         elif self.shaping_type != 'none':
    255           if 'packet_loss_rate' in self._nondefaults:
    256             logging.warn('Shaping type, %s, ignores --packet_loss_rate=%s',
    257                          self.shaping_type, self.packet_loss_rate)
    258           if 'init_cwnd' in self._nondefaults:
    259             logging.warn('Shaping type, %s, ignores --init_cwnd=%s',
    260                          self.shaping_type, self.init_cwnd)
    261     return kwargs
    262 
    263   def _MassageValues(self):
    264     """Set options that depend on the values of other options."""
    265     if self.append and not self.record:
    266       self._options.record = True
    267     if self.net:
    268       self._options.down, self._options.up, self._options.delay_ms = \
    269           net_configs.GetNetConfig(self.net)
    270       self._nondefaults.update(['down', 'up', 'delay_ms'])
    271     if not self.ssl:
    272       self._options.https_root_ca_cert_path = None
    273     self.shaping_dns = self._ShapingKeywordArgs('dns')
    274     self.shaping_http = self._ShapingKeywordArgs('http')
    275     self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')
    276 
    277   def __getattr__(self, name):
    278     """Make the original option values available."""
    279     return getattr(self._options, name)
    280 
    281   def __repr__(self):
    282     """Return a json representation of the original options dictionary."""
    283     return json.dumps(self._options.__dict__)
    284 
    285   def IsRootRequired(self):
    286     """Returns True iff the options require whole program root access."""
    287     if self.server:
    288       return True
    289 
    290     def IsPrivilegedPort(port):
    291       return port and port < 1024
    292 
    293     if IsPrivilegedPort(self.port) or (self.ssl and
    294                                        IsPrivilegedPort(self.ssl_port)):
    295       return True
    296 
    297     if self.dns_forwarding:
    298       if IsPrivilegedPort(self.dns_port):
    299         return True
    300       if not self.server_mode and self.host == '127.0.0.1':
    301         return True
    302 
    303     return False
    304 
    305 
    306 def replay(options, replay_filename):
    307   if options.admin_check and options.IsRootRequired():
    308     platformsettings.rerun_as_administrator()
    309   configure_logging(options.log_level, options.log_file)
    310   server_manager = servermanager.ServerManager(options.record)
    311   if options.server:
    312     AddDnsForward(server_manager, options.server)
    313   else:
    314     real_dns_lookup = dnsproxy.RealDnsLookup(
    315         name_servers=[platformsettings.get_original_primary_nameserver()])
    316     if options.record:
    317       httparchive.HttpArchive.AssertWritable(replay_filename)
    318       if options.append and os.path.exists(replay_filename):
    319         http_archive = httparchive.HttpArchive.Load(replay_filename)
    320         logging.info('Appending to %s (loaded %d existing responses)',
    321                      replay_filename, len(http_archive))
    322       else:
    323         http_archive = httparchive.HttpArchive()
    324     else:
    325       http_archive = httparchive.HttpArchive.Load(replay_filename)
    326       logging.info('Loaded %d responses from %s',
    327                    len(http_archive), replay_filename)
    328     server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)
    329     server_manager.AppendRecordCallback(http_archive.clear)
    330 
    331     ipfw_dns_host = None
    332     if options.dns_forwarding or options.shaping_dummynet:
    333       # compute the ip/host used for the DNS server and traffic shaping
    334       ipfw_dns_host = options.host
    335       if not ipfw_dns_host:
    336         ipfw_dns_host = platformsettings.get_server_ip_address(
    337             options.server_mode)
    338 
    339     if options.dns_forwarding:
    340       if not options.server_mode and ipfw_dns_host == '127.0.0.1':
    341         AddDnsForward(server_manager, ipfw_dns_host)
    342       AddDnsProxy(server_manager, options, ipfw_dns_host, options.dns_port,
    343                   real_dns_lookup, http_archive)
    344     if options.ssl and options.https_root_ca_cert_path is None:
    345       options.https_root_ca_cert_path = os.path.join(os.path.dirname(__file__),
    346                                                      'wpr_cert.pem')
    347     http_proxy_address = options.host
    348     if not http_proxy_address:
    349       http_proxy_address = platformsettings.get_httpproxy_ip_address(
    350           options.server_mode)
    351     AddWebProxy(server_manager, options, http_proxy_address, real_dns_lookup,
    352                 http_archive)
    353     AddTrafficShaper(server_manager, options, ipfw_dns_host)
    354 
    355   exit_status = 0
    356   try:
    357     server_manager.Run()
    358   except KeyboardInterrupt:
    359     logging.info('Shutting down.')
    360   except (dnsproxy.DnsProxyException,
    361           trafficshaper.TrafficShaperException,
    362           platformsettings.NotAdministratorError,
    363           platformsettings.DnsUpdateError) as e:
    364     logging.critical('%s: %s', e.__class__.__name__, e)
    365     exit_status = 1
    366   except Exception:
    367     logging.critical(traceback.format_exc())
    368     exit_status = 2
    369 
    370   if options.record:
    371     http_archive.Persist(replay_filename)
    372     logging.info('Saved %d responses to %s', len(http_archive), replay_filename)
    373   return exit_status
    374 
    375 
    376 def GetParser():
    377   arg_parser = argparse.ArgumentParser(
    378       usage='%(prog)s [options] replay_file',
    379       description=__doc__,
    380       formatter_class=argparse.RawDescriptionHelpFormatter,
    381       epilog='http://code.google.com/p/web-page-replay/')
    382 
    383   arg_parser.add_argument('replay_filename', type=str, help='Replay file',
    384                           nargs='?')
    385 
    386   arg_parser.add_argument('-r', '--record', default=False,
    387       action='store_true',
    388       help='Download real responses and record them to replay_file')
    389   arg_parser.add_argument('--append', default=False,
    390       action='store_true',
    391       help='Append responses to replay_file.')
    392   arg_parser.add_argument('-l', '--log_level', default='debug',
    393       action='store',
    394       type=str,
    395       choices=('debug', 'info', 'warning', 'error', 'critical'),
    396       help='Minimum verbosity level to log')
    397   arg_parser.add_argument('-f', '--log_file', default=None,
    398       action='store',
    399       type=str,
    400       help='Log file to use in addition to writting logs to stderr.')
    401 
    402   network_group = arg_parser.add_argument_group(
    403       title='Network Simulation Options',
    404       description=('These options configure the network simulation in '
    405                    'replay mode'))
    406   network_group.add_argument('-u', '--up', default='0',
    407       action='store',
    408       type=str,
    409       help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
    410   network_group.add_argument('-d', '--down', default='0',
    411       action='store',
    412       type=str,
    413       help='Download Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
    414   network_group.add_argument('-m', '--delay_ms', default='0',
    415       action='store',
    416       type=str,
    417       help='Propagation delay (latency) in milliseconds. Zero means no delay.')
    418   network_group.add_argument('-p', '--packet_loss_rate', default='0',
    419       action='store',
    420       type=str,
    421       help='Packet loss rate in range [0..1]. Zero means no loss.')
    422   network_group.add_argument('-w', '--init_cwnd', default='0',
    423       action='store',
    424       type=str,
    425       help='Set initial cwnd (linux only, requires kernel patch)')
    426   network_group.add_argument('--net', default=None,
    427       action='store',
    428       type=str,
    429       choices=net_configs.NET_CONFIG_NAMES,
    430       help='Select a set of network options: %s.' % ', '.join(
    431           net_configs.NET_CONFIG_NAMES))
    432   network_group.add_argument('--shaping_type', default='dummynet',
    433       action='store',
    434       choices=('dummynet', 'proxy'),
    435       help='When shaping is configured (i.e. --up, --down, etc.) decides '
    436            'whether to use |dummynet| (default), or |proxy| servers.')
    437 
    438   harness_group = arg_parser.add_argument_group(
    439       title='Replay Harness Options',
    440       description=('These advanced options configure various aspects '
    441                    'of the replay harness'))
    442   harness_group.add_argument('-S', '--server', default=None,
    443       action='store',
    444       type=str,
    445       help='IP address of host running "replay.py --server_mode". '
    446            'This only changes the primary DNS nameserver to use the given IP.')
    447   harness_group.add_argument('-M', '--server_mode', default=False,
    448       action='store_true',
    449       help='Run replay DNS & http proxies, and trafficshaping on --port '
    450            'without changing the primary DNS nameserver. '
    451            'Other hosts may connect to this using "replay.py --server" '
    452            'or by pointing their DNS to this server.')
    453   harness_group.add_argument('-i', '--inject_scripts', default='deterministic.js',
    454       action='store',
    455       dest='inject_scripts',
    456       help='A comma separated list of JavaScript sources to inject in all '
    457            'pages. By default a script is injected that eliminates sources '
    458            'of entropy such as Date() and Math.random() deterministic. '
    459            'CAUTION: Without deterministic.js, many pages will not replay.')
    460   harness_group.add_argument('-D', '--no-diff_unknown_requests', default=True,
    461       action='store_false',
    462       dest='diff_unknown_requests',
    463       help='During replay, do not show a diff of unknown requests against '
    464            'their nearest match in the archive.')
    465   harness_group.add_argument('-C', '--use_closest_match', default=False,
    466       action='store_true',
    467       dest='use_closest_match',
    468       help='During replay, if a request is not found, serve the closest match'
    469            'in the archive instead of giving a 404.')
    470   harness_group.add_argument('-U', '--use_server_delay', default=False,
    471       action='store_true',
    472       dest='use_server_delay',
    473       help='During replay, simulate server delay by delaying response time to'
    474            'requests.')
    475   harness_group.add_argument('-I', '--screenshot_dir', default=None,
    476       action='store',
    477       type=str,
    478       help='Save PNG images of the loaded page in the given directory.')
    479   harness_group.add_argument('-P', '--no-dns_private_passthrough', default=True,
    480       action='store_false',
    481       dest='dns_private_passthrough',
    482       help='Don\'t forward DNS requests that resolve to private network '
    483            'addresses. CAUTION: With this option important services like '
    484            'Kerberos will resolve to the HTTP proxy address.')
    485   harness_group.add_argument('-x', '--no-dns_forwarding', default=True,
    486       action='store_false',
    487       dest='dns_forwarding',
    488       help='Don\'t forward DNS requests to the local replay server. '
    489            'CAUTION: With this option an external mechanism must be used to '
    490            'forward traffic to the replay server.')
    491   harness_group.add_argument('--host', default=None,
    492       action='store',
    493       type=str,
    494       help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
    495            '127.0.0.1, depending on --server_mode and platform.')
    496   harness_group.add_argument('-o', '--port', default=80,
    497       action='store',
    498       type=int,
    499       help='Port number to listen on.')
    500   harness_group.add_argument('--ssl_port', default=443,
    501       action='store',
    502       type=int,
    503       help='SSL port number to listen on.')
    504   harness_group.add_argument('--http_to_https_port', default=None,
    505       action='store',
    506       type=int,
    507       help='Port on which WPR will listen for HTTP requests that it will send '
    508            'along as HTTPS requests.')
    509   harness_group.add_argument('--dns_port', default=53,
    510       action='store',
    511       type=int,
    512       help='DNS port number to listen on.')
    513   harness_group.add_argument('-c', '--https_root_ca_cert_path', default=None,
    514       action='store',
    515       type=str,
    516       help='Certificate file to use with SSL (gets auto-generated if needed).')
    517   harness_group.add_argument('--no-ssl', default=True,
    518       action='store_false',
    519       dest='ssl',
    520       help='Do not setup an SSL proxy.')
    521   harness_group.add_argument('--should_generate_certs', default=False,
    522       action='store_true',
    523       help='Use OpenSSL to generate certificate files for requested hosts.')
    524   harness_group.add_argument('--no-admin-check', default=True,
    525       action='store_false',
    526       dest='admin_check',
    527       help='Do not check if administrator access is needed.')
    528   harness_group.add_argument('--scramble_images', default=False,
    529       action='store_true',
    530       dest='scramble_images',
    531       help='Scramble image responses.')
    532   harness_group.add_argument('--rules_path', default=None,
    533       action='store',
    534       help='Path of file containing Python rules.')
    535   harness_group.add_argument('--allowed_rule_imports', default='rules',
    536       action='store',
    537       help='A comma-separate list of allowed rule imports, or \'*\' to allow'
    538            ' all packages.  Defaults to %(default)s.')
    539   return arg_parser
    540 
    541 
    542 def main():
    543   arg_parser = GetParser()
    544   options = arg_parser.parse_args()
    545   options = OptionsWrapper(options, arg_parser)
    546 
    547   if options.server:
    548     options.replay_filename = None
    549   elif options.replay_filename is None:
    550     arg_parser.error('Must specify a replay_file')
    551   return replay(options, options.replay_filename)
    552 
    553 
    554 if __name__ == '__main__':
    555   sys.exit(main())
    556