#!/usr/bin/env python
# Copyright 2010 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Replays web pages under simulated network conditions.

Must be run as administrator (sudo).

To record web pages:
  1. Start the program in record mode.
     $ sudo ./replay.py --record archive.wpr
  2. Load the web pages you want to record in a web browser. It is important to
     clear browser caches before this so that all subresources are requested
     from the network.
  3. Kill the process to stop recording.

To replay web pages:
  1. Start the program in replay mode with a previously recorded archive.
     $ sudo ./replay.py archive.wpr
  2. Load recorded pages in a web browser. A 404 will be served for any pages or
     resources not in the recorded archive.

Network simulation examples:
  # 128KByte/s uplink bandwidth, 4Mbit/s downlink bandwidth with 100ms RTT
  $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr

  # 1% packet loss rate
  $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
"""

import json
import logging
import optparse
import os
import socket
import sys
import traceback

import customhandlers
import dnsproxy
import httparchive
import httpclient
import httpproxy
import net_configs
import platformsettings
import rules_parser
import script_injector
import servermanager
import trafficshaper

# Compare the version tuple, not the version string: string comparison is
# lexicographic, so e.g. '2.10' would sort before '2.6'.
if sys.version_info < (2, 6):
  print('Need Python 2.6 or greater.')
  sys.exit(1)


def configure_logging(log_level_name, log_file_name=None):
  """Configure logging level and format.

  Installs the root handler via logging.basicConfig, optionally adds a file
  handler, and adds the platform's system logging handler when one exists.

  Args:
    log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
    log_file_name: a file name; when given, records are also written there.
  """
  if logging.root.handlers:
    # basicConfig() does nothing once the root logger has handlers, so an
    # earlier logging call means the settings below would not take effect.
    logging.critical('A logging method (e.g. "logging.warn(...)")'
                     ' was called before logging was configured.')
  log_level = getattr(logging, log_level_name.upper())
  log_format = (
      '(%(levelname)s) %(asctime)s %(module)s.%(funcName)s:%(lineno)d  '
      '%(message)s')

  logging.basicConfig(level=log_level, format=log_format)
  logger = logging.getLogger()
  if log_file_name:
    fh = logging.FileHandler(log_file_name)
    fh.setLevel(log_level)
    fh.setFormatter(logging.Formatter(log_format))
    logger.addHandler(fh)
  system_handler = platformsettings.get_system_logging_handler()
  if system_handler:
    logger.addHandler(system_handler)


def AddDnsForward(server_manager, host):
  """Forward DNS traffic.

  Registers platformsettings.set_temporary_primary_nameserver with
  |server_manager|, passing |host| as its argument.
  """
  server_manager.Append(platformsettings.set_temporary_primary_nameserver, host)


def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
                http_archive):
  """Register a DNS proxy server, with optional filters, on |server_manager|.

  Args:
    server_manager: the servermanager.ServerManager to register with.
    options: an OptionsWrapper; reads dns_private_passthrough, shaping_dns
        and record.
    host: address passed to the proxy server and to the replay DNS lookup.
    port: port passed to the proxy server.
    real_dns_lookup: lookup object handed to the private-IP filter.
    http_archive: archive handed to the private-IP filter.
  """
  dns_filters = []
  if options.dns_private_passthrough:
    private_filter = dnsproxy.PrivateIpFilter(real_dns_lookup, http_archive)
    dns_filters.append(private_filter)
    server_manager.AppendRecordCallback(private_filter.InitializeArchiveHosts)
    server_manager.AppendReplayCallback(private_filter.InitializeArchiveHosts)
  if options.shaping_dns:
    delay_filter = dnsproxy.DelayFilter(options.record, **options.shaping_dns)
    dns_filters.append(delay_filter)
    server_manager.AppendRecordCallback(delay_filter.SetRecordMode)
    server_manager.AppendReplayCallback(delay_filter.SetReplayMode)
  server_manager.Append(dnsproxy.DnsProxyServer, host, port,
                        dns_lookup=dnsproxy.ReplayDnsLookup(host, dns_filters))


def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive):
  """Register the HTTP proxy (and optional HTTPS proxies) on |server_manager|.

  Args:
    server_manager: the servermanager.ServerManager to register with.
    options: an OptionsWrapper; supplies rules, ports, SSL and shaping values.
    host: address the proxy servers are created with.
    real_dns_lookup: lookup object handed to the archive fetcher.
    http_archive: archive handed to the fetcher and the custom handlers.
  """
  if options.rules_path:
    with open(options.rules_path) as file_obj:
      allowed_imports = [
          name.strip() for name in options.allowed_rule_imports.split(',')]
      rules = rules_parser.Rules(file_obj, allowed_imports)
    logging.info('Parsed %s rules:\n%s', options.rules_path, rules)
  else:
    rules = rules_parser.Rules()
  inject_script = script_injector.GetInjectScript(options.inject_scripts)
  custom_handlers = customhandlers.CustomHandlers(options, http_archive)
  custom_handlers.add_server_manager_handler(server_manager)
  archive_fetch = httpclient.ControllableHttpArchiveFetch(
      http_archive, real_dns_lookup,
      inject_script,
      options.diff_unknown_requests, options.record,
      use_closest_match=options.use_closest_match,
      scramble_images=options.scramble_images)
  server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
  server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)
  server_manager.Append(
      httpproxy.HttpProxyServer,
      archive_fetch, custom_handlers, rules,
      host=host, port=options.port, use_delays=options.use_server_delay,
      **options.shaping_http)
  if options.ssl:
    if options.should_generate_certs:
      server_manager.Append(
          httpproxy.HttpsProxyServer, archive_fetch, custom_handlers, rules,
          options.https_root_ca_cert_path, host=host, port=options.ssl_port,
          use_delays=options.use_server_delay, **options.shaping_http)
    else:
      server_manager.Append(
          httpproxy.SingleCertHttpsProxyServer, archive_fetch,
          custom_handlers, rules, options.https_root_ca_cert_path, host=host,
          port=options.ssl_port, use_delays=options.use_server_delay,
          **options.shaping_http)
  if options.http_to_https_port:
    server_manager.Append(
        httpproxy.HttpToHttpsProxyServer,
        archive_fetch, custom_handlers, rules,
        host=host, port=options.http_to_https_port,
        use_delays=options.use_server_delay,
        **options.shaping_http)


def AddTrafficShaper(server_manager, options, host):
  """Register a traffic shaper when dummynet shaping arguments are set.

  Does nothing when options.shaping_dummynet is empty.
  """
  if options.shaping_dummynet:
    server_manager.AppendTrafficShaper(
        trafficshaper.TrafficShaper, host=host,
        use_loopback=not options.server_mode and host == '127.0.0.1',
        **options.shaping_dummynet)


class OptionsWrapper(object):
  """Add checks, updates, and methods to option values.

  Attributes that are not defined on the wrapper itself are looked up on the
  wrapped optparse values object (see __getattr__).

  Example:
    options, args = option_parser.parse_args()
    options = OptionsWrapper(options, option_parser)  # run checks and updates
    if options.record and options.HasTrafficShaping():
       [...]
  """
  _TRAFFICSHAPING_OPTIONS = {
      'down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'}
  # Each entry is (option, options that may not be combined with it).
  _CONFLICTING_OPTIONS = (
      ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'spdy', 'use_server_delay')),
      ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'use_server_delay')),  # same as --record
      ('net', ('down', 'up', 'delay_ms')),
      ('server', ('server_mode',)),
  )

  def __init__(self, options, parser):
    """Wrap |options| and run all checks and derived-value updates.

    Args:
      options: the values object returned by parser.parse_args().
      parser: the optparse.OptionParser; used for defaults and for error().
    """
    self._options = options
    self._parser = parser
    # Names of options whose parsed value differs from the parser default.
    self._nondefaults = set(
        name for name, value in parser.defaults.items()
        if getattr(options, name) != value)
    self._CheckConflicts()
    self._CheckValidIp('host')
    self._CheckFeatureSupport()
    self._MassageValues()

  def _CheckConflicts(self):
    """Give an error if mutually exclusive options are used."""
    for option, bad_options in self._CONFLICTING_OPTIONS:
      if option in self._nondefaults:
        for bad_option in bad_options:
          if bad_option in self._nondefaults:
            self._parser.error('Option --%s cannot be used with --%s.' %
                                (bad_option, option))

  def _CheckValidIp(self, name):
    """Give an error if option |name| is not a valid IPv4 address."""
    value = getattr(self._options, name)
    if value:
      try:
        socket.inet_aton(value)
      except (socket.error, TypeError, ValueError):
        self._parser.error('Option --%s must be a valid IPv4 address.' % name)

  def _CheckFeatureSupport(self):
    """Give an error if a requested feature is unavailable on this system."""
    if (self._options.should_generate_certs and
        not platformsettings.HasSniSupport()):
      self._parser.error('Option --should_generate_certs requires pyOpenSSL '
                         '0.13 or greater for SNI support.')

  def _ShapingKeywordArgs(self, shaping_key):
    """Return the shaping keyword args for |shaping_key|.

    Only options with non-default values are included.

    Args:
      shaping_key: one of 'dummynet', 'dns', 'http'.
    Returns:
      {}  # if shaping_key does not apply, or options have default values.
      {k: v, ...}
    """
    kwargs = {}
    def AddItemIfSet(d, kw_key, opt_key=None):
      opt_key = opt_key or kw_key
      if opt_key in self._nondefaults:
        d[kw_key] = getattr(self, opt_key)
    if ((self.shaping_type == 'proxy' and shaping_key in ('dns', 'http')) or
        self.shaping_type == shaping_key):
      AddItemIfSet(kwargs, 'delay_ms')
      if shaping_key in ('dummynet', 'http'):
        AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
        AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
        if shaping_key == 'dummynet':
          AddItemIfSet(kwargs, 'packet_loss_rate')
          AddItemIfSet(kwargs, 'init_cwnd')
        elif self.shaping_type != 'none':
          # These two settings are only passed along for 'dummynet'; tell the
          # user they are being dropped instead of ignoring them silently.
          if 'packet_loss_rate' in self._nondefaults:
            logging.warning('Shaping type, %s, ignores --packet_loss_rate=%s',
                            self.shaping_type, self.packet_loss_rate)
          if 'init_cwnd' in self._nondefaults:
            logging.warning('Shaping type, %s, ignores --init_cwnd=%s',
                            self.shaping_type, self.init_cwnd)
    return kwargs

  def _MassageValues(self):
    """Set options that depend on the values of other options."""
    if self.append and not self.record:
      self._options.record = True
    if self.net:
      (self._options.down, self._options.up,
       self._options.delay_ms) = net_configs.GetNetConfig(self.net)
      # Mark the values as user-set so _ShapingKeywordArgs picks them up.
      self._nondefaults.update(['down', 'up', 'delay_ms'])
    if not self.ssl:
      self._options.https_root_ca_cert_path = None
    self.shaping_dns = self._ShapingKeywordArgs('dns')
    self.shaping_http = self._ShapingKeywordArgs('http')
    self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')

  def __getattr__(self, name):
    """Make the original option values available."""
    if name == '_options':
      # Only reached when _options was never set (e.g. the instance was
      # created without __init__); delegating would recurse without bound.
      raise AttributeError(name)
    return getattr(self._options, name)

  def __repr__(self):
    """Return a json representation of the original options dictionary."""
    return json.dumps(self._options.__dict__)

  def HasTrafficShaping(self):
    """Returns True iff any traffic shaping option has a non-default value."""
    return bool(self._TRAFFICSHAPING_OPTIONS & self._nondefaults)

  def IsRootRequired(self):
    """Returns True iff the options require whole program root access."""
    if self.server:
      return True

    def IsPrivilegedPort(port):
      return port and port < 1024

    if IsPrivilegedPort(self.port) or (self.ssl and
                                       IsPrivilegedPort(self.ssl_port)):
      return True

    if self.dns_forwarding:
      if IsPrivilegedPort(self.dns_port):
        return True
      if not self.server_mode and self.host == '127.0.0.1':
        return True

    return False


def replay(options, replay_filename):
  """Set up the servers described by |options| and run them until stopped.

  Args:
    options: an OptionsWrapper.
    replay_filename: path of the archive to load and/or save; main() passes
        None when --server is used.
  Returns:
    0 on normal shutdown (including KeyboardInterrupt), 1 for the known
    proxy/shaper/platform errors, 2 for any other exception.
  """
  if options.admin_check and options.IsRootRequired():
    platformsettings.rerun_as_administrator()
  configure_logging(options.log_level, options.log_file)
  server_manager = servermanager.ServerManager(options.record)
  # Stays None with --server, where no archive is loaded or created.
  http_archive = None
  if options.server:
    AddDnsForward(server_manager, options.server)
  else:
    real_dns_lookup = dnsproxy.RealDnsLookup(
        name_servers=[platformsettings.get_original_primary_nameserver()])
    if options.record:
      httparchive.HttpArchive.AssertWritable(replay_filename)
      if options.append and os.path.exists(replay_filename):
        http_archive = httparchive.HttpArchive.Load(replay_filename)
        logging.info('Appending to %s (loaded %d existing responses)',
                     replay_filename, len(http_archive))
      else:
        http_archive = httparchive.HttpArchive()
    else:
      http_archive = httparchive.HttpArchive.Load(replay_filename)
      logging.info('Loaded %d responses from %s',
                   len(http_archive), replay_filename)
    server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)
    server_manager.AppendRecordCallback(http_archive.clear)

    ipfw_dns_host = None
    if options.dns_forwarding or options.shaping_dummynet:
      # compute the ip/host used for the DNS server and traffic shaping
      ipfw_dns_host = options.host
      if not ipfw_dns_host:
        ipfw_dns_host = platformsettings.get_server_ip_address(
            options.server_mode)

    if options.dns_forwarding:
      if not options.server_mode and ipfw_dns_host == '127.0.0.1':
        AddDnsForward(server_manager, ipfw_dns_host)
      AddDnsProxy(server_manager, options, ipfw_dns_host, options.dns_port,
                  real_dns_lookup, http_archive)
    if options.ssl and options.https_root_ca_cert_path is None:
      options.https_root_ca_cert_path = os.path.join(os.path.dirname(__file__),
                                                     'wpr_cert.pem')
    http_proxy_address = options.host
    if not http_proxy_address:
      http_proxy_address = platformsettings.get_httpproxy_ip_address(
          options.server_mode)
    AddWebProxy(server_manager, options, http_proxy_address, real_dns_lookup,
                http_archive)
    AddTrafficShaper(server_manager, options, ipfw_dns_host)

  exit_status = 0
  try:
    server_manager.Run()
  except KeyboardInterrupt:
    logging.info('Shutting down.')
  except (dnsproxy.DnsProxyException,
          trafficshaper.TrafficShaperException,
          platformsettings.NotAdministratorError,
          platformsettings.DnsUpdateError) as e:
    logging.critical('%s: %s', e.__class__.__name__, e)
    exit_status = 1
  except Exception:
    logging.critical(traceback.format_exc())
    exit_status = 2

  # http_archive is None when --server was combined with --record/--append;
  # there is nothing to save then, and using it unguarded raised NameError.
  if options.record and http_archive is not None:
    http_archive.Persist(replay_filename)
    logging.info('Saved %d responses to %s', len(http_archive), replay_filename)
  return exit_status


def GetOptionParser():
  """Build and return the optparse.OptionParser for the command line."""
  class PlainHelpFormatter(optparse.IndentedHelpFormatter):
    """Formatter that emits the description text as-is, without re-wrapping."""

    def format_description(self, description):
      if description:
        return description + '\n'
      else:
        return ''
  option_parser = optparse.OptionParser(
      usage='%prog [options] replay_file',
      formatter=PlainHelpFormatter(),
      description=__doc__,
      epilog='http://code.google.com/p/web-page-replay/')

  option_parser.add_option('-r', '--record', default=False,
      action='store_true',
      help='Download real responses and record them to replay_file')
  option_parser.add_option('--append', default=False,
      action='store_true',
      help='Append responses to replay_file.')
  option_parser.add_option('-l', '--log_level', default='debug',
      action='store',
      type='choice',
      choices=('debug', 'info', 'warning', 'error', 'critical'),
      help='Minimum verbosity level to log')
  option_parser.add_option('-f', '--log_file', default=None,
      action='store',
      type='string',
      help='Log file to use in addition to writing logs to stderr.')

  network_group = optparse.OptionGroup(option_parser,
      'Network Simulation Options',
      'These options configure the network simulation in replay mode')
  network_group.add_option('-u', '--up', default='0',
      action='store',
      type='string',
      help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_option('-d', '--down', default='0',
      action='store',
      type='string',
      help='Download Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_option('-m', '--delay_ms', default='0',
      action='store',
      type='string',
      help='Propagation delay (latency) in milliseconds. Zero means no delay.')
  network_group.add_option('-p', '--packet_loss_rate', default='0',
      action='store',
      type='string',
      help='Packet loss rate in range [0..1]. Zero means no loss.')
  network_group.add_option('-w', '--init_cwnd', default='0',
      action='store',
      type='string',
      help='Set initial cwnd (linux only, requires kernel patch)')
  network_group.add_option('--net', default=None,
      action='store',
      type='choice',
      choices=net_configs.NET_CONFIG_NAMES,
      help='Select a set of network options: %s.' % ', '.join(
          net_configs.NET_CONFIG_NAMES))
  network_group.add_option('--shaping_type', default='dummynet',
      action='store',
      choices=('dummynet', 'proxy'),
      help='When shaping is configured (i.e. --up, --down, etc.) decides '
           'whether to use |dummynet| (default), or |proxy| servers.')
  option_parser.add_option_group(network_group)

  harness_group = optparse.OptionGroup(option_parser,
      'Replay Harness Options',
      'These advanced options configure various aspects of the replay harness')
  harness_group.add_option('-S', '--server', default=None,
      action='store',
      type='string',
      help='IP address of host running "replay.py --server_mode". '
           'This only changes the primary DNS nameserver to use the given IP.')
  harness_group.add_option('-M', '--server_mode', default=False,
      action='store_true',
      help='Run replay DNS & http proxies, and trafficshaping on --port '
           'without changing the primary DNS nameserver. '
           'Other hosts may connect to this using "replay.py --server" '
           'or by pointing their DNS to this server.')
  harness_group.add_option('-i', '--inject_scripts', default='deterministic.js',
      action='store',
      dest='inject_scripts',
      help='A comma separated list of JavaScript sources to inject in all '
           'pages. By default a script is injected that makes sources '
           'of entropy such as Date() and Math.random() deterministic. '
           'CAUTION: Without deterministic.js, many pages will not replay.')
  harness_group.add_option('-D', '--no-diff_unknown_requests', default=True,
      action='store_false',
      dest='diff_unknown_requests',
      help='During replay, do not show a diff of unknown requests against '
           'their nearest match in the archive.')
  harness_group.add_option('-C', '--use_closest_match', default=False,
      action='store_true',
      dest='use_closest_match',
      help='During replay, if a request is not found, serve the closest match '
           'in the archive instead of giving a 404.')
  harness_group.add_option('-U', '--use_server_delay', default=False,
      action='store_true',
      dest='use_server_delay',
      help='During replay, simulate server delay by delaying response time to '
           'requests.')
  harness_group.add_option('-I', '--screenshot_dir', default=None,
      action='store',
      type='string',
      help='Save PNG images of the loaded page in the given directory.')
  harness_group.add_option('-P', '--no-dns_private_passthrough', default=True,
      action='store_false',
      dest='dns_private_passthrough',
      help='Don\'t forward DNS requests that resolve to private network '
           'addresses. CAUTION: With this option important services like '
           'Kerberos will resolve to the HTTP proxy address.')
  harness_group.add_option('-x', '--no-dns_forwarding', default=True,
      action='store_false',
      dest='dns_forwarding',
      help='Don\'t forward DNS requests to the local replay server. '
           'CAUTION: With this option an external mechanism must be used to '
           'forward traffic to the replay server.')
  harness_group.add_option('--host', default=None,
      action='store',
      type='str',
      help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
           '127.0.0.1, depending on --server_mode and platform.')
  harness_group.add_option('-o', '--port', default=80,
      action='store',
      type='int',
      help='Port number to listen on.')
  harness_group.add_option('--ssl_port', default=443,
      action='store',
      type='int',
      help='SSL port number to listen on.')
  harness_group.add_option('--http_to_https_port', default=None,
      action='store',
      type='int',
      help='Port on which WPR will listen for HTTP requests that it will send '
           'along as HTTPS requests.')
  harness_group.add_option('--dns_port', default=53,
      action='store',
      type='int',
      help='DNS port number to listen on.')
  harness_group.add_option('-c', '--https_root_ca_cert_path', default=None,
      action='store',
      type='string',
      help='Certificate file to use with SSL (gets auto-generated if needed).')
  harness_group.add_option('--no-ssl', default=True,
      action='store_false',
      dest='ssl',
      help='Do not setup an SSL proxy.')
  harness_group.add_option('--should_generate_certs', default=False,
      action='store_true',
      help='Use OpenSSL to generate certificate files for requested hosts.')
  harness_group.add_option('--no-admin-check', default=True,
      action='store_false',
      dest='admin_check',
      help='Do not check if administrator access is needed.')
  harness_group.add_option('--scramble_images', default=False,
      action='store_true',
      dest='scramble_images',
      help='Scramble image responses.')
  harness_group.add_option('--rules_path', default=None,
      action='store',
      help='Path of file containing Python rules.')
  harness_group.add_option('--allowed_rule_imports', default='rules',
      action='store',
      help='A comma-separated list of allowed rule imports, or \'*\' to allow'
           ' all packages.  Defaults to \'%default\'.')
  # Attach the group once it is fully populated.
  option_parser.add_option_group(harness_group)
  return option_parser


def main():
  """Parse the command line and run replay(); returns its exit status."""
  option_parser = GetOptionParser()
  options, args = option_parser.parse_args()
  options = OptionsWrapper(options, option_parser)

  if options.server:
    # --server only changes the primary nameserver; no archive file is used.
    replay_filename = None
  elif len(args) != 1:
    option_parser.error('Must specify a replay_file')
  else:
    replay_filename = args[0]

  return replay(options, replay_filename)


if __name__ == '__main__':
  sys.exit(main())