# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utility to use a browser to visit multiple URLs.

Prerequisites:
  1. The command_line package from tools/site_compare
  2. Either the IE BHO or Firefox extension (or both)

Installation:
  1. Build the IE BHO, or call regsvr32 on a prebuilt binary
  2. Add a file called "measurepageloadtimeextension@google.com" to
     the default Firefox profile directory under extensions, containing
     the path to the Firefox extension root

Invoke with the command line arguments as documented within
the command line.
"""
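
# Example of the pointer file from Installation step 2 above (the path shown
# is purely illustrative): create a plain-text file named
# "measurepageloadtimeextension@google.com" inside the profile's extensions/
# directory whose only content is the path to the Firefox extension root,
# e.g. a single line such as:
#
#   c:\src\measurepageloadtime\firefox_extension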

import command_line
import pywintypes  # for pywintypes.error, caught in DetachFromBrowser()
import scrapers
import socket
import time

from drivers import windowing

# Constants
MAX_URL = 1024
PORT = 42492
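
# Note on the protocol used by Iterate() below: each URL is written to the
# browser extension/BHO over a TCP socket on localhost:PORT followed by a
# newline, and a newline-terminated reply of the form "<url>,<result>\n" is
# read back (up to MAX_URL bytes per recv).  A ",timeout\n" reply comes from
# the browser side; ",hang\n" and ",crash\n" are synthesized locally when the
# socket times out or errors.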

def SetupIterationCommandLine(cmd):
  """Adds the necessary flags for iteration to a command.

  Args:
    cmd: an object created by cmdline.AddCommand
  """
  cmd.AddArgument(
    ["-b", "--browser"], "Browser to use (ie, firefox, chrome)",
    type="string", required=True)
  cmd.AddArgument(
    ["-bv", "--browserver"], "Version of browser", metaname="VERSION")
  cmd.AddArgument(
    ["-p", "--browserpath"], "Path to browser.",
    type="string", required=False)
  cmd.AddArgument(
    ["-u", "--url"], "URL to visit")
  cmd.AddArgument(
    ["-l", "--list"], "File containing list of URLs to visit", type="readfile")
  cmd.AddMutualExclusion(["--url", "--list"])
  cmd.AddArgument(
    ["-s", "--startline"], "First line of URL list", type="int")
  cmd.AddArgument(
    ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
  cmd.AddArgument(
    ["-c", "--count"], "Number of lines of URL file to use", type="int")
  cmd.AddDependency("--startline", "--list")
  cmd.AddRequiredGroup(["--url", "--list"])
  cmd.AddDependency("--endline", "--list")
  cmd.AddDependency("--count", "--list")
  cmd.AddMutualExclusion(["--count", "--endline"])
  cmd.AddDependency("--count", "--startline")
  cmd.AddArgument(
    ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
    "finish loading",
    type="int", default=300)
  cmd.AddArgument(
    ["-sz", "--size"], "Browser window size", default=(800, 600), type="coords")
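
# Illustrative only: a command that registers these flags might be invoked
# roughly as follows (script name, URL file, and values are hypothetical):
#
#   <tool> <command> -b firefox -l urls.txt -s 1 -c 100 -t 60
#
# --url/-u and --list/-l are mutually exclusive; --startline, --endline and
# --count apply only when --list is given, and --count excludes --endline.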


def Iterate(command, iteration_func):
  """Iterates over a list of URLs, calling a function on each.

  Args:
    command: the command line containing the iteration flags
    iteration_func: called for each URL with (url, proc, wnd, result)
  """

  # Retrieve the browser scraper to use to invoke the browser
  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))

  def AttachToBrowser(path, timeout):
    """Invoke the browser process and connect to the socket."""
    (proc, frame, wnd) = scraper.GetBrowser(path)

    if not wnd:
      raise ValueError("Could not invoke browser.")

    # Try to connect the socket. If it fails, wait and try
    # again. Do this for ten seconds
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)

    for attempt in xrange(10):
      try:
        s.connect(("localhost", PORT))
      except socket.error:
        time.sleep(1)
        continue
      break

    try:
      s.getpeername()
    except socket.error:
      raise ValueError("Could not connect to browser")

    if command["--size"]:
      # Resize and reposition the frame
      windowing.MoveAndSizeWindow(frame, (0, 0), command["--size"], wnd)

    s.settimeout(timeout)

    Iterate.proc = proc
    Iterate.wnd = wnd
    Iterate.s = s

  def DetachFromBrowser():
    """Close the socket and kill the process if necessary."""
    if Iterate.s:
      Iterate.s.close()
      Iterate.s = None

    if Iterate.proc:
      if not windowing.WaitForProcessExit(Iterate.proc, 0):
        try:
          windowing.EndProcess(Iterate.proc)
          windowing.WaitForProcessExit(Iterate.proc, 0)
        except pywintypes.error:
          # Exception here most likely means the process died on its own
          pass
      Iterate.proc = None

  if command["--browserpath"]:
    browser = command["--browserpath"]
  else:
    browser = None

  # Read the URLs from the file
  if command["--url"]:
    url_list = [command["--url"]]
  else:
    startline = command["--startline"]
    if command["--count"]:
      endline = startline + command["--count"]
    else:
      endline = command["--endline"]

    url_list = []
    url_file = open(command["--list"], "r")

    # Skip to the first requested line, then read the requested range
    for _ in xrange(startline - 1):
      url_file.readline()

    for _ in xrange(endline - startline):
      url_list.append(url_file.readline().strip())

    url_file.close()

  timeout = command["--timeout"]

  # Loop through the URLs and send them through the socket
  Iterate.s    = None
  Iterate.proc = None
  Iterate.wnd  = None

  for url in url_list:
    # Invoke the browser if necessary
    if not Iterate.proc:
      AttachToBrowser(browser, timeout)
    # Send the URL and wait for a response
    Iterate.s.send(url + "\n")

    response = ""

    while response.find("\n") < 0:

      try:
        recv = Iterate.s.recv(MAX_URL)
        response = response + recv

        # Workaround for an oddity: when Firefox closes
        # gracefully, somehow Python doesn't detect it.
        # (Telnet does)
        if not recv:
          raise socket.error

      except socket.timeout:
        response = url + ",hang\n"
        DetachFromBrowser()
      except socket.error:
        # If there was a socket error, it's probably a crash
        response = url + ",crash\n"
        DetachFromBrowser()

      # If we received a timeout response, restart the browser
      if response[-9:] == ",timeout\n":
        DetachFromBrowser()

      # Invoke the iteration function
      iteration_func(url, Iterate.proc, Iterate.wnd, response)

  # We're done
  DetachFromBrowser()
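

# A minimal, hypothetical usage sketch (not part of the original module).
# Iterate() calls iteration_func for each URL visited with
# (url, proc, wnd, result), where result is a line such as "<url>,<status>\n";
# a caller registers the flags above on a command_line command and then runs
# something like the following (LogResult and `cmd`/`command` are illustrative
# names, not part of this module):
#
#   def LogResult(url, proc, wnd, result):
#     print "%s -> %s" % (url, result.strip())
#
#   SetupIterationCommandLine(cmd)   # cmd comes from command_line's AddCommand
#   ...                              # parse the command line into `command`
#   Iterate(command, LogResult)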
    200