Home | History | Annotate | Download | only in chrome
      1 #!/usr/bin/env python
      2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Does scraping for all currently-known versions of Chrome"""
      7 
      8 import pywintypes
      9 import types
     10 
     11 from drivers import keyboard
     12 from drivers import mouse
     13 from drivers import windowing
     14 
     15 
# Browser executable used when the caller does not supply a "path" kwarg.
# TODO: chrome.exe has moved; add some logic to locate it. For now this
# expects a `subst` drive mapping k: with chrome.exe at its root.
DEFAULT_PATH = r"k:\chrome.exe"
     19 
     20 
     21 def InvokeBrowser(path):
     22   """Invoke the Chrome browser.
     23 
     24   Args:
     25     path: full path to browser
     26 
     27   Returns:
     28     A tuple of (main window, process handle, address bar, render pane)
     29   """
     30 
     31   # Reuse an existing instance of the browser if we can find one. This
     32   # may not work correctly, especially if the window is behind other windows.
     33 
     34   # TODO(jhaas): make this work with Vista
     35   wnds = windowing.FindChildWindows(0, "Chrome_XPFrame")
     36   if len(wnds):
     37     wnd = wnds[0]
     38     proc = None
     39   else:
     40     # Invoke Chrome
     41     (proc, wnd) = windowing.InvokeAndWait(path)
     42 
     43   # Get windows we'll need
     44   address_bar = windowing.FindChildWindow(wnd, "Chrome_AutocompleteEdit")
     45   render_pane = GetChromeRenderPane(wnd)
     46 
     47   return (wnd, proc, address_bar, render_pane)
     48 
     49 
     50 def Scrape(urls, outdir, size, pos, timeout, kwargs):
     51   """Invoke a browser, send it to a series of URLs, and save its output.
     52 
     53   Args:
     54     urls: list of URLs to scrape
     55     outdir: directory to place output
     56     size: size of browser window to use
     57     pos: position of browser window
     58     timeout: amount of time to wait for page to load
     59     kwargs: miscellaneous keyword args
     60 
     61   Returns:
     62     None if success, else an error string
     63   """
     64   if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
     65   else: path = DEFAULT_PATH
     66 
     67   (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)
     68 
     69   # Resize and reposition the frame
     70   windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)
     71 
     72   # Visit each URL we're given
     73   if type(urls) in types.StringTypes: urls = [urls]
     74 
     75   timedout = False
     76 
     77   for url in urls:
     78     # Double-click in the address bar, type the name, and press Enter
     79     mouse.ClickInWindow(address_bar)
     80     keyboard.TypeString(url, 0.1)
     81     keyboard.TypeString("\n")
     82 
     83     # Wait for the page to finish loading
     84     load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout)
     85     timedout = load_time < 0
     86 
     87     if timedout:
     88       break
     89 
     90     # Scrape the page
     91     image = windowing.ScrapeWindow(render_pane)
     92 
     93     # Save to disk
     94     if "filename" in kwargs:
     95       if callable(kwargs["filename"]):
     96         filename = kwargs["filename"](url)
     97       else:
     98         filename = kwargs["filename"]
     99     else:
    100       filename = windowing.URLtoFilename(url, outdir, ".bmp")
    101     image.save(filename)
    102 
    103   if proc:
    104     windowing.SetForegroundWindow(wnd)
    105 
    106     # Send Alt-F4, then wait for process to end
    107     keyboard.TypeString(r"{\4}", use_modifiers=True)
    108     if not windowing.WaitForProcessExit(proc, timeout):
    109       windowing.EndProcess(proc)
    110       return "crashed"
    111 
    112   if timedout:
    113     return "timeout"
    114 
    115   return None
    116 
    117 
    118 def Time(urls, size, timeout, kwargs):
    119   """Measure how long it takes to load each of a series of URLs
    120 
    121   Args:
    122     urls: list of URLs to time
    123     size: size of browser window to use
    124     timeout: amount of time to wait for page to load
    125     kwargs: miscellaneous keyword args
    126 
    127   Returns:
    128     A list of tuples (url, time). "time" can be "crashed" or "timeout"
    129   """
    130   if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
    131   else: path = DEFAULT_PATH
    132   proc = None
    133 
    134   # Visit each URL we're given
    135   if type(urls) in types.StringTypes: urls = [urls]
    136 
    137   ret = []
    138   for url in urls:
    139     try:
    140       # Invoke the browser if necessary
    141       if not proc:
    142         (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)
    143 
    144         # Resize and reposition the frame
    145         windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)
    146 
    147       # Double-click in the address bar, type the name, and press Enter
    148       mouse.ClickInWindow(address_bar)
    149       keyboard.TypeString(url, 0.1)
    150       keyboard.TypeString("\n")
    151 
    152       # Wait for the page to finish loading
    153       load_time = windowing.WaitForThrobber(wnd, (20, 16, 36, 32), timeout)
    154 
    155       timedout = load_time < 0
    156 
    157       if timedout:
    158         load_time = "timeout"
    159 
    160         # Send an alt-F4 to make the browser close; if this times out,
    161         # we've probably got a crash
    162         windowing.SetForegroundWindow(wnd)
    163 
    164         keyboard.TypeString(r"{\4}", use_modifiers=True)
    165         if not windowing.WaitForProcessExit(proc, timeout):
    166           windowing.EndProcess(proc)
    167           load_time = "crashed"
    168         proc = None
    169     except pywintypes.error:
    170       proc = None
    171       load_time = "crashed"
    172 
    173     ret.append( (url, load_time) )
    174 
    175   if proc:
    176     windowing.SetForegroundWindow(wnd)
    177     keyboard.TypeString(r"{\4}", use_modifiers=True)
    178     if not windowing.WaitForProcessExit(proc, timeout):
    179       windowing.EndProcess(proc)
    180 
    181   return ret
    182 
    183 
    184 def main():
    185   # We're being invoked rather than imported, so run some tests
    186   path = r"c:\sitecompare\scrapes\chrome\0.1.97.0"
    187   windowing.PreparePath(path)
    188 
    189   # Scrape three sites and save the results
    190   Scrape([
    191     "http://www.microsoft.com",
    192     "http://www.google.com",
    193     "http://www.sun.com"],
    194          path, (1024, 768), (0, 0))
    195   return 0
    196 
    197 
    198 if __name__ == "__main__":
    199   sys.exit(main())
    200