#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Does scraping for all currently-known versions of Chrome"""

import sys
import types

import pywintypes

from drivers import keyboard
from drivers import mouse
from drivers import windowing


# TODO: this has moved, use some logic to find it. For now,
# expects a subst k:.
DEFAULT_PATH = r"k:\chrome.exe"

# Rectangle handed to windowing.WaitForThrobber for every page load.
# NOTE(review): presumably the throbber's bounds inside the frame window --
# confirm against the windowing driver.
_THROBBER_RECT = (20, 16, 36, 32)

# Key sequence typed with use_modifiers=True to send Alt-F4 to the browser.
_CLOSE_WINDOW_KEYS = r"{\4}"

# types.StringTypes only exists on Python 2; fall back to plain str elsewhere
# so the "single URL passed as a string" check keeps working.
_STRING_TYPES = getattr(types, "StringTypes", (str,))


def _GetBrowserPath(kwargs):
    """Return kwargs["path"] if present and truthy, else DEFAULT_PATH."""
    return kwargs.get("path") or DEFAULT_PATH


def _AsUrlList(urls):
    """Wrap a single URL string in a list; return any other value unchanged."""
    if isinstance(urls, _STRING_TYPES):
        return [urls]
    return urls


def _LoadUrl(wnd, address_bar, url, timeout):
    """Type a URL into the address bar and wait for the page load to finish.

    Args:
      wnd: main browser window
      address_bar: address bar window, as returned by InvokeBrowser
      url: URL to navigate to
      timeout: amount of time to wait for page to load

    Returns:
      The value returned by windowing.WaitForThrobber. Callers treat a
      negative value as a timeout.
    """
    # Click in the address bar, type the name, and press Enter
    mouse.ClickInWindow(address_bar)
    keyboard.TypeString(url, 0.1)
    keyboard.TypeString("\n")

    # Wait for the page to finish loading
    return windowing.WaitForThrobber(wnd, _THROBBER_RECT, timeout)


def _CloseBrowser(wnd, proc, timeout):
    """Send Alt-F4 to the browser and end the process if it does not exit.

    Args:
      wnd: main browser window
      proc: process handle of the browser
      timeout: amount of time to wait for the process to exit

    Returns:
      True if the process exited within the timeout, False if it had to be
      ended with windowing.EndProcess.
    """
    windowing.SetForegroundWindow(wnd)

    # Send Alt-F4, then wait for process to end
    keyboard.TypeString(_CLOSE_WINDOW_KEYS, use_modifiers=True)
    if windowing.WaitForProcessExit(proc, timeout):
        return True

    windowing.EndProcess(proc)
    return False


def InvokeBrowser(path):
    """Invoke the Chrome browser.

    Args:
      path: full path to browser

    Returns:
      A tuple of (main window, process handle, address bar, render pane).
      The process handle is None when an already-running browser window
      was reused instead of launching a new process.
    """

    # Reuse an existing instance of the browser if we can find one. This
    # may not work correctly, especially if the window is behind other windows.

    # TODO(jhaas): make this work with Vista
    wnds = windowing.FindChildWindows(0, "Chrome_XPFrame")
    if wnds:
        wnd = wnds[0]
        proc = None
    else:
        # Invoke Chrome
        (proc, wnd) = windowing.InvokeAndWait(path)

    # Get windows we'll need
    address_bar = windowing.FindChildWindow(wnd, "Chrome_AutocompleteEdit")
    # NOTE(review): GetChromeRenderPane is neither defined nor imported in this
    # file; presumably it is injected into this module's namespace by the
    # caller before use -- confirm, otherwise this line raises NameError.
    render_pane = GetChromeRenderPane(wnd)

    return (wnd, proc, address_bar, render_pane)


def Scrape(urls, outdir, size, pos, timeout, kwargs):
    """Invoke a browser, send it to a series of URLs, and save its output.

    Args:
      urls: list of URLs to scrape (a single URL string is also accepted)
      outdir: directory to place output
      size: size of browser window to use
      pos: position of browser window
      timeout: amount of time to wait for page to load
      kwargs: dictionary of miscellaneous keyword args. Recognized keys:
        "path": browser executable to use instead of DEFAULT_PATH
        "filename": output filename, or a callable taking the URL and
          returning the output filename; when absent the name is built
          with windowing.URLtoFilename(url, outdir, ".bmp")

    Returns:
      None if success, else an error string ("crashed" or "timeout")
    """
    path = _GetBrowserPath(kwargs)

    (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)

    # Resize and reposition the frame
    windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)

    timedout = False

    # Visit each URL we're given
    for url in _AsUrlList(urls):
        load_time = _LoadUrl(wnd, address_bar, url, timeout)
        timedout = load_time < 0

        # Stop at the first page that fails to load in time
        if timedout:
            break

        # Scrape the page
        image = windowing.ScrapeWindow(render_pane)

        # Save to disk
        if "filename" in kwargs:
            if callable(kwargs["filename"]):
                filename = kwargs["filename"](url)
            else:
                filename = kwargs["filename"]
        else:
            filename = windowing.URLtoFilename(url, outdir, ".bmp")
        image.save(filename)

    # Only shut the browser down if we launched it ourselves; a reused
    # window has no process handle. A crash takes precedence over a timeout.
    if proc and not _CloseBrowser(wnd, proc, timeout):
        return "crashed"

    if timedout:
        return "timeout"

    return None


def Time(urls, size, timeout, kwargs):
    """Measure how long it takes to load each of a series of URLs

    Args:
      urls: list of URLs to time (a single URL string is also accepted)
      size: size of browser window to use
      timeout: amount of time to wait for page to load
      kwargs: dictionary of miscellaneous keyword args. Recognized keys:
        "path": browser executable to use instead of DEFAULT_PATH

    Returns:
      A list of tuples (url, time). "time" can be "crashed" or "timeout"
    """
    path = _GetBrowserPath(kwargs)
    proc = None

    ret = []

    # Visit each URL we're given
    for url in _AsUrlList(urls):
        try:
            # Invoke the browser if necessary
            if not proc:
                (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)

            # Resize and reposition the frame
            windowing.MoveAndSizeWindow(wnd, (0, 0), size, render_pane)

            load_time = _LoadUrl(wnd, address_bar, url, timeout)
            if load_time < 0:
                load_time = "timeout"

            # Send an alt-F4 to make the browser close; if this times out,
            # we've probably got a crash
            # NOTE(review): proc is None here when InvokeBrowser reused an
            # existing window; confirm how windowing.WaitForProcessExit and
            # windowing.EndProcess behave with a None handle.
            if not _CloseBrowser(wnd, proc, timeout):
                load_time = "crashed"
            proc = None
        except pywintypes.error:
            proc = None
            load_time = "crashed"

        ret.append((url, load_time))

    # Close any browser still tracked after the loop. Both loop paths above
    # reset proc to None, so this is a safety net only.
    if proc:
        _CloseBrowser(wnd, proc, timeout)

    return ret


def main():
    """Run a small self-test: scrape three sites into a fixed directory.

    Returns:
      0, used as the process exit status.
    """
    # We're being invoked rather than imported, so run some tests
    path = r"c:\sitecompare\scrapes\chrome\0.1.97.0"
    windowing.PreparePath(path)

    # Scrape requires a page-load timeout and a kwargs dictionary.
    # NOTE(review): 20 is an assumed value -- confirm the unit expected by
    # windowing.WaitForThrobber.
    timeout = 20

    # Scrape three sites and save the results
    Scrape([
        "http://www.microsoft.com",
        "http://www.google.com",
        "http://www.sun.com"],
        path, (1024, 768), (0, 0), timeout, {})
    return 0


if __name__ == "__main__":
    sys.exit(main())