#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Does scraping for all known versions of IE."""

import pywintypes
import sys
import time
import types

from drivers import keyboard
from drivers import mouse
from drivers import windowing

# Default version
version = "7.0.5730.1"

DEFAULT_PATH = r"c:\program files\internet explorer\iexplore.exe"

# Second argument handed to windowing.WaitForThrobber for the tab window.
# NOTE(review): presumably the region of the tab window that holds the
# throbber -- confirm against windowing.WaitForThrobber.
_THROBBER_REGION = (6, 8, 22, 24)

# How many times InvokeBrowser looks for the child windows (sleeping one
# second after each failed attempt) before giving up.
_FIND_WINDOW_TRIES = 10


def GetBrowser(path):
    """Invoke the IE browser and return the process, frame, and content window.

    Args:
      path: full path to browser; DEFAULT_PATH is used when this is empty

    Returns:
      A tuple of (process handle, main window, render pane)
    """
    if not path:
        path = DEFAULT_PATH

    # The address bar and tab window are not part of this function's result.
    (iewnd, ieproc, _, render_pane, _) = InvokeBrowser(path)
    return (ieproc, iewnd, render_pane)


def InvokeBrowser(path):
    """Invoke the IE browser.

    Args:
      path: full path to browser

    Returns:
      A tuple of (main window, process handle, address bar,
                  render_pane, tab_window)

    Raises:
      IndexError: the child windows could not be found after
        _FIND_WINDOW_TRIES attempts. The browser process is ended first.
    """
    # Invoke IE
    (ieproc, iewnd) = windowing.InvokeAndWait(path)

    # Get windows we'll need. The lookups raise IndexError while the
    # windows are missing, so retry a few times before giving up.
    for _ in range(_FIND_WINDOW_TRIES):
        try:
            address_bar = windowing.FindChildWindow(
                iewnd, "WorkerW|Navigation Bar/ReBarWindow32/"
                "Address Band Root/ComboBoxEx32/ComboBox/Edit")
            render_pane = windowing.FindChildWindow(
                iewnd, "TabWindowClass/Shell DocObject View")
            tab_window = windowing.FindChildWindow(
                iewnd,
                "CommandBarClass/ReBarWindow32/TabBandClass/DirectUIHWND")
        except IndexError:
            time.sleep(1)
            continue
        break
    else:
        # Every attempt failed. Previously execution fell through to the
        # return statement with the window names unbound; fail explicitly
        # instead, and do not leave the browser we started running.
        windowing.EndProcess(ieproc)
        raise IndexError(
            "could not find the browser child windows after %d tries"
            % _FIND_WINDOW_TRIES)

    return (iewnd, ieproc, address_bar, render_pane, tab_window)


def _NavigateAndWait(address_bar, tab_window, url, timeout):
    """Type a URL into the address bar and wait for the load to finish.

    Args:
      address_bar: address bar window, as returned by InvokeBrowser
      tab_window: tab window, as returned by InvokeBrowser
      url: URL to visit
      timeout: amount of time to wait for page to load

    Returns:
      The value of windowing.WaitForThrobber; callers treat a negative
      value as a timeout.
    """
    # Double-click in the address bar, type the name, and press Enter
    mouse.DoubleClickInWindow(address_bar)
    keyboard.TypeString(url)
    keyboard.TypeString("\n")

    # Wait for the page to finish loading
    return windowing.WaitForThrobber(tab_window, _THROBBER_REGION, timeout)


def _CloseBrowser(proc, timeout):
    """Ask the browser to close, ending the process if it does not exit.

    Args:
      proc: process handle of the browser
      timeout: amount of time to wait for the process to exit

    Returns:
      True if the process exited within the timeout, False if it had to
      be ended.
    """
    # Send an alt-F4 to make the browser close; if this times out,
    # we've probably got a crash
    keyboard.TypeString(r"{\4}", use_modifiers=True)
    if windowing.WaitForProcessExit(proc, timeout):
        return True
    windowing.EndProcess(proc)
    return False


def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
    """Invoke a browser, send it to a series of URLs, and save its output.

    Args:
      urls: list of URLs to scrape (a single URL string is also accepted)
      outdir: directory to place output
      size: size of browser window to use
      pos: position of browser window
      timeout: amount of time to wait for page to load
      kwargs: miscellaneous keyword args. "path" overrides the browser
        path; "filename" is either the output filename or a callable
        mapping a URL to the output filename.

    Returns:
      None if success, else an error string
    """
    path = kwargs.get("path") or DEFAULT_PATH

    (iewnd, ieproc, address_bar, render_pane, tab_window) = (
        InvokeBrowser(path))

    timedout = False
    try:
        # Resize and reposition the frame
        windowing.MoveAndSizeWindow(iewnd, pos, size, render_pane)

        # Visit each URL we're given
        if isinstance(urls, types.StringTypes):
            urls = [urls]

        for url in urls:
            load_time = _NavigateAndWait(
                address_bar, tab_window, url, timeout)
            timedout = load_time < 0

            if timedout:
                break

            # Scrape the page
            image = windowing.ScrapeWindow(render_pane)

            # Save to disk
            if "filename" in kwargs:
                if callable(kwargs["filename"]):
                    filename = kwargs["filename"](url)
                else:
                    filename = kwargs["filename"]
            else:
                filename = windowing.URLtoFilename(url, outdir, ".bmp")
            image.save(filename)
    finally:
        # End the browser even when driving or scraping it raised, so a
        # failed run does not leave the process behind.
        windowing.EndProcess(ieproc)

    if timedout:
        return "timeout"
    return None


def Time(urls, size, timeout, **kwargs):
    """Measure how long it takes to load each of a series of URLs

    Args:
      urls: list of URLs to time (a single URL string is also accepted)
      size: size of browser window to use
      timeout: amount of time to wait for page to load
      kwargs: miscellaneous keyword args. "path" overrides the browser path.

    Returns:
      A list of tuples (url, time). "time" can be "crashed" or "timeout"
    """
    path = kwargs.get("path") or DEFAULT_PATH
    proc = None

    # Visit each URL we're given
    if isinstance(urls, types.StringTypes):
        urls = [urls]

    ret = []
    for url in urls:
        try:
            # Invoke the browser if necessary
            if not proc:
                (wnd, proc, address_bar, render_pane, tab_window) = (
                    InvokeBrowser(path))

                # Resize and reposition the frame
                windowing.MoveAndSizeWindow(wnd, (0, 0), size, render_pane)

            load_time = _NavigateAndWait(
                address_bar, tab_window, url, timeout)

            if load_time < 0:
                load_time = "timeout"

                # Close the browser; the next URL starts a fresh one.
                if not _CloseBrowser(proc, timeout):
                    load_time = "crashed"

                proc = None
        except pywintypes.error:
            load_time = "crashed"
            proc = None

        ret.append((url, load_time))

    # Close the browser if one is still running
    if proc:
        _CloseBrowser(proc, timeout)

    return ret


def main():
    """Run a small manual test: scrape three sites into a fixed directory.

    Returns:
      0, used as the process exit code.
    """
    # We're being invoked rather than imported, so run some tests
    path = r"c:\sitecompare\scrapes\ie7\7.0.5380.11"
    windowing.PreparePath(path)

    # Scrape three sites and save the results
    Scrape(
        ["http://www.microsoft.com",
         "http://www.google.com",
         "http://www.sun.com"],
        path, (1024, 768), (0, 0))
    return 0


if __name__ == "__main__":
    sys.exit(main())