Home | History | Annotate | Download | only in ie
      1 #!/usr/bin/env python
      2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Does scraping for all known versions of IE."""
      7 
      8 import pywintypes
      9 import time
     10 import types
     11 
     12 from drivers import keyboard
     13 from drivers import mouse
     14 from drivers import windowing
     15 
# Default IE version string associated with this scraper module.
# NOTE(review): not referenced anywhere else in this file; presumably read by
# whatever code selects a scraper by browser version -- confirm.
version = "7.0.5730.1"

# Location of the IE executable used when the caller does not supply a path.
DEFAULT_PATH = r"c:\program files\internet explorer\iexplore.exe"
     20 
     21 def GetBrowser(path):
     22   """Invoke the IE browser and return the process, frame, and content window.
     23 
     24   Args:
     25     path: full path to browser
     26 
     27   Returns:
     28     A tuple of (process handle, render pane)
     29   """
     30   if not path: path = DEFAULT_PATH
     31 
     32   (iewnd, ieproc, address_bar, render_pane, tab_window) = InvokeBrowser(path)
     33   return (ieproc, iewnd, render_pane)
     34 
     35 
     36 def InvokeBrowser(path):
     37   """Invoke the IE browser.
     38 
     39   Args:
     40     path: full path to browser
     41 
     42   Returns:
     43     A tuple of (main window, process handle, address bar,
     44                 render_pane, tab_window)
     45   """
     46   # Invoke IE
     47   (ieproc, iewnd) = windowing.InvokeAndWait(path)
     48 
     49   # Get windows we'll need
     50   for tries in xrange(10):
     51     try:
     52       address_bar = windowing.FindChildWindow(
     53         iewnd, "WorkerW|Navigation Bar/ReBarWindow32/"
     54         "Address Band Root/ComboBoxEx32/ComboBox/Edit")
     55       render_pane = windowing.FindChildWindow(
     56         iewnd, "TabWindowClass/Shell DocObject View")
     57       tab_window = windowing.FindChildWindow(
     58         iewnd, "CommandBarClass/ReBarWindow32/TabBandClass/DirectUIHWND")
     59     except IndexError:
     60       time.sleep(1)
     61       continue
     62     break
     63 
     64   return (iewnd, ieproc, address_bar, render_pane, tab_window)
     65 
     66 
     67 def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
     68   """Invoke a browser, send it to a series of URLs, and save its output.
     69 
     70   Args:
     71     urls: list of URLs to scrape
     72     outdir: directory to place output
     73     size: size of browser window to use
     74     pos: position of browser window
     75     timeout: amount of time to wait for page to load
     76     kwargs: miscellaneous keyword args
     77 
     78   Returns:
     79     None if success, else an error string
     80   """
     81   path = r"c:\program files\internet explorer\iexplore.exe"
     82 
     83   if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
     84 
     85   (iewnd, ieproc, address_bar, render_pane, tab_window) = (
     86     InvokeBrowser(path) )
     87 
     88   # Resize and reposition the frame
     89   windowing.MoveAndSizeWindow(iewnd, pos, size, render_pane)
     90 
     91   # Visit each URL we're given
     92   if type(urls) in types.StringTypes: urls = [urls]
     93 
     94   timedout = False
     95 
     96   for url in urls:
     97 
     98     # Double-click in the address bar, type the name, and press Enter
     99     mouse.DoubleClickInWindow(address_bar)
    100     keyboard.TypeString(url)
    101     keyboard.TypeString("\n")
    102 
    103     # Wait for the page to finish loading
    104     load_time = windowing.WaitForThrobber(
    105       tab_window, (6, 8, 22, 24), timeout)
    106     timedout = load_time < 0
    107 
    108     if timedout:
    109       break
    110 
    111     # Scrape the page
    112     image = windowing.ScrapeWindow(render_pane)
    113 
    114     # Save to disk
    115     if "filename" in kwargs:
    116       if callable(kwargs["filename"]):
    117         filename = kwargs["filename"](url)
    118       else:
    119         filename = kwargs["filename"]
    120     else:
    121       filename = windowing.URLtoFilename(url, outdir, ".bmp")
    122     image.save(filename)
    123 
    124   windowing.EndProcess(ieproc)
    125 
    126   if timedout:
    127     return "timeout"
    128 
    129 
    130 def Time(urls, size, timeout, **kwargs):
    131   """Measure how long it takes to load each of a series of URLs
    132 
    133   Args:
    134     urls: list of URLs to time
    135     size: size of browser window to use
    136     timeout: amount of time to wait for page to load
    137     kwargs: miscellaneous keyword args
    138 
    139   Returns:
    140     A list of tuples (url, time). "time" can be "crashed" or "timeout"
    141   """
    142   if "path" in kwargs and kwargs["path"]: path = kwargs["path"]
    143   else: path = DEFAULT_PATH
    144   proc = None
    145 
    146   # Visit each URL we're given
    147   if type(urls) in types.StringTypes: urls = [urls]
    148 
    149   ret = []
    150   for url in urls:
    151     try:
    152       # Invoke the browser if necessary
    153       if not proc:
    154         (wnd, proc, address_bar, render_pane, tab_window) = InvokeBrowser(path)
    155 
    156         # Resize and reposition the frame
    157         windowing.MoveAndSizeWindow(wnd, (0,0), size, render_pane)
    158 
    159       # Double-click in the address bar, type the name, and press Enter
    160       mouse.DoubleClickInWindow(address_bar)
    161       keyboard.TypeString(url)
    162       keyboard.TypeString("\n")
    163 
    164       # Wait for the page to finish loading
    165       load_time = windowing.WaitForThrobber(
    166         tab_window, (6, 8, 22, 24), timeout)
    167       timedout = load_time < 0
    168 
    169       if timedout:
    170         load_time = "timeout"
    171 
    172         # Send an alt-F4 to make the browser close; if this times out,
    173         # we've probably got a crash
    174         keyboard.TypeString(r"{\4}", use_modifiers=True)
    175         if not windowing.WaitForProcessExit(proc, timeout):
    176           windowing.EndProcess(proc)
    177           load_time = "crashed"
    178         proc = None
    179     except pywintypes.error:
    180       load_time = "crashed"
    181       proc = None
    182 
    183     ret.append( (url, load_time) )
    184 
    185   # Send an alt-F4 to make the browser close; if this times out,
    186   # we've probably got a crash
    187   if proc:
    188     keyboard.TypeString(r"{\4}", use_modifiers=True)
    189     if not windowing.WaitForProcessExit(proc, timeout):
    190       windowing.EndProcess(proc)
    191 
    192   return ret
    193 
    194 
    195 def main():
    196   # We're being invoked rather than imported, so run some tests
    197   path = r"c:\sitecompare\scrapes\ie7\7.0.5380.11"
    198   windowing.PreparePath(path)
    199 
    200   # Scrape three sites and save the results
    201   Scrape(
    202     ["http://www.microsoft.com",
    203      "http://www.google.com",
    204      "http://www.sun.com"],
    205     path, (1024, 768), (0, 0))
    206   return 0
    207 
    208 
    209 if __name__ == "__main__":
    210   sys.exit(main())
    211