Home | History | Annotate | Download | only in site_compare
      1 #!/usr/bin/env python
      2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """SiteCompare component to handle bulk scrapes.
      7 
      8 Invokes a list of browsers and sends them to a list of URLs,
      9 saving the rendered results to a specified directory, then
     10 performs comparison operations on the resulting bitmaps and
     11 saves the results
     12 """
     13 
     14 
     15 # This line is necessary to work around a QEMU bug
     16 import _imaging
     17 
     18 import os            # Functions for walking the directory tree
     19 import types         # Runtime type-checking
     20 
     21 import command_line  # command-line parsing
     22 import drivers       # Functions for driving keyboard/mouse/windows, OS-specific
     23 import operators     # Functions that, given two bitmaps as input, produce
     24                      # output depending on the performance of an operation
     25 import scrapers      # Functions that know how to capture a render from
     26                      # particular browsers
     27 
     28 import commands.compare2  # compare one page in two versions of same browser
     29 import commands.maskmaker # generate a mask based on repeated scrapes
     30 import commands.measure   # measure length of time a page takes to load
     31 import commands.scrape    # scrape a URL or series of URLs to a bitmap
     32 
# The timeload command is obsolete (too flaky); it may be reinstated
# later, but for now it has been superseded by "measure".
     35 # import commands.timeload  # measure length of time a page takes to load
     36 
     37 def Scrape(browsers, urls, window_size=(1024, 768),
     38            window_pos=(0, 0), timeout=20, save_path=None, **kwargs):
     39   """Invoke one or more browsers over one or more URLs, scraping renders.
     40 
     41   Args:
     42     browsers: browsers to invoke with optional version strings
     43     urls: URLs to visit
     44     window_size: size of the browser window to display
     45     window_pos: location of browser window
     46     timeout: time (in seconds) to wait for page to load
     47     save_path: root of save path, automatically appended with browser and
     48       version
     49     kwargs: miscellaneous keyword args, passed to scraper
     50   Returns:
     51     None
     52 
     53   @TODO(jhaas): more parameters, or perhaps an indefinite dictionary
     54   parameter, for things like length of time to wait for timeout, speed
     55   of mouse clicks, etc. Possibly on a per-browser, per-URL, or
     56   per-browser-per-URL basis
     57   """
     58 
     59   if type(browsers) in types.StringTypes: browsers = [browsers]
     60 
     61   if save_path is None:
     62     # default save path is "scrapes" off the current root
     63     save_path = os.path.join(os.path.split(__file__)[0], "Scrapes")
     64 
     65   for browser in browsers:
     66     # Browsers should be tuples of (browser, version)
     67     if type(browser) in types.StringTypes: browser = (browser, None)
     68     scraper = scrapers.GetScraper(browser)
     69 
     70     full_path = os.path.join(save_path, browser[0], scraper.version)
     71     drivers.windowing.PreparePath(full_path)
     72 
     73     scraper.Scrape(urls, full_path, window_size, window_pos, timeout, kwargs)
     74 
     75 
     76 def Compare(base, compare, ops, root_path=None, out_path=None):
     77   """Compares a series of scrapes using a series of operators.
     78 
     79   Args:
     80     base: (browser, version) tuple of version to consider the baseline
     81     compare: (browser, version) tuple of version to compare to
     82     ops: list of operators plus operator arguments
     83     root_path: root of the scrapes
     84     out_path: place to put any output from the operators
     85 
     86   Returns:
     87     None
     88 
     89   @TODO(jhaas): this method will likely change, to provide a robust and
     90   well-defined way of chaining operators, applying operators conditionally,
     91   and full-featured scripting of the operator chain. There also needs
     92   to be better definition of the output; right now it's to stdout and
     93   a log.txt file, with operator-dependent images saved for error output
     94   """
     95   if root_path is None:
     96     # default save path is "scrapes" off the current root
     97     root_path = os.path.join(os.path.split(__file__)[0], "Scrapes")
     98 
     99   if out_path is None:
    100     out_path = os.path.join(os.path.split(__file__)[0], "Compares")
    101 
    102   if type(base) in types.StringTypes: base = (base, None)
    103   if type(compare) in types.StringTypes: compare = (compare, None)
    104   if type(ops) in types.StringTypes: ops = [ops]
    105 
    106   base_dir = os.path.join(root_path, base[0])
    107   compare_dir = os.path.join(root_path, compare[0])
    108 
    109   if base[1] is None:
    110     # base defaults to earliest capture
    111     base = (base[0], max(os.listdir(base_dir)))
    112 
    113   if compare[1] is None:
    114     # compare defaults to latest capture
    115     compare = (compare[0], min(os.listdir(compare_dir)))
    116 
    117   out_path = os.path.join(out_path, base[0], base[1], compare[0], compare[1])
    118   drivers.windowing.PreparePath(out_path)
    119 
    120   # TODO(jhaas): right now we're just dumping output to a log file
    121   # (and the console), which works as far as it goes but isn't nearly
    122   # robust enough. Change this after deciding exactly what we want to
    123   # change it to.
    124   out_file = open(os.path.join(out_path, "log.txt"), "w")
    125   description_string = ("Comparing %s %s to %s %s" %
    126                         (base[0], base[1], compare[0], compare[1]))
    127   out_file.write(description_string)
    128   print description_string
    129 
    130   base_dir = os.path.join(base_dir, base[1])
    131   compare_dir = os.path.join(compare_dir, compare[1])
    132 
    133   for filename in os.listdir(base_dir):
    134     out_file.write("%s: " % filename)
    135 
    136     if not os.path.isfile(os.path.join(compare_dir, filename)):
    137       out_file.write("Does not exist in target directory\n")
    138       print "File %s does not exist in target directory" % filename
    139       continue
    140 
    141     base_filename = os.path.join(base_dir, filename)
    142     compare_filename = os.path.join(compare_dir, filename)
    143 
    144     for op in ops:
    145       if type(op) in types.StringTypes: op = (op, None)
    146 
    147       module = operators.GetOperator(op[0])
    148 
    149       ret = module.Compare(base_filename, compare_filename)
    150       if ret is None:
    151         print "%s: OK" % (filename,)
    152         out_file.write("OK\n")
    153       else:
    154         print "%s: %s" % (filename, ret[0])
    155         out_file.write("%s\n" % (ret[0]))
    156         ret[1].save(os.path.join(out_path, filename))
    157 
    158   out_file.close()
    159 
    160 
    161 def main():
    162   """Main executable. Parse the command line and invoke the command."""
    163   cmdline = command_line.CommandLine()
    164 
    165   # The below two commands are currently unstable so have been disabled
    166   # commands.compare2.CreateCommand(cmdline)
    167   # commands.maskmaker.CreateCommand(cmdline)
    168   commands.measure.CreateCommand(cmdline)
    169   commands.scrape.CreateCommand(cmdline)
    170 
    171   cmdline.ParseCommandLine()
    172   return 0
    173 
    174 
    175 if __name__ == "__main__":
    176   sys.exit(main())
    177