Home | History | Annotate | Download | only in rebaseline_server
      1 #!/usr/bin/python
      2 
      3 """
      4 Copyright 2013 Google Inc.
      5 
      6 Use of this source code is governed by a BSD-style license that can be
      7 found in the LICENSE file.
      8 
      9 HTTP server for our HTML rebaseline viewer.
     10 """
     11 
     12 # System-level imports
     13 import argparse
     14 import BaseHTTPServer
     15 import json
     16 import logging
     17 import os
     18 import posixpath
     19 import re
     20 import shutil
     21 import socket
     22 import subprocess
     23 import sys
     24 import thread
     25 import threading
     26 import time
     27 import urlparse
     28 
     29 # Imports from within Skia
     30 #
     31 # We need to add the 'tools' directory, so that we can import svn.py within
     32 # that directory.
     33 # Make sure that the 'tools' dir is in the PYTHONPATH, but add it at the *end*
     34 # so any dirs that are already in the PYTHONPATH will be preferred.
     35 PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
     36 TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(PARENT_DIRECTORY))
     37 TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')
     38 if TOOLS_DIRECTORY not in sys.path:
     39   sys.path.append(TOOLS_DIRECTORY)
     40 import svn
     41 
     42 # Imports from local dir
     43 import results
     44 
     45 ACTUALS_SVN_REPO = 'http://skia-autogen.googlecode.com/svn/gm-actual'
     46 PATHSPLIT_RE = re.compile('/([^/]+)/(.+)')
     47 EXPECTATIONS_DIR = os.path.join(TRUNK_DIRECTORY, 'expectations', 'gm')
     48 GENERATED_IMAGES_ROOT = os.path.join(PARENT_DIRECTORY, 'static',
     49                                      'generated-images')
     50 
     51 # A simple dictionary of file name extensions to MIME types. The empty string
     52 # entry is used as the default when no extension was given or if the extension
     53 # has no entry in this dictionary.
     54 MIME_TYPE_MAP = {'': 'application/octet-stream',
     55                  'html': 'text/html',
     56                  'css': 'text/css',
     57                  'png': 'image/png',
     58                  'js': 'application/javascript',
     59                  'json': 'application/json'
     60                  }
     61 
     62 DEFAULT_ACTUALS_DIR = '.gm-actuals'
     63 DEFAULT_PORT = 8888
     64 
     65 # How often (in seconds) clients should reload while waiting for initial
     66 # results to load.
     67 RELOAD_INTERVAL_UNTIL_READY = 10
     68 
     69 _HTTP_HEADER_CONTENT_LENGTH = 'Content-Length'
     70 _HTTP_HEADER_CONTENT_TYPE = 'Content-Type'
     71 
     72 _SERVER = None   # This gets filled in by main()
     73 
     74 
     75 def _run_command(args, directory):
     76   """Runs a command and returns stdout as a single string.
     77 
     78   Args:
     79     args: the command to run, as a list of arguments
     80     directory: directory within which to run the command
     81 
     82   Returns: stdout, as a string
     83 
     84   Raises an Exception if the command failed (exited with nonzero return code).
     85   """
     86   logging.debug('_run_command: %s in directory %s' % (args, directory))
     87   proc = subprocess.Popen(args, cwd=directory,
     88                           stdout=subprocess.PIPE,
     89                           stderr=subprocess.PIPE)
     90   (stdout, stderr) = proc.communicate()
     91   if proc.returncode is not 0:
     92     raise Exception('command "%s" failed in dir "%s": %s' %
     93                     (args, directory, stderr))
     94   return stdout
     95 
     96 
     97 def _get_routable_ip_address():
     98   """Returns routable IP address of this host (the IP address of its network
     99      interface that would be used for most traffic, not its localhost
    100      interface).  See http://stackoverflow.com/a/166589 """
    101   sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    102   sock.connect(('8.8.8.8', 80))
    103   host = sock.getsockname()[0]
    104   sock.close()
    105   return host
    106 
    107 
    108 def _create_svn_checkout(dir_path, repo_url):
    109   """Creates local checkout of an SVN repository at the specified directory
    110   path, returning an svn.Svn object referring to the local checkout.
    111 
    112   Args:
    113     dir_path: path to the local checkout; if this directory does not yet exist,
    114               it will be created and the repo will be checked out into it
    115     repo_url: URL of SVN repo to check out into dir_path (unless the local
    116               checkout already exists)
    117   Returns: an svn.Svn object referring to the local checkout.
    118   """
    119   local_checkout = svn.Svn(dir_path)
    120   if not os.path.isdir(dir_path):
    121     os.makedirs(dir_path)
    122     local_checkout.Checkout(repo_url, '.')
    123   return local_checkout
    124 
    125 
    126 class Server(object):
    127   """ HTTP server for our HTML rebaseline viewer. """
    128 
    129   def __init__(self,
    130                actuals_dir=DEFAULT_ACTUALS_DIR,
    131                port=DEFAULT_PORT, export=False, editable=True,
    132                reload_seconds=0):
    133     """
    134     Args:
    135       actuals_dir: directory under which we will check out the latest actual
    136                    GM results
    137       port: which TCP port to listen on for HTTP requests
    138       export: whether to allow HTTP clients on other hosts to access this server
    139       editable: whether HTTP clients are allowed to submit new baselines
    140       reload_seconds: polling interval with which to check for new results;
    141                       if 0, don't check for new results at all
    142     """
    143     self._actuals_dir = actuals_dir
    144     self._port = port
    145     self._export = export
    146     self._editable = editable
    147     self._reload_seconds = reload_seconds
    148     self._actuals_repo = _create_svn_checkout(
    149         dir_path=actuals_dir, repo_url=ACTUALS_SVN_REPO)
    150 
    151     # Reentrant lock that must be held whenever updating EITHER of:
    152     # 1. self._results
    153     # 2. the expected or actual results on local disk
    154     self.results_rlock = threading.RLock()
    155     # self._results will be filled in by calls to update_results()
    156     self._results = None
    157 
    158   @property
    159   def results(self):
    160     """ Returns the most recently generated results, or None if update_results()
    161     has not been called yet. """
    162     return self._results
    163 
    164   @property
    165   def is_exported(self):
    166     """ Returns true iff HTTP clients on other hosts are allowed to access
    167     this server. """
    168     return self._export
    169 
    170   @property
    171   def is_editable(self):
    172     """ Returns true iff HTTP clients are allowed to submit new baselines. """
    173     return self._editable
    174 
    175   @property
    176   def reload_seconds(self):
    177     """ Returns the result reload period in seconds, or 0 if we don't reload
    178     results. """
    179     return self._reload_seconds
    180 
    181   def update_results(self):
    182     """ Create or update self._results, based on the expectations in
    183     EXPECTATIONS_DIR and the latest actuals from skia-autogen.
    184 
    185     We hold self.results_rlock while we do this, to guarantee that no other
    186     thread attempts to update either self._results or the underlying files at
    187     the same time.
    188     """
    189     with self.results_rlock:
    190       logging.info('Updating actual GM results in %s from SVN repo %s ...' % (
    191           self._actuals_dir, ACTUALS_SVN_REPO))
    192       self._actuals_repo.Update('.')
    193 
    194       # We only update the expectations dir if the server was run with a
    195       # nonzero --reload argument; otherwise, we expect the user to maintain
    196       # her own expectations as she sees fit.
    197       #
    198       # Because the Skia repo is moving from SVN to git, and git does not
    199       # support updating a single directory tree, we have to update the entire
    200       # repo checkout.
    201       #
    202       # Because Skia uses depot_tools, we have to update using "gclient sync"
    203       # instead of raw git (or SVN) update.  Happily, this will work whether
    204       # the checkout was created using git or SVN.
    205       if self._reload_seconds:
    206         logging.info(
    207             'Updating expected GM results in %s by syncing Skia repo ...' %
    208             EXPECTATIONS_DIR)
    209         _run_command(['gclient', 'sync'], TRUNK_DIRECTORY)
    210 
    211       logging.info(
    212           ('Parsing results from actuals in %s and expectations in %s, '
    213            + 'and generating pixel diffs (may take a while) ...') % (
    214                self._actuals_dir, EXPECTATIONS_DIR))
    215       self._results = results.Results(
    216           actuals_root=self._actuals_dir,
    217           expected_root=EXPECTATIONS_DIR,
    218           generated_images_root=GENERATED_IMAGES_ROOT)
    219 
    220   def _result_loader(self, reload_seconds=0):
    221     """ Call self.update_results(), either once or periodically.
    222 
    223     Params:
    224       reload_seconds: integer; if nonzero, reload results at this interval
    225           (in which case, this method will never return!)
    226     """
    227     self.update_results()
    228     logging.info('Initial results loaded. Ready for requests on %s' % self._url)
    229     if reload_seconds:
    230       while True:
    231         time.sleep(reload_seconds)
    232         self.update_results()
    233 
    234   def run(self):
    235     arg_tuple = (self._reload_seconds,)  # start_new_thread needs a tuple,
    236                                          # even though it holds just one param
    237     thread.start_new_thread(self._result_loader, arg_tuple)
    238 
    239     if self._export:
    240       server_address = ('', self._port)
    241       host = _get_routable_ip_address()
    242       if self._editable:
    243         logging.warning('Running with combination of "export" and "editable" '
    244                         'flags.  Users on other machines will '
    245                         'be able to modify your GM expectations!')
    246     else:
    247       host = '127.0.0.1'
    248       server_address = (host, self._port)
    249     http_server = BaseHTTPServer.HTTPServer(server_address, HTTPRequestHandler)
    250     self._url = 'http://%s:%d' % (host, http_server.server_port)
    251     logging.info('Listening for requests on %s' % self._url)
    252     http_server.serve_forever()
    253 
    254 
    255 class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    256   """ HTTP request handlers for various types of queries this server knows
    257       how to handle (static HTML and Javascript, expected/actual results, etc.)
    258   """
    259   def do_GET(self):
    260     """ Handles all GET requests, forwarding them to the appropriate
    261         do_GET_* dispatcher. """
    262     if self.path == '' or self.path == '/' or self.path == '/index.html' :
    263       self.redirect_to('/static/index.html')
    264       return
    265     if self.path == '/favicon.ico' :
    266       self.redirect_to('/static/favicon.ico')
    267       return
    268 
    269     # All requests must be of this form:
    270     #   /dispatcher/remainder
    271     # where 'dispatcher' indicates which do_GET_* dispatcher to run
    272     # and 'remainder' is the remaining path sent to that dispatcher.
    273     normpath = posixpath.normpath(self.path)
    274     (dispatcher_name, remainder) = PATHSPLIT_RE.match(normpath).groups()
    275     dispatchers = {
    276       'results': self.do_GET_results,
    277       'static': self.do_GET_static,
    278     }
    279     dispatcher = dispatchers[dispatcher_name]
    280     dispatcher(remainder)
    281 
    282   def do_GET_results(self, type):
    283     """ Handle a GET request for GM results.
    284 
    285     Args:
    286       type: string indicating which set of results to return;
    287             must be one of the results.RESULTS_* constants
    288     """
    289     logging.debug('do_GET_results: sending results of type "%s"' % type)
    290     try:
    291       # Since we must make multiple calls to the Results object, grab a
    292       # reference to it in case it is updated to point at a new Results
    293       # object within another thread.
    294       #
    295       # TODO(epoger): Rather than using a global variable for the handler
    296       # to refer to the Server object, make Server a subclass of
    297       # HTTPServer, and then it could be available to the handler via
    298       # the handler's .server instance variable.
    299       results_obj = _SERVER.results
    300       if results_obj:
    301         response_dict = self.package_results(results_obj, type)
    302       else:
    303         now = int(time.time())
    304         response_dict = {
    305             'header': {
    306                 'resultsStillLoading': True,
    307                 'timeUpdated': now,
    308                 'timeNextUpdateAvailable': now + RELOAD_INTERVAL_UNTIL_READY,
    309             },
    310         }
    311       self.send_json_dict(response_dict)
    312     except:
    313       self.send_error(404)
    314       raise
    315 
    316   def package_results(self, results_obj, type):
    317     """ Given a nonempty "results" object, package it as a response_dict
    318     as needed within do_GET_results.
    319 
    320     Args:
    321       results_obj: nonempty "results" object
    322       type: string indicating which set of results to return;
    323             must be one of the results.RESULTS_* constants
    324     """
    325     response_dict = results_obj.get_results_of_type(type)
    326     time_updated = results_obj.get_timestamp()
    327     response_dict['header'] = {
    328         # Timestamps:
    329         # 1. when this data was last updated
    330         # 2. when the caller should check back for new data (if ever)
    331         #
    332         # We only return these timestamps if the --reload argument was passed;
    333         # otherwise, we have no idea when the expectations were last updated
    334         # (we allow the user to maintain her own expectations as she sees fit).
    335         'timeUpdated': time_updated if _SERVER.reload_seconds else None,
    336         'timeNextUpdateAvailable': (
    337             (time_updated+_SERVER.reload_seconds) if _SERVER.reload_seconds
    338             else None),
    339 
    340         # The type we passed to get_results_of_type()
    341         'type': type,
    342 
    343         # Hash of testData, which the client must return with any edits--
    344         # this ensures that the edits were made to a particular dataset.
    345         'dataHash': str(hash(repr(response_dict['testData']))),
    346 
    347         # Whether the server will accept edits back.
    348         'isEditable': _SERVER.is_editable,
    349 
    350         # Whether the service is accessible from other hosts.
    351         'isExported': _SERVER.is_exported,
    352     }
    353     return response_dict
    354 
    355   def do_GET_static(self, path):
    356     """ Handle a GET request for a file under the 'static' directory.
    357     Only allow serving of files within the 'static' directory that is a
    358     filesystem sibling of this script.
    359 
    360     Args:
    361       path: path to file (under static directory) to retrieve
    362     """
    363     # Strip arguments ('?resultsToLoad=all') from the path
    364     path = urlparse.urlparse(path).path
    365 
    366     logging.debug('do_GET_static: sending file "%s"' % path)
    367     static_dir = os.path.realpath(os.path.join(PARENT_DIRECTORY, 'static'))
    368     full_path = os.path.realpath(os.path.join(static_dir, path))
    369     if full_path.startswith(static_dir):
    370       self.send_file(full_path)
    371     else:
    372       logging.error(
    373           'Attempted do_GET_static() of path [%s] outside of static dir [%s]'
    374           % (full_path, static_dir))
    375       self.send_error(404)
    376 
    377   def do_POST(self):
    378     """ Handles all POST requests, forwarding them to the appropriate
    379         do_POST_* dispatcher. """
    380     # All requests must be of this form:
    381     #   /dispatcher
    382     # where 'dispatcher' indicates which do_POST_* dispatcher to run.
    383     normpath = posixpath.normpath(self.path)
    384     dispatchers = {
    385       '/edits': self.do_POST_edits,
    386     }
    387     try:
    388       dispatcher = dispatchers[normpath]
    389       dispatcher()
    390       self.send_response(200)
    391     except:
    392       self.send_error(404)
    393       raise
    394 
    395   def do_POST_edits(self):
    396     """ Handle a POST request with modifications to GM expectations, in this
    397     format:
    398 
    399     {
    400       'oldResultsType': 'all',    # type of results that the client loaded
    401                                   # and then made modifications to
    402       'oldResultsHash': 39850913, # hash of results when the client loaded them
    403                                   # (ensures that the client and server apply
    404                                   # modifications to the same base)
    405       'modifications': [
    406         {
    407           'builder': 'Test-Android-Nexus10-MaliT604-Arm7-Debug',
    408           'test': 'strokerect',
    409           'config': 'gpu',
    410           'expectedHashType': 'bitmap-64bitMD5',
    411           'expectedHashDigest': '1707359671708613629',
    412         },
    413         ...
    414       ],
    415     }
    416 
    417     Raises an Exception if there were any problems.
    418     """
    419     if not _SERVER.is_editable:
    420       raise Exception('this server is not running in --editable mode')
    421 
    422     content_type = self.headers[_HTTP_HEADER_CONTENT_TYPE]
    423     if content_type != 'application/json;charset=UTF-8':
    424       raise Exception('unsupported %s [%s]' % (
    425           _HTTP_HEADER_CONTENT_TYPE, content_type))
    426 
    427     content_length = int(self.headers[_HTTP_HEADER_CONTENT_LENGTH])
    428     json_data = self.rfile.read(content_length)
    429     data = json.loads(json_data)
    430     logging.debug('do_POST_edits: received new GM expectations data [%s]' %
    431                   data)
    432 
    433     # Update the results on disk with the information we received from the
    434     # client.
    435     # We must hold _SERVER.results_rlock while we do this, to guarantee that
    436     # no other thread updates expectations (from the Skia repo) while we are
    437     # updating them (using the info we received from the client).
    438     with _SERVER.results_rlock:
    439       oldResultsType = data['oldResultsType']
    440       oldResults = _SERVER.results.get_results_of_type(oldResultsType)
    441       oldResultsHash = str(hash(repr(oldResults['testData'])))
    442       if oldResultsHash != data['oldResultsHash']:
    443         raise Exception('results of type "%s" changed while the client was '
    444                         'making modifications. The client should reload the '
    445                         'results and submit the modifications again.' %
    446                         oldResultsType)
    447       _SERVER.results.edit_expectations(data['modifications'])
    448       # Read the updated results back from disk.
    449       _SERVER.update_results()
    450 
    451   def redirect_to(self, url):
    452     """ Redirect the HTTP client to a different url.
    453 
    454     Args:
    455       url: URL to redirect the HTTP client to
    456     """
    457     self.send_response(301)
    458     self.send_header('Location', url)
    459     self.end_headers()
    460 
    461   def send_file(self, path):
    462     """ Send the contents of the file at this path, with a mimetype based
    463         on the filename extension.
    464 
    465     Args:
    466       path: path of file whose contents to send to the HTTP client
    467     """
    468     # Grab the extension if there is one
    469     extension = os.path.splitext(path)[1]
    470     if len(extension) >= 1:
    471       extension = extension[1:]
    472 
    473     # Determine the MIME type of the file from its extension
    474     mime_type = MIME_TYPE_MAP.get(extension, MIME_TYPE_MAP[''])
    475 
    476     # Open the file and send it over HTTP
    477     if os.path.isfile(path):
    478       with open(path, 'rb') as sending_file:
    479         self.send_response(200)
    480         self.send_header('Content-type', mime_type)
    481         self.end_headers()
    482         self.wfile.write(sending_file.read())
    483     else:
    484       self.send_error(404)
    485 
    486   def send_json_dict(self, json_dict):
    487     """ Send the contents of this dictionary in JSON format, with a JSON
    488         mimetype.
    489 
    490     Args:
    491       json_dict: dictionary to send
    492     """
    493     self.send_response(200)
    494     self.send_header('Content-type', 'application/json')
    495     self.end_headers()
    496     json.dump(json_dict, self.wfile)
    497 
    498 
    499 def main():
    500   logging.basicConfig(level=logging.INFO)
    501   parser = argparse.ArgumentParser()
    502   parser.add_argument('--actuals-dir',
    503                     help=('Directory into which we will check out the latest '
    504                           'actual GM results. If this directory does not '
    505                           'exist, it will be created. Defaults to %(default)s'),
    506                     default=DEFAULT_ACTUALS_DIR)
    507   parser.add_argument('--editable', action='store_true',
    508                       help=('Allow HTTP clients to submit new baselines.'))
    509   parser.add_argument('--export', action='store_true',
    510                       help=('Instead of only allowing access from HTTP clients '
    511                             'on localhost, allow HTTP clients on other hosts '
    512                             'to access this server.  WARNING: doing so will '
    513                             'allow users on other hosts to modify your '
    514                             'GM expectations, if combined with --editable.'))
    515   parser.add_argument('--port', type=int,
    516                       help=('Which TCP port to listen on for HTTP requests; '
    517                             'defaults to %(default)s'),
    518                       default=DEFAULT_PORT)
    519   parser.add_argument('--reload', type=int,
    520                       help=('How often (a period in seconds) to update the '
    521                             'results.  If specified, both expected and actual '
    522                             'results will be updated by running "gclient sync" '
    523                             'on your Skia checkout as a whole.  '
    524                             'By default, we do not reload at all, and you '
    525                             'must restart the server to pick up new data.'),
    526                       default=0)
    527   args = parser.parse_args()
    528   global _SERVER
    529   _SERVER = Server(actuals_dir=args.actuals_dir,
    530                    port=args.port, export=args.export, editable=args.editable,
    531                    reload_seconds=args.reload)
    532   _SERVER.run()
    533 
    534 
    535 if __name__ == '__main__':
    536   main()
    537