Home | History | Annotate | Download | only in pyautolib
      1 #!/usr/bin/env python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Chrome remote inspector utility for pyauto tests.
      7 
      8 This script provides a python interface that acts as a front-end for Chrome's
      9 remote inspector module, communicating via sockets to interact with Chrome in
     10 the same way that the Developer Tools does.  This -- in theory -- should allow
     11 a pyauto test to do anything that Chrome's Developer Tools does, as long as the
     12 appropriate communication with the remote inspector is implemented in this
     13 script.
     14 
     15 This script assumes that Chrome is already running on the local machine with
     16 flag '--remote-debugging-port=9222' to enable remote debugging on port 9222.
     17 
     18 To use this module, first create an instance of class RemoteInspectorClient;
     19 doing this sets up a connection to Chrome's remote inspector.  Then call the
     20 appropriate functions on that object to perform the desired actions with the
     21 remote inspector.  When done, call Stop() on the RemoteInspectorClient object
     22 to stop communication with the remote inspector.
     23 
     24 For example, to take v8 heap snapshots from a pyauto test:
     25 
     26 import remote_inspector_client
     27 my_client = remote_inspector_client.RemoteInspectorClient()
     28 snapshot_info = my_client.HeapSnapshot(include_summary=True)
# Do some stuff...
     30 new_snapshot_info = my_client.HeapSnapshot(include_summary=True)
     31 my_client.Stop()
     32 
     33 It is expected that a test will only use one instance of RemoteInspectorClient
     34 at a time.  If a second instance is instantiated, a RuntimeError will be raised.
     35 RemoteInspectorClient could be made into a singleton in the future if the need
     36 for it arises.
     37 """
     38 
     39 import asyncore
     40 import datetime
     41 import logging
     42 import optparse
     43 import pprint
     44 import re
     45 import simplejson
     46 import socket
     47 import sys
     48 import threading
     49 import time
     50 import urllib2
     51 import urlparse
     52 
     53 
     54 class _DevToolsSocketRequest(object):
     55   """A representation of a single DevToolsSocket request.
     56 
     57   A DevToolsSocket request is used for communication with a remote Chrome
     58   instance when interacting with the renderer process of a given webpage.
     59   Requests and results are passed as specially-formatted JSON messages,
     60   according to a communication protocol defined in WebKit.  The string
     61   representation of this request will be a JSON message that is properly
     62   formatted according to the communication protocol.
     63 
     64   Public Attributes:
     65     method: The string method name associated with this request.
     66     id: A unique integer id associated with this request.
     67     params: A dictionary of input parameters associated with this request.
     68     results: A dictionary of relevant results obtained from the remote Chrome
     69         instance that are associated with this request.
     70     is_fulfilled: A boolean indicating whether or not this request has been sent
     71         and all relevant results for it have been obtained (i.e., this value is
     72         True only if all results for this request are known).
     73     is_fulfilled_condition: A threading.Condition for waiting for the request to
     74         be fulfilled.
     75   """
     76 
     77   def __init__(self, method, params, message_id):
     78     """Initialize.
     79 
     80     Args:
     81       method: The string method name for this request.
     82       message_id: An integer id for this request, which is assumed to be unique
     83           from among all requests.
     84     """
     85     self.method = method
     86     self.id = message_id
     87     self.params = params
     88     self.results = {}
     89     self.is_fulfilled = False
     90     self.is_fulfilled_condition = threading.Condition()
     91 
     92   def __repr__(self):
     93     json_dict = {}
     94     json_dict['method'] = self.method
     95     json_dict['id'] = self.id
     96     if self.params:
     97       json_dict['params'] = self.params
     98     return simplejson.dumps(json_dict, separators=(',', ':'))
     99 
    100 
    101 class _DevToolsSocketClient(asyncore.dispatcher):
    102   """Client that communicates with a remote Chrome instance via sockets.
    103 
    104   This class works in conjunction with the _RemoteInspectorThread class to
    105   communicate with a remote Chrome instance following the remote debugging
    106   communication protocol in WebKit.  This class performs the lower-level work
    107   of socket communication.
    108 
    109   Public Attributes:
    110     handshake_done: A boolean indicating whether or not the client has completed
    111         the required protocol handshake with the remote Chrome instance.
    112     inspector_thread: An instance of the _RemoteInspectorThread class that is
    113         working together with this class to communicate with a remote Chrome
    114         instance.
    115   """
    116 
    117   def __init__(self, verbose, show_socket_messages, hostname, port, path):
    118     """Initialize.
    119 
    120     Args:
    121       verbose: A boolean indicating whether or not to use verbose logging.
    122       show_socket_messages: A boolean indicating whether or not to show the
    123           socket messages sent/received when communicating with the remote
    124           Chrome instance.
    125       hostname: The string hostname of the DevToolsSocket to which to connect.
    126       port: The integer port number of the DevToolsSocket to which to connect.
    127       path: The string path of the DevToolsSocket to which to connect.
    128     """
    129     asyncore.dispatcher.__init__(self)
    130 
    131     self._logger = logging.getLogger('_DevToolsSocketClient')
    132     self._logger.setLevel([logging.WARNING, logging.DEBUG][verbose])
    133 
    134     self._show_socket_messages = show_socket_messages
    135 
    136     self._read_buffer = ''
    137     self._write_buffer = ''
    138 
    139     self._socket_buffer_lock = threading.Lock()
    140 
    141     self.handshake_done = False
    142     self.inspector_thread = None
    143 
    144     # Connect to the remote Chrome instance and initiate the protocol handshake.
    145     self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
    146     self.connect((hostname, port))
    147 
    148     fields = [
    149       'Upgrade: WebSocket',
    150       'Connection: Upgrade',
    151       'Host: %s:%d' % (hostname, port),
    152       'Origin: http://%s:%d' % (hostname, port),
    153       'Sec-WebSocket-Key1: 4k0L66E ZU 8  5  <18 <TK 7   7',
    154       'Sec-WebSocket-Key2: s2  20 `# 4|  3 9   U_ 1299',
    155     ]
    156     handshake_msg = ('GET %s HTTP/1.1\r\n%s\r\n\r\n\x47\x30\x22\x2D\x5A\x3F'
    157                      '\x47\x58' % (path, '\r\n'.join(fields)))
    158     self._Write(handshake_msg.encode('utf-8'))
    159 
    160   def SendMessage(self, msg):
    161     """Causes a request message to be sent to the remote Chrome instance.
    162 
    163     Args:
    164       msg: A string message to be sent; assumed to be a JSON message in proper
    165           format according to the remote debugging protocol in WebKit.
    166     """
    167     # According to the communication protocol, each request message sent over
    168     # the wire must begin with '\x00' and end with '\xff'.
    169     self._Write('\x00' + msg.encode('utf-8') + '\xff')
    170 
    171   def _Write(self, msg):
    172     """Causes a raw message to be sent to the remote Chrome instance.
    173 
    174     Args:
    175       msg: A raw string message to be sent.
    176     """
    177     self._write_buffer += msg
    178     self.handle_write()
    179 
    180   def handle_write(self):
    181     """Called if a writable socket can be written; overridden from asyncore."""
    182     self._socket_buffer_lock.acquire()
    183     if self._write_buffer:
    184       sent = self.send(self._write_buffer)
    185       if self._show_socket_messages:
    186         msg_type = ['Handshake', 'Message'][self._write_buffer[0] == '\x00' and
    187                                             self._write_buffer[-1] == '\xff']
    188         msg = ('========================\n'
    189                'Sent %s:\n'
    190                '========================\n'
    191                '%s\n'
    192                '========================') % (msg_type,
    193                                               self._write_buffer[:sent-1])
    194         print msg
    195       self._write_buffer = self._write_buffer[sent:]
    196     self._socket_buffer_lock.release()
    197 
    198   def handle_read(self):
    199     """Called when a socket can be read; overridden from asyncore."""
    200     self._socket_buffer_lock.acquire()
    201     if self.handshake_done:
    202       # Process a message reply from the remote Chrome instance.
    203       self._read_buffer += self.recv(4096)
    204       pos = self._read_buffer.find('\xff')
    205       while pos >= 0:
    206         pos += len('\xff')
    207         data = self._read_buffer[:pos-len('\xff')]
    208         pos2 = data.find('\x00')
    209         if pos2 >= 0:
    210           data = data[pos2 + 1:]
    211         self._read_buffer = self._read_buffer[pos:]
    212         if self._show_socket_messages:
    213           msg = ('========================\n'
    214                  'Received Message:\n'
    215                  '========================\n'
    216                  '%s\n'
    217                  '========================') % data
    218           print msg
    219         if self.inspector_thread:
    220           self.inspector_thread.NotifyReply(data)
    221         pos = self._read_buffer.find('\xff')
    222     else:
    223       # Process a handshake reply from the remote Chrome instance.
    224       self._read_buffer += self.recv(4096)
    225       pos = self._read_buffer.find('\r\n\r\n')
    226       if pos >= 0:
    227         pos += len('\r\n\r\n')
    228         data = self._read_buffer[:pos]
    229         self._read_buffer = self._read_buffer[pos:]
    230         self.handshake_done = True
    231         if self._show_socket_messages:
    232           msg = ('=========================\n'
    233                  'Received Handshake Reply:\n'
    234                  '=========================\n'
    235                  '%s\n'
    236                  '=========================') % data
    237           print msg
    238     self._socket_buffer_lock.release()
    239 
    240   def handle_close(self):
    241     """Called when the socket is closed; overridden from asyncore."""
    242     if self._show_socket_messages:
    243       msg = ('=========================\n'
    244              'Socket closed.\n'
    245              '=========================')
    246       print msg
    247     self.close()
    248 
    249   def writable(self):
    250     """Determines if writes can occur for this socket; overridden from asyncore.
    251 
    252     Returns:
    253       True, if there is something to write to the socket, or
    254       False, otherwise.
    255     """
    256     return len(self._write_buffer) > 0
    257 
    258   def handle_expt(self):
    259     """Called when out-of-band data exists; overridden from asyncore."""
    260     self.handle_error()
    261 
    262   def handle_error(self):
    263     """Called when an exception is raised; overridden from asyncore."""
    264     if self._show_socket_messages:
    265       msg = ('=========================\n'
    266              'Socket error.\n'
    267              '=========================')
    268       print msg
    269     self.close()
    270     self.inspector_thread.ClientSocketExceptionOccurred()
    271     asyncore.dispatcher.handle_error(self)
    272 
    273 
    274 class _RemoteInspectorThread(threading.Thread):
    275   """Manages communication using Chrome's remote inspector protocol.
    276 
    277   This class works in conjunction with the _DevToolsSocketClient class to
    278   communicate with a remote Chrome instance following the remote inspector
    279   communication protocol in WebKit.  This class performs the higher-level work
    280   of managing request and reply messages, whereas _DevToolsSocketClient handles
    281   the lower-level work of socket communication.
    282   """
    283 
    284   def __init__(self, url, tab_index, tab_filter, verbose, show_socket_messages,
    285                agent_name):
    286     """Initialize.
    287 
    288     Args:
    289       url: The base URL to connent to.
    290       tab_index: The integer index of the tab in the remote Chrome instance to
    291           use for snapshotting.
    292       tab_filter: When specified, is run over tabs of the remote Chrome
    293           instances to choose which one to connect to.
    294       verbose: A boolean indicating whether or not to use verbose logging.
    295       show_socket_messages: A boolean indicating whether or not to show the
    296           socket messages sent/received when communicating with the remote
    297           Chrome instance.
    298     """
    299     threading.Thread.__init__(self)
    300     self._logger = logging.getLogger('_RemoteInspectorThread')
    301     self._logger.setLevel([logging.WARNING, logging.DEBUG][verbose])
    302 
    303     self._killed = False
    304     self._requests = []
    305     self._action_queue = []
    306     self._action_queue_condition = threading.Condition()
    307     self._action_specific_callback = None  # Callback only for current action.
    308     self._action_specific_callback_lock = threading.Lock()
    309     self._general_callbacks = []  # General callbacks that can be long-lived.
    310     self._general_callbacks_lock = threading.Lock()
    311     self._condition_to_wait = None
    312     self._agent_name = agent_name
    313 
    314     # Create a DevToolsSocket client and wait for it to complete the remote
    315     # debugging protocol handshake with the remote Chrome instance.
    316     result = self._IdentifyDevToolsSocketConnectionInfo(
    317         url, tab_index, tab_filter)
    318     self._client = _DevToolsSocketClient(
    319         verbose, show_socket_messages, result['host'], result['port'],
    320         result['path'])
    321     self._client.inspector_thread = self
    322     while asyncore.socket_map:
    323       if self._client.handshake_done or self._killed:
    324         break
    325       asyncore.loop(timeout=1, count=1, use_poll=True)
    326 
    327   def ClientSocketExceptionOccurred(self):
    328     """Notifies that the _DevToolsSocketClient encountered an exception."""
    329     self.Kill()
    330 
    331   def NotifyReply(self, msg):
    332     """Notifies of a reply message received from the remote Chrome instance.
    333 
    334     Args:
    335       msg: A string reply message received from the remote Chrome instance;
    336            assumed to be a JSON message formatted according to the remote
    337            debugging communication protocol in WebKit.
    338     """
    339     reply_dict = simplejson.loads(msg)
    340 
    341     # Notify callbacks of this message received from the remote inspector.
    342     self._action_specific_callback_lock.acquire()
    343     if self._action_specific_callback:
    344       self._action_specific_callback(reply_dict)
    345     self._action_specific_callback_lock.release()
    346 
    347     self._general_callbacks_lock.acquire()
    348     if self._general_callbacks:
    349       for callback in self._general_callbacks:
    350         callback(reply_dict)
    351     self._general_callbacks_lock.release()
    352 
    353     if 'result' in reply_dict:
    354       # This is the result message associated with a previously-sent request.
    355       request = self.GetRequestWithId(reply_dict['id'])
    356       if request:
    357         request.is_fulfilled_condition.acquire()
    358         request.is_fulfilled_condition.notify()
    359         request.is_fulfilled_condition.release()
    360 
    361   def run(self):
    362     """Start this thread; overridden from threading.Thread."""
    363     while not self._killed:
    364       self._action_queue_condition.acquire()
    365       if self._action_queue:
    366         # There's a request to the remote inspector that needs to be processed.
    367         messages, callback = self._action_queue.pop(0)
    368         self._action_specific_callback_lock.acquire()
    369         self._action_specific_callback = callback
    370         self._action_specific_callback_lock.release()
    371 
    372         # Prepare the request list.
    373         for message_id, message in enumerate(messages):
    374           self._requests.append(
    375               _DevToolsSocketRequest(message[0], message[1], message_id))
    376 
    377         # Send out each request.  Wait until each request is complete before
    378         # sending the next request.
    379         for request in self._requests:
    380           self._FillInParams(request)
    381           self._client.SendMessage(str(request))
    382 
    383           request.is_fulfilled_condition.acquire()
    384           self._condition_to_wait = request.is_fulfilled_condition
    385           request.is_fulfilled_condition.wait()
    386           request.is_fulfilled_condition.release()
    387 
    388           if self._killed:
    389             self._client.close()
    390             return
    391 
    392         # Clean up so things are ready for the next request.
    393         self._requests = []
    394 
    395         self._action_specific_callback_lock.acquire()
    396         self._action_specific_callback = None
    397         self._action_specific_callback_lock.release()
    398 
    399       # Wait until there is something to process.
    400       self._condition_to_wait = self._action_queue_condition
    401       self._action_queue_condition.wait()
    402       self._action_queue_condition.release()
    403     self._client.close()
    404 
    405   def Kill(self):
    406     """Notify this thread that it should stop executing."""
    407     self._killed = True
    408     # The thread might be waiting on a condition.
    409     if self._condition_to_wait:
    410       self._condition_to_wait.acquire()
    411       self._condition_to_wait.notify()
    412       self._condition_to_wait.release()
    413 
    414   def PerformAction(self, request_messages, reply_message_callback):
    415     """Notify this thread of an action to perform using the remote inspector.
    416 
    417     Args:
    418       request_messages: A list of strings representing the requests to make
    419           using the remote inspector.
    420       reply_message_callback: A callable to be invoked any time a message is
    421           received from the remote inspector while the current action is
    422           being performed.  The callable should accept a single argument,
    423           which is a dictionary representing a message received.
    424     """
    425     self._action_queue_condition.acquire()
    426     self._action_queue.append((request_messages, reply_message_callback))
    427     self._action_queue_condition.notify()
    428     self._action_queue_condition.release()
    429 
    430   def AddMessageCallback(self, callback):
    431     """Add a callback to invoke for messages received from the remote inspector.
    432 
    433     Args:
    434       callback: A callable to be invoked any time a message is received from the
    435           remote inspector.  The callable should accept a single argument, which
    436           is a dictionary representing a message received.
    437     """
    438     self._general_callbacks_lock.acquire()
    439     self._general_callbacks.append(callback)
    440     self._general_callbacks_lock.release()
    441 
    442   def RemoveMessageCallback(self, callback):
    443     """Remove a callback from the set of those to invoke for messages received.
    444 
    445     Args:
    446       callback: A callable to remove from consideration.
    447     """
    448     self._general_callbacks_lock.acquire()
    449     self._general_callbacks.remove(callback)
    450     self._general_callbacks_lock.release()
    451 
    452   def GetRequestWithId(self, request_id):
    453     """Identifies the request with the specified id.
    454 
    455     Args:
    456       request_id: An integer request id; should be unique for each request.
    457 
    458     Returns:
    459       A request object associated with the given id if found, or
    460       None otherwise.
    461     """
    462     found_request = [x for x in self._requests if x.id == request_id]
    463     if found_request:
    464       return found_request[0]
    465     return None
    466 
    467   def GetFirstUnfulfilledRequest(self, method):
    468     """Identifies the first unfulfilled request with the given method name.
    469 
    470     An unfulfilled request is one for which all relevant reply messages have
    471     not yet been received from the remote inspector.
    472 
    473     Args:
    474       method: The string method name of the request for which to search.
    475 
    476     Returns:
    477       The first request object in the request list that is not yet fulfilled
    478       and is also associated with the given method name, or
    479       None if no such request object can be found.
    480     """
    481     for request in self._requests:
    482       if not request.is_fulfilled and request.method == method:
    483         return request
    484     return None
    485 
    486   def _GetLatestRequestOfType(self, ref_req, method):
    487     """Identifies the latest specified request before a reference request.
    488 
    489     This function finds the latest request with the specified method that
    490     occurs before the given reference request.
    491 
    492     Args:
    493       ref_req: A reference request from which to start looking.
    494       method: The string method name of the request for which to search.
    495 
    496     Returns:
    497       The latest _DevToolsSocketRequest object with the specified method,
    498       if found, or None otherwise.
    499     """
    500     start_looking = False
    501     for request in self._requests[::-1]:
    502       if request.id == ref_req.id:
    503         start_looking = True
    504       elif start_looking:
    505         if request.method == method:
    506           return request
    507     return None
    508 
    509   def _FillInParams(self, request):
    510     """Fills in parameters for requests as necessary before the request is sent.
    511 
    512     Args:
    513       request: The _DevToolsSocketRequest object associated with a request
    514                message that is about to be sent.
    515     """
    516     if request.method == self._agent_name +'.takeHeapSnapshot':
    517       # We always want detailed v8 heap snapshot information.
    518       request.params = {'detailed': True}
    519     elif request.method == self._agent_name + '.getHeapSnapshot':
    520       # To actually request the snapshot data from a previously-taken snapshot,
    521       # we need to specify the unique uid of the snapshot we want.
    522       # The relevant uid should be contained in the last
    523       # 'Profiler.takeHeapSnapshot' request object.
    524       last_req = self._GetLatestRequestOfType(request,
    525           self._agent_name + '.takeHeapSnapshot')
    526       if last_req and 'uid' in last_req.results:
    527         request.params = {'uid': last_req.results['uid']}
    528     elif request.method == self._agent_name + '.getProfile':
    529       # TODO(eustas): Remove this case after M27 is released.
    530       last_req = self._GetLatestRequestOfType(request,
    531           self._agent_name + '.takeHeapSnapshot')
    532       if last_req and 'uid' in last_req.results:
    533         request.params = {'type': 'HEAP', 'uid': last_req.results['uid']}
    534 
    535   @staticmethod
    536   def _IdentifyDevToolsSocketConnectionInfo(url, tab_index, tab_filter):
    537     """Identifies DevToolsSocket connection info from a remote Chrome instance.
    538 
    539     Args:
    540       url: The base URL to connent to.
    541       tab_index: The integer index of the tab in the remote Chrome instance to
    542           which to connect.
    543       tab_filter: When specified, is run over tabs of the remote Chrome instance
    544           to choose which one to connect to.
    545 
    546     Returns:
    547       A dictionary containing the DevToolsSocket connection info:
    548       {
    549         'host': string,
    550         'port': integer,
    551         'path': string,
    552       }
    553 
    554     Raises:
    555       RuntimeError: When DevToolsSocket connection info cannot be identified.
    556     """
    557     try:
    558       f = urllib2.urlopen(url + '/json')
    559       result = f.read()
    560       logging.debug(result)
    561       result = simplejson.loads(result)
    562     except urllib2.URLError, e:
    563       raise RuntimeError(
    564           'Error accessing Chrome instance debugging port: ' + str(e))
    565 
    566     if tab_filter:
    567       connect_to = filter(tab_filter, result)[0]
    568     else:
    569       if tab_index >= len(result):
    570         raise RuntimeError(
    571             'Specified tab index %d doesn\'t exist (%d tabs found)' %
    572             (tab_index, len(result)))
    573       connect_to = result[tab_index]
    574 
    575     logging.debug(simplejson.dumps(connect_to))
    576 
    577     if 'webSocketDebuggerUrl' not in connect_to:
    578       raise RuntimeError('No socket URL exists for the specified tab.')
    579 
    580     socket_url = connect_to['webSocketDebuggerUrl']
    581     parsed = urlparse.urlparse(socket_url)
    582     # On ChromeOS, the "ws://" scheme may not be recognized, leading to an
    583     # incorrect netloc (and empty hostname and port attributes) in |parsed|.
    584     # Change the scheme to "http://" to fix this.
    585     if not parsed.hostname or not parsed.port:
    586       socket_url = 'http' + socket_url[socket_url.find(':'):]
    587       parsed = urlparse.urlparse(socket_url)
    588       # Warning: |parsed.scheme| is incorrect after this point.
    589     return ({'host': parsed.hostname,
    590              'port': parsed.port,
    591              'path': parsed.path})
    592 
    593 
    594 class _RemoteInspectorDriverThread(threading.Thread):
    595   """Drives the communication service with the remote inspector."""
    596 
    597   def __init__(self):
    598     """Initialize."""
    599     threading.Thread.__init__(self)
    600 
    601   def run(self):
    602     """Drives the communication service with the remote inspector."""
    603     try:
    604       while asyncore.socket_map:
    605         asyncore.loop(timeout=1, count=1, use_poll=True)
    606     except KeyboardInterrupt:
    607       pass
    608 
    609 
    610 class _V8HeapSnapshotParser(object):
    611   """Parses v8 heap snapshot data."""
    612   _CHILD_TYPES = ['context', 'element', 'property', 'internal', 'hidden',
    613                   'shortcut', 'weak']
    614   _NODE_TYPES = ['hidden', 'array', 'string', 'object', 'code', 'closure',
    615                  'regexp', 'number', 'native', 'synthetic']
    616 
    617   @staticmethod
    618   def ParseSnapshotData(raw_data):
    619     """Parses raw v8 heap snapshot data and returns the summarized results.
    620 
    621     The raw heap snapshot data is represented as a JSON object with the
    622     following keys: 'snapshot', 'nodes', and 'strings'.
    623 
    624     The 'snapshot' value provides the 'title' and 'uid' attributes for the
    625     snapshot.  For example:
    626     { u'title': u'org.webkit.profiles.user-initiated.1', u'uid': 1}
    627 
    628     The 'nodes' value is a list of node information from the v8 heap, with a
    629     special first element that describes the node serialization layout (see
    630     HeapSnapshotJSONSerializer::SerializeNodes).  All other list elements
    631     contain information about nodes in the v8 heap, according to the
    632     serialization layout.
    633 
    634     The 'strings' value is a list of strings, indexed by values in the 'nodes'
    635     list to associate nodes with strings.
    636 
    637     Args:
    638       raw_data: A string representing the raw v8 heap snapshot data.
    639 
    640     Returns:
    641       A dictionary containing the summarized v8 heap snapshot data:
    642       {
    643         'total_v8_node_count': integer,  # Total number of nodes in the v8 heap.
    644         'total_shallow_size': integer, # Total heap size, in bytes.
    645       }
    646     """
    647     total_node_count = 0
    648     total_shallow_size = 0
    649     constructors = {}
    650 
    651     # TODO(dennisjeffrey): The following line might be slow, especially on
    652     # ChromeOS.  Investigate faster alternatives.
    653     heap = simplejson.loads(raw_data)
    654 
    655     index = 1  # Bypass the special first node list item.
    656     node_list = heap['nodes']
    657     while index < len(node_list):
    658       node_type = node_list[index]
    659       node_name = node_list[index + 1]
    660       node_id = node_list[index + 2]
    661       node_self_size = node_list[index + 3]
    662       node_retained_size = node_list[index + 4]
    663       node_dominator = node_list[index + 5]
    664       node_children_count = node_list[index + 6]
    665       index += 7
    666 
    667       node_children = []
    668       for i in xrange(node_children_count):
    669         child_type = node_list[index]
    670         child_type_string = _V8HeapSnapshotParser._CHILD_TYPES[int(child_type)]
    671         child_name_index = node_list[index + 1]
    672         child_to_node = node_list[index + 2]
    673         index += 3
    674 
    675         child_info = {
    676           'type': child_type_string,
    677           'name_or_index': child_name_index,
    678           'to_node': child_to_node,
    679         }
    680         node_children.append(child_info)
    681 
    682       # Get the constructor string for this node so nodes can be grouped by
    683       # constructor.
    684       # See HeapSnapshot.js: WebInspector.HeapSnapshotNode.prototype.
    685       type_string = _V8HeapSnapshotParser._NODE_TYPES[int(node_type)]
    686       constructor_name = None
    687       if type_string == 'hidden':
    688         constructor_name = '(system)'
    689       elif type_string == 'object':
    690         constructor_name = heap['strings'][int(node_name)]
    691       elif type_string == 'native':
    692         pos = heap['strings'][int(node_name)].find('/')
    693         if pos >= 0:
    694           constructor_name = heap['strings'][int(node_name)][:pos].rstrip()
    695         else:
    696           constructor_name = heap['strings'][int(node_name)]
    697       elif type_string == 'code':
    698         constructor_name = '(compiled code)'
    699       else:
    700         constructor_name = '(' + type_string + ')'
    701 
    702       node_obj = {
    703         'type': type_string,
    704         'name': heap['strings'][int(node_name)],
    705         'id': node_id,
    706         'self_size': node_self_size,
    707         'retained_size': node_retained_size,
    708         'dominator': node_dominator,
    709         'children_count': node_children_count,
    710         'children': node_children,
    711       }
    712 
    713       if constructor_name not in constructors:
    714         constructors[constructor_name] = []
    715       constructors[constructor_name].append(node_obj)
    716 
    717       total_node_count += 1
    718       total_shallow_size += node_self_size
    719 
    720     # TODO(dennisjeffrey): Have this function also return more detailed v8
    721     # heap snapshot data when a need for it arises (e.g., using |constructors|).
    722     result = {}
    723     result['total_v8_node_count'] = total_node_count
    724     result['total_shallow_size'] = total_shallow_size
    725     return result
    726 
    727 
    728 # TODO(dennisjeffrey): The "verbose" option used in this file should re-use
    729 # pyauto's verbose flag.
    730 class RemoteInspectorClient(object):
    731   """Main class for interacting with Chrome's remote inspector.
    732 
    733   Upon initialization, a socket connection to Chrome's remote inspector will
    734   be established.  Users of this class should call Stop() to close the
    735   connection when it's no longer needed.
    736 
    737   Public Methods:
    738     Stop: Close the connection to the remote inspector.  Should be called when
    739         a user is done using this module.
    HeapSnapshot: Takes a v8 heap snapshot and returns the summarized data.
    EvaluateJavaScript: Evaluates a JavaScript expression in the connected
        tab and returns the result.
    741     GetMemoryObjectCounts: Retrieves memory object count information.
    742     CollectGarbage: Forces a garbage collection.
    743     StartTimelineEventMonitoring: Starts monitoring for timeline events.
    744     StopTimelineEventMonitoring: Stops monitoring for timeline events.
    745   """
    746 
    747   # TODO(dennisjeffrey): Allow a user to specify a window index too (not just a
    748   # tab index), when running through PyAuto.
    749   def __init__(self, tab_index=0, tab_filter=None,
    750                verbose=False, show_socket_messages=False,
    751                url='http://localhost:9222'):
    752     """Initialize.
    753 
    754     Args:
    755       tab_index: The integer index of the tab in the remote Chrome instance to
    756           which to connect.  Defaults to 0 (the first tab).
    757       tab_filter: When specified, is run over tabs of the remote Chrome
    758           instance to choose which one to connect to.
    759       verbose: A boolean indicating whether or not to use verbose logging.
    760       show_socket_messages: A boolean indicating whether or not to show the
    761           socket messages sent/received when communicating with the remote
    762           Chrome instance.
    763     """
    764     self._tab_index = tab_index
    765     self._tab_filter = tab_filter
    766     self._verbose = verbose
    767     self._show_socket_messages = show_socket_messages
    768 
    769     self._timeline_started = False
    770 
    771     logging.basicConfig()
    772     self._logger = logging.getLogger('RemoteInspectorClient')
    773     self._logger.setLevel([logging.WARNING, logging.DEBUG][verbose])
    774 
    775     # Creating _RemoteInspectorThread might raise an exception. This prevents an
    776     # AttributeError in the destructor.
    777     self._remote_inspector_thread = None
    778     self._remote_inspector_driver_thread = None
    779 
    780     self._version = self._GetVersion(url)
    781 
    782     # TODO(loislo): Remove this hack after M28 is released.
    783     self._agent_name = 'Profiler'
    784     if self._IsBrowserDayNumberGreaterThan(1470):
    785       self._agent_name = 'HeapProfiler'
    786 
    787     # Start up a thread for long-term communication with the remote inspector.
    788     self._remote_inspector_thread = _RemoteInspectorThread(
    789         url, tab_index, tab_filter, verbose, show_socket_messages,
    790         self._agent_name)
    791     self._remote_inspector_thread.start()
    792     # At this point, a connection has already been made to the remote inspector.
    793 
    794     # This thread calls asyncore.loop, which activates the channel service.
    795     self._remote_inspector_driver_thread = _RemoteInspectorDriverThread()
    796     self._remote_inspector_driver_thread.start()
    797 
  def __del__(self):
    """Called on destruction of this object.

    Delegates to Stop() so that any communication threads still referenced by
    this object are shut down when the object is destroyed.
    """
    self.Stop()
    801 
    802   def Stop(self):
    803     """Stop/close communication with the remote inspector."""
    804     if self._remote_inspector_thread:
    805       self._remote_inspector_thread.Kill()
    806       self._remote_inspector_thread.join()
    807       self._remote_inspector_thread = None
    808     if self._remote_inspector_driver_thread:
    809       self._remote_inspector_driver_thread.join()
    810       self._remote_inspector_driver_thread = None
    811 
    812   def HeapSnapshot(self, include_summary=False):
    813     """Takes a v8 heap snapshot.
    814 
    815     Returns:
    816       A dictionary containing information for a single v8 heap
    817       snapshot that was taken.
    818       {
    819         'url': string,  # URL of the webpage that was snapshotted.
    820         'raw_data': string, # The raw data as JSON string.
    821         'total_v8_node_count': integer,  # Total number of nodes in the v8 heap.
    822                                          # Only if |include_summary| is True.
    823         'total_heap_size': integer,  # Total v8 heap size (number of bytes).
    824                                      # Only if |include_summary| is True.
    825       }
    826     """
    827     HEAP_SNAPSHOT_MESSAGES = [
    828       ('Page.getResourceTree', {}),
    829       ('Debugger.enable', {}),
    830       (self._agent_name + '.clearProfiles', {}),
    831       (self._agent_name + '.takeHeapSnapshot', {}),
    832       (self._agent_name + '.getHeapSnapshot', {}),
    833     ]
    834 
    835     self._current_heap_snapshot = []
    836     self._url = ''
    837     self._collected_heap_snapshot_data = {}
    838 
    839     done_condition = threading.Condition()
    840 
    841     def HandleReply(reply_dict):
    842       """Processes a reply message received from the remote Chrome instance.
    843 
    844       Args:
    845         reply_dict: A dictionary object representing the reply message received
    846                      from the remote inspector.
    847       """
    848       if 'result' in reply_dict:
    849         # This is the result message associated with a previously-sent request.
    850         request = self._remote_inspector_thread.GetRequestWithId(
    851             reply_dict['id'])
    852         if 'frameTree' in reply_dict['result']:
    853           self._url = reply_dict['result']['frameTree']['frame']['url']
    854         elif request.method == self._agent_name + '.getHeapSnapshot':
    855           # A heap snapshot has been completed.  Analyze and output the data.
    856           self._logger.debug('Heap snapshot taken: %s', self._url)
    857           # TODO(dennisjeffrey): Parse the heap snapshot on-the-fly as the data
    858           # is coming in over the wire, so we can avoid storing the entire
    859           # snapshot string in memory.
    860           raw_snapshot_data = ''.join(self._current_heap_snapshot)
    861           self._collected_heap_snapshot_data = {
    862               'url': self._url,
    863               'raw_data': raw_snapshot_data}
    864           if include_summary:
    865             self._logger.debug('Now analyzing heap snapshot...')
    866             parser = _V8HeapSnapshotParser()
    867             time_start = time.time()
    868             self._logger.debug('Raw snapshot data size: %.2f MB',
    869                                len(raw_snapshot_data) / (1024.0 * 1024.0))
    870             result = parser.ParseSnapshotData(raw_snapshot_data)
    871             self._logger.debug('Time to parse data: %.2f sec',
    872                                time.time() - time_start)
    873             count = result['total_v8_node_count']
    874             self._collected_heap_snapshot_data['total_v8_node_count'] = count
    875             total_size = result['total_shallow_size']
    876             self._collected_heap_snapshot_data['total_heap_size'] = total_size
    877 
    878           done_condition.acquire()
    879           done_condition.notify()
    880           done_condition.release()
    881       elif 'method' in reply_dict:
    882         # This is an auxiliary message sent from the remote Chrome instance.
    883         if reply_dict['method'] == self._agent_name + '.addProfileHeader':
    884           snapshot_req = (
    885               self._remote_inspector_thread.GetFirstUnfulfilledRequest(
    886                   self._agent_name + '.takeHeapSnapshot'))
    887           if snapshot_req:
    888             snapshot_req.results['uid'] = reply_dict['params']['header']['uid']
    889         elif reply_dict['method'] == self._agent_name + '.addHeapSnapshotChunk':
    890           self._current_heap_snapshot.append(reply_dict['params']['chunk'])
    891 
    892     # Tell the remote inspector to take a v8 heap snapshot, then wait until
    893     # the snapshot information is available to return.
    894     self._remote_inspector_thread.PerformAction(HEAP_SNAPSHOT_MESSAGES,
    895                                                 HandleReply)
    896 
    897     done_condition.acquire()
    898     done_condition.wait()
    899     done_condition.release()
    900 
    901     return self._collected_heap_snapshot_data
    902 
    903   def EvaluateJavaScript(self, expression):
    904     """Evaluates a JavaScript expression and returns the result.
    905 
    906     Sends a message containing the expression to the remote Chrome instance we
    907     are connected to, and evaluates it in the context of the tab we are
    908     connected to. Blocks until the result is available and returns it.
    909 
    910     Returns:
    911       A dictionary representing the result.
    912     """
    913     EVALUATE_MESSAGES = [
    914       ('Runtime.evaluate', { 'expression': expression,
    915                              'objectGroup': 'group',
    916                              'returnByValue': True }),
    917       ('Runtime.releaseObjectGroup', { 'objectGroup': 'group' })
    918     ]
    919 
    920     self._result = None
    921     done_condition = threading.Condition()
    922 
    923     def HandleReply(reply_dict):
    924       """Processes a reply message received from the remote Chrome instance.
    925 
    926       Args:
    927         reply_dict: A dictionary object representing the reply message received
    928                     from the remote Chrome instance.
    929       """
    930       if 'result' in reply_dict and 'result' in reply_dict['result']:
    931         self._result = reply_dict['result']['result']['value']
    932 
    933         done_condition.acquire()
    934         done_condition.notify()
    935         done_condition.release()
    936 
    937     # Tell the remote inspector to evaluate the given expression, then wait
    938     # until that information is available to return.
    939     self._remote_inspector_thread.PerformAction(EVALUATE_MESSAGES,
    940                                                 HandleReply)
    941 
    942     done_condition.acquire()
    943     done_condition.wait()
    944     done_condition.release()
    945 
    946     return self._result
    947 
    948   def GetMemoryObjectCounts(self):
    949     """Retrieves memory object count information.
    950 
    951     Returns:
    952       A dictionary containing the memory object count information:
    953       {
    954         'DOMNodeCount': integer,  # Total number of DOM nodes.
    955         'EventListenerCount': integer,  # Total number of event listeners.
    956       }
    957     """
    958     MEMORY_COUNT_MESSAGES = [
    959       ('Memory.getDOMCounters', {})
    960     ]
    961 
    962     self._event_listener_count = None
    963     self._dom_node_count = None
    964 
    965     done_condition = threading.Condition()
    966     def HandleReply(reply_dict):
    967       """Processes a reply message received from the remote Chrome instance.
    968 
    969       Args:
    970         reply_dict: A dictionary object representing the reply message received
    971                     from the remote Chrome instance.
    972       """
    973       if 'result' in reply_dict:
    974         self._event_listener_count = reply_dict['result']['jsEventListeners']
    975         self._dom_node_count = reply_dict['result']['nodes']
    976 
    977         done_condition.acquire()
    978         done_condition.notify()
    979         done_condition.release()
    980 
    981     # Tell the remote inspector to collect memory count info, then wait until
    982     # that information is available to return.
    983     self._remote_inspector_thread.PerformAction(MEMORY_COUNT_MESSAGES,
    984                                                 HandleReply)
    985 
    986     done_condition.acquire()
    987     done_condition.wait()
    988     done_condition.release()
    989 
    990     return {
    991       'DOMNodeCount': self._dom_node_count,
    992       'EventListenerCount': self._event_listener_count,
    993     }
    994 
    995   def CollectGarbage(self):
    996     """Forces a garbage collection."""
    997     COLLECT_GARBAGE_MESSAGES = [
    998       ('Profiler.collectGarbage', {})
    999     ]
   1000 
   1001     # Tell the remote inspector to do a garbage collect.  We can return
   1002     # immediately, since there is no result for which to wait.
   1003     self._remote_inspector_thread.PerformAction(COLLECT_GARBAGE_MESSAGES, None)
   1004 
   1005   def StartTimelineEventMonitoring(self, event_callback):
   1006     """Starts timeline event monitoring.
   1007 
   1008     Args:
   1009       event_callback: A callable to invoke whenever a timeline event is observed
   1010           from the remote inspector.  The callable should take a single input,
   1011           which is a dictionary containing the detailed information of a
   1012           timeline event.
   1013     """
   1014     if self._timeline_started:
   1015       self._logger.warning('Timeline monitoring already started.')
   1016       return
   1017     TIMELINE_MESSAGES = [
   1018       ('Timeline.start', {})
   1019     ]
   1020 
   1021     self._event_callback = event_callback
   1022 
   1023     done_condition = threading.Condition()
   1024     def HandleReply(reply_dict):
   1025       """Processes a reply message received from the remote Chrome instance.
   1026 
   1027       Args:
   1028         reply_dict: A dictionary object representing the reply message received
   1029                     from the remote Chrome instance.
   1030       """
   1031       if 'result' in reply_dict:
   1032         done_condition.acquire()
   1033         done_condition.notify()
   1034         done_condition.release()
   1035       if reply_dict.get('method') == 'Timeline.eventRecorded':
   1036         self._event_callback(reply_dict['params']['record'])
   1037 
   1038     # Tell the remote inspector to start the timeline.
   1039     self._timeline_callback = HandleReply
   1040     self._remote_inspector_thread.AddMessageCallback(self._timeline_callback)
   1041     self._remote_inspector_thread.PerformAction(TIMELINE_MESSAGES, None)
   1042 
   1043     done_condition.acquire()
   1044     done_condition.wait()
   1045     done_condition.release()
   1046 
   1047     self._timeline_started = True
   1048 
   1049   def StopTimelineEventMonitoring(self):
   1050     """Stops timeline event monitoring."""
   1051     if not self._timeline_started:
   1052       self._logger.warning('Timeline monitoring already stopped.')
   1053       return
   1054     TIMELINE_MESSAGES = [
   1055       ('Timeline.stop', {})
   1056     ]
   1057 
   1058     done_condition = threading.Condition()
   1059     def HandleReply(reply_dict):
   1060       """Processes a reply message received from the remote Chrome instance.
   1061 
   1062       Args:
   1063         reply_dict: A dictionary object representing the reply message received
   1064                     from the remote Chrome instance.
   1065       """
   1066       if 'result' in reply_dict:
   1067         done_condition.acquire()
   1068         done_condition.notify()
   1069         done_condition.release()
   1070 
   1071     # Tell the remote inspector to stop the timeline.
   1072     self._remote_inspector_thread.RemoveMessageCallback(self._timeline_callback)
   1073     self._remote_inspector_thread.PerformAction(TIMELINE_MESSAGES, HandleReply)
   1074 
   1075     done_condition.acquire()
   1076     done_condition.wait()
   1077     done_condition.release()
   1078 
   1079     self._timeline_started = False
   1080 
   1081   def _ConvertByteCountToHumanReadableString(self, num_bytes):
   1082     """Converts an integer number of bytes into a human-readable string.
   1083 
   1084     Args:
   1085       num_bytes: An integer number of bytes.
   1086 
   1087     Returns:
   1088       A human-readable string representation of the given number of bytes.
   1089     """
   1090     if num_bytes < 1024:
   1091       return '%d B' % num_bytes
   1092     elif num_bytes < 1048576:
   1093       return '%.2f KB' % (num_bytes / 1024.0)
   1094     else:
   1095       return '%.2f MB' % (num_bytes / 1048576.0)
   1096 
   1097   @staticmethod
   1098   def _GetVersion(endpoint):
   1099     """Fetches version information from a remote Chrome instance.
   1100 
   1101     Args:
   1102       endpoint: The base URL to connent to.
   1103 
   1104     Returns:
   1105       A dictionary containing Browser and Content version information:
   1106       {
   1107         'Browser': {
   1108           'major': integer,
   1109           'minor': integer,
   1110           'fix': integer,
   1111           'day': integer
   1112         },
   1113         'Content': {
   1114           'name': string,
   1115           'major': integer,
   1116           'minor': integer
   1117         }
   1118       }
   1119 
   1120     Raises:
   1121       RuntimeError: When Browser version info can't be fetched or parsed.
   1122     """
   1123     try:
   1124       f = urllib2.urlopen(endpoint + '/json/version')
   1125       result = f.read();
   1126       result = simplejson.loads(result)
   1127     except urllib2.URLError, e:
   1128       raise RuntimeError(
   1129           'Error accessing Chrome instance debugging port: ' + str(e))
   1130 
   1131     if 'Browser' not in result:
   1132       raise RuntimeError('Browser version is not specified.')
   1133 
   1134     parsed = re.search('^Chrome\/(\d+).(\d+).(\d+).(\d+)', result['Browser'])
   1135     if parsed is None:
   1136       raise RuntimeError('Browser-Version cannot be parsed.')
   1137     try:
   1138       day = int(parsed.group(3))
   1139       browser_info = {
   1140         'major': int(parsed.group(1)),
   1141         'minor': int(parsed.group(2)),
   1142         'day': day,
   1143         'fix': int(parsed.group(4)),
   1144       }
   1145     except ValueError:
   1146       raise RuntimeError('Browser-Version cannot be parsed.')
   1147 
   1148     if 'WebKit-Version' not in result:
   1149       raise RuntimeError('Content-Version is not specified.')
   1150 
   1151     parsed = re.search('^(\d+)\.(\d+)', result['WebKit-Version'])
   1152     if parsed is None:
   1153       raise RuntimeError('Content-Version cannot be parsed.')
   1154 
   1155     try:
   1156       platform_info = {
   1157         'name': 'Blink' if day > 1464 else 'WebKit',
   1158         'major': int(parsed.group(1)),
   1159         'minor': int(parsed.group(2)),
   1160       }
   1161     except ValueError:
   1162       raise RuntimeError('WebKit-Version cannot be parsed.')
   1163 
   1164     return {
   1165       'browser': browser_info,
   1166       'platform': platform_info
   1167     }
   1168 
   1169   def _IsContentVersionNotOlderThan(self, major, minor):
   1170     """Compares remote Browser Content version with specified one.
   1171 
   1172     Args:
   1173       major: Major Webkit version.
   1174       minor: Minor Webkit version.
   1175 
   1176     Returns:
   1177       True if remote Content version is same or newer than specified,
   1178       False otherwise.
   1179 
   1180     Raises:
   1181       RuntimeError: If remote Content version hasn't been fetched yet.
   1182     """
   1183     if not hasattr(self, '_version'):
   1184       raise RuntimeError('Browser version has not been fetched yet.')
   1185     version = self._version['platform']
   1186 
   1187     if version['major'] < major:
   1188       return False
   1189     elif version['major'] == major and version['minor'] < minor:
   1190       return False
   1191     else:
   1192       return True
   1193 
   1194   def _IsBrowserDayNumberGreaterThan(self, day_number):
   1195     """Compares remote Chromium day number with specified one.
   1196 
   1197     Args:
   1198       day_number: Forth part of the chromium version.
   1199 
   1200     Returns:
   1201       True if remote Chromium day number is same or newer than specified,
   1202       False otherwise.
   1203 
   1204     Raises:
   1205       RuntimeError: If remote Chromium version hasn't been fetched yet.
   1206     """
   1207     if not hasattr(self, '_version'):
   1208       raise RuntimeError('Browser revision has not been fetched yet.')
   1209     version = self._version['browser']
   1210 
   1211     return version['day'] > day_number
   1212