Home | History | Annotate | Download | only in blink_gc_plugin
      1 #!/usr/bin/env python
      2 # Copyright 2014 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 import argparse, os, sys, json, subprocess, pickle, StringIO
      7 
      8 parser = argparse.ArgumentParser(
      9   description =
     10     "Process the Blink points-to graph generated by the Blink GC plugin.")
     11 
     12 parser.add_argument(
     13   '-', dest='use_stdin', action='store_true',
     14   help='Read JSON graph files from stdin')
     15 
     16 parser.add_argument(
     17   '-c', '--detect-cycles', action='store_true',
     18   help='Detect cycles containing GC roots')
     19 
     20 parser.add_argument(
     21   '-s', '--print-stats', action='store_true',
     22   help='Statistics about ref-counted and traced objects')
     23 
     24 parser.add_argument(
     25   '-v', '--verbose', action='store_true',
     26   help='Verbose output')
     27 
     28 parser.add_argument(
     29   '--ignore-cycles', default=None, metavar='FILE',
     30   help='File with cycles to ignore')
     31 
     32 parser.add_argument(
     33   '--ignore-classes', nargs='*', default=[], metavar='CLASS',
     34   help='Classes to ignore when detecting cycles')
     35 
     36 parser.add_argument(
     37   '--pickle-graph', default=None, metavar='FILE',
     38   help='File to read/save the graph from/to')
     39 
     40 parser.add_argument(
     41   'files', metavar='FILE_OR_DIR', nargs='*', default=[],
     42   help='JSON graph files or directories containing them')
     43 
# Command line args after parsing (argparse.Namespace, set in main()).
args = None

# Map from node labels (class names) to Node objects.
graph = {}

# List of root edges (Edge objects with kind == 2), collected by
# complete_graph().
roots = []

# List of cycle blocks to ignore, read by read_ignored_cycles().
ignored_cycles = []
     55 
     56 # Global flag to determine exit code.
     57 global_reported_error = False
     58 
     59 def set_reported_error(value):
     60   global global_reported_error
     61   global_reported_error = value
     62 
     63 def reported_error():
     64   return global_reported_error
     65 
def log(msg):
  # Print a progress message only when --verbose was given.
  if args.verbose:
    print msg
     69 
     70 global_inc_copy = 0
     71 def inc_copy():
     72   global global_inc_copy
     73   global_inc_copy += 1
     74 
     75 def get_node(name):
     76   return graph.setdefault(name, Node(name))
     77 
# Pointer kinds counted per node (see Node.counts / inc_ptr). Presumably
# raw pointer, ref-counted pointer and Member fields — TODO confirm
# against the plugin's JSON output.
ptr_types = ('raw', 'ref', 'mem')
     79 
     80 def inc_ptr(dst, ptr):
     81   if ptr in ptr_types:
     82     node = graph.get(dst)
     83     if not node: return
     84     node.counts[ptr] += 1
     85 
     86 def add_counts(s1, s2):
     87   for (k, v) in s2.iteritems():
     88     s1[k] += s2[k]
     89 
     90 # Representation of graph nodes. Basically a map of directed edges.
     91 class Node:
     92   def __init__(self, name):
     93     self.name = name
     94     self.edges = {}
     95     self.reset()
     96   def __repr__(self):
     97     return "%s(%s) %s" % (self.name, self.visited, self.edges)
     98   def update_node(self, decl):
     99     # Currently we don't track any node info besides its edges.
    100     pass
    101   def update_edge(self, e):
    102     new_edge = Edge(**e)
    103     edge = self.edges.get(new_edge.key)
    104     if edge:
    105       # If an edge exist, its kind is the strongest of the two.
    106       edge.kind = max(edge.kind, new_edge.kind)
    107     else:
    108       self.edges[new_edge.key] = new_edge
    109   def super_edges(self):
    110     return [ e for e in self.edges.itervalues() if e.is_super() ]
    111   def subclass_edges(self):
    112     return [ e for e in self.edges.itervalues() if e.is_subclass() ]
    113   def reset(self):
    114     self.cost = sys.maxint
    115     self.visited = False
    116     self.path = None
    117     self.counts = {}
    118     for ptr in ptr_types:
    119       self.counts[ptr] = 0
    120   def update_counts(self):
    121     for e in self.edges.itervalues():
    122       inc_ptr(e.dst, e.ptr)
    123 
    124 # Representation of directed graph edges.
    125 class Edge:
    126   def __init__(self, **decl):
    127     self.src = decl['src']
    128     self.dst = decl['dst']
    129     self.lbl = decl['lbl']
    130     self.ptr = decl['ptr']
    131     self.kind = decl['kind'] # 0 = weak, 1 = strong, 2 = root
    132     self.loc = decl['loc']
    133     # The label does not uniquely determine an edge from a node. We
    134     # define the semi-unique key to be the concatenation of the
    135     # label and dst name. This is sufficient to track the strongest
    136     # edge to a particular type. For example, if the field A::m_f
    137     # has type HashMap<WeakMember<B>, Member<B>> we will have a
    138     # strong edge with key m_f#B from A to B.
    139     self.key = '%s#%s' % (self.lbl, self.dst)
    140   def __repr__(self):
    141     return '%s (%s) => %s' % (self.src, self.lbl, self.dst)
    142   def is_root(self):
    143     return self.kind == 2
    144   def is_weak(self):
    145     return self.kind == 0
    146   def keeps_alive(self):
    147     return self.kind > 0
    148   def is_subclass(self):
    149     return self.lbl.startswith('<subclass>')
    150   def is_super(self):
    151     return self.lbl.startswith('<super>')
    152 
    153 def parse_file(filename):
    154   obj = json.load(open(filename))
    155   return obj
    156 
    157 def build_graphs_in_dir(dirname):
    158   # TODO: Use plateform independent code, eg, os.walk
    159   files = subprocess.check_output(
    160     ['find', dirname, '-name', '*.graph.json']).split('\n')
    161   log("Found %d files" % len(files))
    162   for f in files:
    163     f.strip()
    164     if len(f) < 1:
    165       continue
    166     build_graph(f)
    167 
    168 def build_graph(filename):
    169   for decl in parse_file(filename):
    170     if decl.has_key('name'):
    171       # Add/update a node entry
    172       name = decl['name']
    173       node = get_node(name)
    174       node.update_node(decl)
    175     else:
    176       # Add/update an edge entry
    177       name = decl['src']
    178       node = get_node(name)
    179       node.update_edge(decl)
    180 
# Copy all non-weak edges from super classes to their subclasses.
# This causes all fields of a super to be considered fields of a
# derived class without transitively relating derived classes with
# each other. For example, if B <: A, C <: A, and for some D, D => B,
# we don't want that to entail that D => C.
def copy_super_edges(edge):
  # |edge| is expected to be a <super> edge from a subclass (edge.src)
  # to its super class (edge.dst).
  if edge.is_weak() or not edge.is_super():
    return
  inc_copy()
  # Make the super-class edge weak (prohibits processing twice).
  edge.kind = 0
  # If the super class is not in our graph exit early.
  super_node = graph.get(edge.dst)
  if super_node is None: return
  # Recursively copy all super-class edges first, so the super class
  # already contains everything it inherits before we copy from it.
  # (super_edges() returns a snapshot list, so the recursion may safely
  # mutate super_node.edges.)
  for e in super_node.super_edges():
    copy_super_edges(e)
  # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
  sub_node = graph[edge.src]
  for e in super_node.edges.itervalues():
    if e.keeps_alive() and not e.is_subclass():
      # The label records the inheritance step, e.g. 'A <: m_f', so the
      # reported path shows which super class contributed the field.
      new_edge = Edge(
        src = sub_node.name,
        dst = e.dst,
        lbl = '%s <: %s' % (super_node.name, e.lbl),
        ptr = e.ptr,
        kind = e.kind,
        loc = e.loc,
      )
      sub_node.edges[new_edge.key] = new_edge
  # Add a strong sub-class edge (super => sub), added after the loop so
  # we do not mutate super_node.edges while iterating it.
  sub_edge = Edge(
    src = super_node.name,
    dst = sub_node.name,
    lbl = '<subclass>',
    ptr = edge.ptr,
    kind = 1,
    loc = edge.loc,
  )
  super_node.edges[sub_edge.key] = sub_edge
    221 
def complete_graph():
  # Finish graph construction: flatten inheritance by copying inherited
  # fields down into subclasses, and collect every root edge (kind == 2)
  # into the global |roots| list for cycle detection.
  for node in graph.itervalues():
    # super_edges() is a snapshot, so copy_super_edges may mutate edge
    # dicts while we iterate.
    for edge in node.super_edges():
      copy_super_edges(edge)
    for edge in node.edges.itervalues():
      if edge.is_root():
        roots.append(edge)
  log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)
    230 
    231 def reset_graph():
    232   for n in graph.itervalues():
    233     n.reset()
    234 
def shortest_path(start, end):
  # Shortest path from |start| to |end| over keeps-alive edges, with
  # uniform edge weight 1 (a selection-sort flavor of Dijkstra/BFS).
  # Results are left on the nodes themselves: node.cost is the distance
  # from |start| and node.path is the edge used to reach the node.
  # Callers must run reset_graph() beforehand so cost/visited/path are
  # fresh; |end| was reached iff end.cost < sys.maxint afterwards.
  start.cost = 0
  minlist = [start]
  while len(minlist) > 0:
    # Pop the cheapest node: sort descending by cost, take from the end.
    # (minlist may hold duplicates; the visited flag makes that safe.)
    minlist.sort(key=lambda n: -n.cost)
    current = minlist.pop()
    current.visited = True
    # Stop once |end| is settled, or once every remaining node is too
    # expensive to improve on end.cost.
    if current == end or current.cost >= end.cost + 1:
      return
    for e in current.edges.itervalues():
      # Only strong (keeps-alive) edges can contribute to a leak cycle.
      if not e.keeps_alive():
        continue
      dst = graph.get(e.dst)
      if dst is None or dst.visited:
        continue
      if current.cost < dst.cost:
        dst.cost = current.cost + 1
        dst.path = e  # Remember the edge that reached |dst|.
      minlist.append(dst)
    254 
def detect_cycles():
  # For each root edge (a Persistent from src to dst), check whether the
  # root's target (dst) can reach back to its host (src) over
  # keeps-alive edges — if so, the root keeps a cycle alive that
  # includes its own owner, and it is reported.
  for root_edge in roots:
    reset_graph()
    # Mark ignored classes as already visited so no path may pass
    # through them.
    for ignore in args.ignore_classes:
      # Bare class names are qualified with the blink namespace.
      name = ignore.find("::") > 0 and ignore or ("blink::" + ignore)
      node = graph.get(name)
      if node:
        node.visited = True
    src = graph[root_edge.src]
    dst = graph.get(root_edge.dst)
    if src.visited:
      continue
    # Roots targeting WTF::String are always skipped.
    if root_edge.dst == "WTF::String":
      continue
    if dst is None:
      print "\nPersistent root to incomplete destination object:"
      print root_edge
      set_reported_error(True)
      continue
    # Find the shortest path from the root target (dst) to its host (src)
    shortest_path(dst, src)
    # src.cost < sys.maxint means src was reached, i.e. a cycle exists.
    if src.cost < sys.maxint:
      report_cycle(root_edge)
    279 
    280 def is_ignored_cycle(cycle):
    281   for block in ignored_cycles:
    282     if block_match(cycle, block):
    283       return True
    284 
    285 def block_match(b1, b2):
    286   if len(b1) != len(b2):
    287     return False
    288   for (l1, l2) in zip(b1, b2):
    289     if l1 != l2:
    290       return False
    291   return True
    292 
def report_cycle(root_edge):
  # Pretty-print the cycle found for |root_edge| unless it is ignored.
  # The cycle is reconstructed by walking the path edges recorded by
  # shortest_path() backwards from the root's source.
  dst = graph[root_edge.dst]
  path = []
  edge = root_edge
  dst.path = None  # Terminate the walk when it reaches the root target.
  while edge:
    path.append(edge)
    edge = graph[edge.src].path
  path.append(root_edge)
  path.reverse()
  # Find the max loc length for pretty printing.
  max_loc = 0
  for p in path:
    if len(p.loc) > max_loc:
      max_loc = len(p.loc)
  out = StringIO.StringIO()
  # The root edge appears at both ends of |path|; drop the duplicate at
  # the end when printing.
  for p in path[:-1]:
    print >>out, (p.loc + ':').ljust(max_loc + 1), p
  sout = out.getvalue()
  # The printed text is compared verbatim against --ignore-cycles blocks.
  if not is_ignored_cycle(sout):
    print "\nFound a potentially leaking cycle starting from a GC root:\n", sout
    set_reported_error(True)
    315 
    316 def load_graph():
    317   global graph
    318   global roots
    319   log("Reading graph from pickled file: " + args.pickle_graph)
    320   dump = pickle.load(open(args.pickle_graph, 'rb'))
    321   graph = dump[0]
    322   roots = dump[1]
    323 
    324 def save_graph():
    325   log("Saving graph to pickle file: " + args.pickle_graph)
    326   dump = (graph, roots)
    327   pickle.dump(dump, open(args.pickle_graph, 'wb'))
    328 
    329 def read_ignored_cycles():
    330   global ignored_cycles
    331   if not args.ignore_cycles:
    332     return
    333   log("Reading ignored cycles from file: " + args.ignore_cycles)
    334   block = []
    335   for l in open(args.ignore_cycles):
    336     line = l.strip()
    337     if not line or line.startswith('Found'):
    338       if len(block) > 0:
    339         ignored_cycles.append(block)
    340       block = []
    341     else:
    342       block += l
    343   if len(block) > 0:
    344     ignored_cycles.append(block)
    345 
# Base classes whose presence as a <super> edge classifies a hierarchy
# as garbage collected (see print_stats).
gc_bases = (
  'blink::GarbageCollected',
  'blink::GarbageCollectedFinalized',
  'blink::GarbageCollectedMixin',
)
# Base classes classifying a hierarchy as reference counted.
ref_bases = (
  'WTF::RefCounted',
  'WTF::ThreadSafeRefCounted',
)
# Base classes of hierarchies in transition from ref counting to GC.
gcref_bases = (
  'blink::RefCountedGarbageCollected',
  'blink::ThreadSafeRefCountedGarbageCollected',
)
# Mixin bases counted separately as 'ref-mixins' in hierarchy_stats.
ref_mixins = (
  'blink::EventTarget',
  'blink::EventTargetWithInlineData',
  'blink::ActiveDOMObject',
)
    364 
def print_stats():
  # Classify every class hierarchy by its memory-management base class,
  # print the distribution, then print per-hierarchy field counts for
  # the hierarchies still transitioning to garbage collection.
  gcref_managed = []
  ref_managed = []
  gc_managed = []
  hierarchies = []

  for node in graph.itervalues():
    node.update_counts()
    for sup in node.super_edges():
      if sup.dst in gcref_bases:
        gcref_managed.append(node)
      elif sup.dst in ref_bases:
        ref_managed.append(node)
      elif sup.dst in gc_bases:
        gc_managed.append(node)

  groups = [("GC manged   ", gc_managed),
            ("ref counted ", ref_managed),
            ("in transition", gcref_managed)]
  total = sum([len(g) for (s,g) in groups])
  for (s, g) in groups:
    # Integer percentage (Python 2 floor division). Raises
    # ZeroDivisionError if no hierarchy was classified at all.
    percent = len(g) * 100 / total
    print "%2d%% is %s (%d hierarchies)" % (percent, s, len(g))

  # Aggregate class/field counters over each transitioning hierarchy's
  # whole subclass tree.
  for base in gcref_managed:
    stats = dict({ 'classes': 0, 'ref-mixins': 0 })
    for ptr in ptr_types: stats[ptr] = 0
    hierarchy_stats(base, stats)
    hierarchies.append((base, stats))

  print "\nHierarchies in transition (RefCountedGarbageCollected):"
  # Largest hierarchies (most classes) first. (Tuple-unpacking lambda is
  # Python 2 only.)
  hierarchies.sort(key=lambda (n,s): -s['classes'])
  for (node, stats) in hierarchies:
    total = stats['mem'] + stats['ref'] + stats['raw']
    # '*' marks hierarchies without remaining ref or ref-mixin fields;
    # the percentage is the share of 'mem' fields (100 when no fields).
    print (
      "%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins" %
      (stats['ref'] == 0 and stats['ref-mixins'] == 0 and "*" or " ",
       total == 0 and 100 or stats['mem'] * 100 / total,
       node.name.replace('blink::', ''),
       stats['classes'],
       stats['mem'],
       stats['ref'],
       stats['raw'],
       stats['ref-mixins'],
     ))
    410 
    411 def hierarchy_stats(node, stats):
    412   if not node: return
    413   stats['classes'] += 1
    414   add_counts(stats, node.counts)
    415   for edge in node.super_edges():
    416     if edge.dst in ref_mixins:
    417       stats['ref-mixins'] += 1
    418   for edge in node.subclass_edges():
    419     hierarchy_stats(graph.get(edge.dst), stats)
    420 
def main():
  # Parse arguments, build (or load) the points-to graph, then run the
  # requested analyses. Returns the process exit status: 1 on usage
  # errors or when any analysis reported a problem, 0 otherwise.
  global args
  args = parser.parse_args()
  if not (args.detect_cycles or args.print_stats):
    print "Please select an operation to perform (eg, -c to detect cycles)"
    parser.print_help()
    return 1
  # Reuse a previously pickled graph when available; otherwise build it
  # from JSON files (stdin, or files/directories on the command line).
  if args.pickle_graph and os.path.isfile(args.pickle_graph):
    load_graph()
  else:
    if args.use_stdin:
      log("Reading files from stdin")
      # One JSON graph file name per stdin line.
      for f in sys.stdin:
        build_graph(f.strip())
    else:
      log("Reading files and directories from command line")
      if len(args.files) == 0:
        print "Please provide files or directores for building the graph"
        parser.print_help()
        return 1
      for f in args.files:
        if os.path.isdir(f):
          log("Building graph from files in directory: " + f)
          build_graphs_in_dir(f)
        else:
          log("Building graph from file: " + f)
          build_graph(f)
    log("Completing graph construction (%d graph nodes)" % len(graph))
    complete_graph()
    # Cache the completed graph for the next run.
    if args.pickle_graph:
      save_graph()
  if args.detect_cycles:
    read_ignored_cycles()
    log("Detecting cycles containg GC roots")
    detect_cycles()
  if args.print_stats:
    log("Printing statistics")
    print_stats()
  if reported_error():
    return 1
  return 0
    462 
# Script entry point: main()'s return value becomes the exit status
# (1 when errors were reported, see reported_error()).
if __name__ == '__main__':
  sys.exit(main())
    465