Home | History | Annotate | Download | only in update_payload
      1 # Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Histogram generation tools."""
      6 
      7 from collections import defaultdict
      8 
      9 from update_payload import format_utils
     10 
     11 
     12 class Histogram(object):
     13   """A histogram generating object.
     14 
     15   This object serves the sole purpose of formatting (key, val) pairs as an
     16   ASCII histogram, including bars and percentage markers, and taking care of
     17   label alignment, scaling, etc. In addition to the standard __init__
     18   interface, two static methods are provided for conveniently converting data
     19   in different formats into a histogram. Histogram generation is exported via
     20   its __str__ method, and looks as follows:
     21 
     22     Yes |################    | 5 (83.3%)
     23     No  |###                 | 1 (16.6%)
     24 
     25   TODO(garnold) we may want to add actual methods for adding data or tweaking
     26   the output layout and formatting. For now, though, this is fine.
     27 
     28   """
     29 
     30   def __init__(self, data, scale=20, formatter=None):
     31     """Initialize a histogram object.
     32 
     33     Args:
     34       data: list of (key, count) pairs constituting the histogram
     35       scale: number of characters used to indicate 100%
     36       formatter: function used for formatting raw histogram values
     37 
     38     """
     39     self.data = data
     40     self.scale = scale
     41     self.formatter = formatter or str
     42     self.max_key_len = max([len(str(key)) for key, count in self.data])
     43     self.total = sum([count for key, count in self.data])
     44 
     45   @staticmethod
     46   def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
     47     """Takes a dictionary of counts and returns a histogram object.
     48 
     49     This simply converts a mapping from names to counts into a list of (key,
     50     count) pairs, optionally translating keys into name strings, then
     51     generating and returning a histogram for them. This is a useful convenience
     52     call for clients that update a dictionary of counters as they (say) scan a
     53     data stream.
     54 
     55     Args:
     56       count_dict: dictionary mapping keys to occurrence counts
     57       scale: number of characters used to indicate 100%
     58       formatter: function used for formatting raw histogram values
     59       key_names: dictionary mapping keys to name strings
     60     Returns:
     61       A histogram object based on the given data.
     62 
     63     """
     64     namer = None
     65     if key_names:
     66       namer = lambda key: key_names[key]
     67     else:
     68       namer = lambda key: key
     69 
     70     hist = [(namer(key), count) for key, count in count_dict.items()]
     71     return Histogram(hist, scale, formatter)
     72 
     73   @staticmethod
     74   def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
     75     """Takes a list of (possibly recurring) keys and returns a histogram object.
     76 
     77     This converts the list into a dictionary of counters, then uses
     78     FromCountDict() to generate the actual histogram. For example:
     79 
     80       ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...
     81 
     82     Args:
     83       key_list: list of (possibly recurring) keys
     84       scale: number of characters used to indicate 100%
     85       formatter: function used for formatting raw histogram values
     86       key_names: dictionary mapping keys to name strings
     87     Returns:
     88       A histogram object based on the given data.
     89 
     90     """
     91     count_dict = defaultdict(int)  # Unset items default to zero
     92     for key in key_list:
     93       count_dict[key] += 1
     94     return Histogram.FromCountDict(count_dict, scale, formatter, key_names)
     95 
     96   def __str__(self):
     97     hist_lines = []
     98     hist_bar = '|'
     99     for key, count in self.data:
    100       if self.total:
    101         bar_len = count * self.scale / self.total
    102         hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale)
    103 
    104       line = '%s %s %s' % (
    105           str(key).ljust(self.max_key_len),
    106           hist_bar,
    107           self.formatter(count))
    108       percent_str = format_utils.NumToPercent(count, self.total)
    109       if percent_str:
    110         line += ' (%s)' % percent_str
    111       hist_lines.append(line)
    112 
    113     return '\n'.join(hist_lines)
    114 
    115   def GetKeys(self):
    116     """Returns the keys of the histogram."""
    117     return [key for key, _ in self.data]
    118