# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Histogram generation tools."""

from collections import defaultdict

import format_utils


class Histogram(object):
    """A histogram generating object.

    This object serves the sole purpose of formatting (key, val) pairs as an
    ASCII histogram, including bars and percentage markers, and taking care
    of label alignment, scaling, etc. In addition to the standard __init__
    interface, two static methods are provided for conveniently converting
    data in different formats into a histogram. Histogram generation is
    exported via its __str__ method, and looks as follows:

      Yes |################    | 5 (83.3%)
      No  |###                 | 1 (16.6%)

    TODO(garnold) we may want to add actual methods for adding data or
    tweaking the output layout and formatting. For now, though, this is fine.

    """

    def __init__(self, data, scale=20, formatter=None):
        """Initialize a histogram object.

        Args:
          data: iterable of (key, count) pairs constituting the histogram
          scale: number of characters used to indicate 100%
          formatter: function used for formatting raw histogram values;
            defaults to str

        """
        # Materialize the input: it is traversed several times (here and in
        # __str__), which would silently exhaust a generator after one pass.
        self.data = list(data)
        self.scale = scale
        self.formatter = formatter or str
        # 'or [0]' keeps an empty histogram from raising ValueError in max().
        self.max_key_len = max([len(str(key)) for key, _ in self.data] or [0])
        self.total = sum(count for _, count in self.data)

    @staticmethod
    def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
        """Takes a dictionary of counts and returns a histogram object.

        This simply converts a mapping from names to counts into a list of
        (key, count) pairs, optionally translating keys into name strings,
        then generating and returning a histogram for them. This is a useful
        convenience call for clients that update a dictionary of counters as
        they (say) scan a data stream.

        Args:
          count_dict: dictionary mapping keys to occurrence counts
          scale: number of characters used to indicate 100%
          formatter: function used for formatting raw histogram values
          key_names: dictionary mapping keys to name strings; when given
            (and non-empty), every key of count_dict is looked up in it
        Returns:
          A histogram object based on the given data.
        Raises:
          KeyError: if key_names is given but lacks a key of count_dict.

        """
        if key_names:
            hist = [(key_names[key], count)
                    for key, count in count_dict.items()]
        else:
            hist = list(count_dict.items())
        return Histogram(hist, scale, formatter)

    @staticmethod
    def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
        """Takes a list of (possibly recurring) keys and returns a histogram.

        This converts the list into a dictionary of counters, then uses
        FromCountDict() to generate the actual histogram. For example:

          ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

        Args:
          key_list: list of (possibly recurring) keys
          scale: number of characters used to indicate 100%
          formatter: function used for formatting raw histogram values
          key_names: dictionary mapping keys to name strings
        Returns:
          A histogram object based on the given data.

        """
        count_dict = defaultdict(int)  # Unset items default to zero
        for key in key_list:
            count_dict[key] += 1
        return Histogram.FromCountDict(count_dict, scale, formatter,
                                       key_names)

    def __str__(self):
        """Returns the histogram as text, one line per (key, count) pair.

        Each line holds the left-justified key, a bar padded to the
        configured scale, the formatted count and, when
        format_utils.NumToPercent() returns a non-empty value, that value in
        parentheses. When the total count is zero the bar degenerates to a
        single '|'.

        """
        hist_lines = []
        for key, count in self.data:
            if self.total:
                # Floor division (not '/') so the bar length stays an
                # integer under Python 3 as well; '#' * <float> is an error.
                bar_len = int(count * self.scale // self.total)
                hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale)
            else:
                # No meaningful proportion can be drawn without a total.
                hist_bar = '|'

            line = '%s %s %s' % (
                str(key).ljust(self.max_key_len),
                hist_bar,
                self.formatter(count))
            percent_str = format_utils.NumToPercent(count, self.total)
            if percent_str:
                line += ' (%s)' % percent_str
            hist_lines.append(line)

        return '\n'.join(hist_lines)

    def GetKeys(self):
        """Returns the keys of the histogram."""
        return [key for key, _ in self.data]