diff options
Diffstat (limited to 'scripts/update_payload/histogram.py')
-rw-r--r-- | scripts/update_payload/histogram.py | 115 |
1 files changed, 115 insertions, 0 deletions
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Histogram generation tools."""

from collections import defaultdict


class Histogram(object):
    """A histogram generating object.

    This object serves the sole purpose of formatting (key, val) pairs as an
    ASCII histogram, including bars and percentage markers, and taking care of
    label alignment, scaling, etc. In addition to the standard __init__
    interface, two static methods are provided for conveniently converting data
    in different formats into a histogram. Histogram generation is exported via
    its __str__ method, and looks as follows:

      Yes |################    | 5 (83.3%)
      No  |###                 | 1 (16.6%)

    TODO(garnold) we may want to add actual methods for adding data or tweaking
    the output layout and formatting. For now, though, this is fine.

    """

    def __init__(self, data, scale=20, formatter=None):
        """Initialize a histogram object.

        Args:
          data: iterable of (key, count) pairs constituting the histogram
          scale: number of characters used to indicate 100%
          formatter: function used for formatting raw histogram values;
            defaults to str

        """
        # Snapshot the pairs: they are traversed more than once below and again
        # when rendering, which a one-shot iterator would not survive.
        self.data = list(data)
        self.scale = scale
        self.formatter = formatter or str
        # The "or [0]" fallback keeps an empty histogram constructible; max()
        # of an empty sequence raises ValueError.
        self.max_key_len = max([len(str(key)) for key, _ in self.data] or [0])
        self.total = sum([count for _, count in self.data])

    @staticmethod
    def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
        """Takes a dictionary of counts and returns a histogram object.

        This simply converts a mapping from names to counts into a list of
        (key, count) pairs, optionally translating keys into name strings, then
        generating and returning a histogram for them. This is a useful
        convenience call for clients that update a dictionary of counters as
        they (say) scan a data stream.

        Args:
          count_dict: dictionary mapping keys to occurrence counts
          scale: number of characters used to indicate 100%
          formatter: function used for formatting raw histogram values
          key_names: dictionary mapping keys to name strings
        Returns:
          A histogram object based on the given data.
        Raises:
          KeyError: if key_names is given (and non-empty) but lacks an entry
            for one of the keys in count_dict.

        """
        if key_names:
            hist = [(key_names[key], count)
                    for key, count in count_dict.items()]
        else:
            hist = list(count_dict.items())
        return Histogram(hist, scale, formatter)

    @staticmethod
    def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
        """Takes a list of (possibly recurring) keys and returns a histogram.

        This converts the list into a dictionary of counters, then uses
        FromCountDict() to generate the actual histogram. For example:

          ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

        Args:
          key_list: list of (possibly recurring) keys
          scale: number of characters used to indicate 100%
          formatter: function used for formatting raw histogram values
          key_names: dictionary mapping keys to name strings
        Returns:
          A histogram object based on the given data.

        """
        count_dict = defaultdict(int)  # Unset items default to zero
        for key in key_list:
            count_dict[key] += 1
        return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

    def _FormatBar(self, count):
        """Returns the bar segment of a histogram line for the given count.

        Args:
          count: the value whose share of self.total is to be drawn
        Returns:
          A string of the form '|###   |' that is self.scale characters wide
          between the delimiters, or a bare '|' when the total is zero (no
          meaningful proportion can be drawn).

        """
        if not self.total:
            return '|'
        # Floor division plus int() guarantees an integral repeat count; true
        # division would yield a float, which cannot multiply a string.
        bar_len = int(count * self.scale // self.total)
        return '|%s|' % ('#' * bar_len).ljust(self.scale)

    def __str__(self):
        """Returns the histogram as newline-separated text, one line per key.

        Each line holds the key (padded to the longest key), the bar, the
        formatted count and, in parentheses, the value returned by
        format_utils.NumToPercent(count, total).

        """
        # Imported here rather than at module level so that the formatting
        # helpers are only needed once a histogram is actually rendered.
        import format_utils

        hist_lines = []
        for key, count in self.data:
            line = '%s %s %s (%s)' % (
                str(key).ljust(self.max_key_len),
                self._FormatBar(count),
                self.formatter(count),
                format_utils.NumToPercent(count, self.total))
            hist_lines.append(line)

        return '\n'.join(hist_lines)

    def GetKeys(self):
        """Returns the keys of the histogram."""
        return [key for key, _ in self.data]