summaryrefslogtreecommitdiff
path: root/scripts/update_payload/histogram.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/update_payload/histogram.py')
-rw-r--r--scripts/update_payload/histogram.py115
1 files changed, 115 insertions, 0 deletions
diff --git a/scripts/update_payload/histogram.py b/scripts/update_payload/histogram.py
new file mode 100644
index 00000000..a5ddac4b
--- /dev/null
+++ b/scripts/update_payload/histogram.py
@@ -0,0 +1,115 @@
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Histogram generation tools."""
+
+from collections import defaultdict
+
+import format_utils
+
+
+class Histogram(object):
+    """A histogram generating object.
+
+    This object serves the sole purpose of formatting (key, val) pairs as
+    an ASCII histogram, including bars and percentage markers, and taking
+    care of label alignment, scaling, etc. In addition to the standard
+    __init__ interface, two static methods are provided for conveniently
+    converting data in different formats into a histogram. Histogram
+    generation is exported via its __str__ method, and looks as follows:
+
+        Yes |################    | 5 (83.3%)
+        No  |###                 | 1 (16.6%)
+
+    TODO(garnold) we may want to add actual methods for adding data or
+    tweaking the output layout and formatting. For now, though, this is
+    fine.
+
+    """
+
+    def __init__(self, data, scale=20, formatter=None):
+        """Initialize a histogram object.
+
+        Args:
+          data: list of (key, count) pairs constituting the histogram; may
+            be empty, in which case the histogram renders as ''
+          scale: number of characters used to indicate 100%
+          formatter: function used for formatting raw histogram values;
+            str is used when none is given
+
+        """
+        self.data = data
+        self.scale = scale
+        self.formatter = formatter or str
+        # max() raises ValueError on an empty sequence, so fall back to a
+        # zero-width key column when there is no data at all.
+        key_lens = [len(str(key)) for key, _ in self.data]
+        self.max_key_len = max(key_lens) if key_lens else 0
+        self.total = sum(count for _, count in self.data)
+
+    @staticmethod
+    def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
+        """Takes a dictionary of counts and returns a histogram object.
+
+        This simply converts a mapping from names to counts into a list of
+        (key, count) pairs, optionally translating keys into name strings,
+        then generating and returning a histogram for them. This is a
+        useful convenience call for clients that update a dictionary of
+        counters as they (say) scan a data stream.
+
+        Args:
+          count_dict: dictionary mapping keys to occurrence counts
+          scale: number of characters used to indicate 100%
+          formatter: function used for formatting raw histogram values
+          key_names: dictionary mapping keys to name strings; when given
+            (and non-empty), every key of count_dict is looked up in it
+        Returns:
+          A histogram object based on the given data.
+        Raises:
+          KeyError: key_names is given but lacks a key of count_dict.
+
+        """
+        if key_names:
+            hist = [(key_names[key], count)
+                    for key, count in count_dict.items()]
+        else:
+            hist = [(key, count) for key, count in count_dict.items()]
+        return Histogram(hist, scale, formatter)
+
+    @staticmethod
+    def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
+        """Takes a list of (possibly recurring) keys and returns a histogram.
+
+        This converts the list into a dictionary of counters, then uses
+        FromCountDict() to generate the actual histogram. For example:
+
+          ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...
+
+        Args:
+          key_list: list of (possibly recurring) keys
+          scale: number of characters used to indicate 100%
+          formatter: function used for formatting raw histogram values
+          key_names: dictionary mapping keys to name strings
+        Returns:
+          A histogram object based on the given data.
+
+        """
+        count_dict = defaultdict(int)  # Unset items default to zero
+        for key in key_list:
+            count_dict[key] += 1
+        return Histogram.FromCountDict(count_dict, scale, formatter,
+                                       key_names)
+
+    def _FormatBar(self, count):
+        """Returns the '|###   |' bar for a count, or '|' if the total is 0."""
+        if not self.total:
+            # Nothing to scale against; emit only the opening delimiter.
+            return '|'
+        # Floor division keeps the bar length an integer on both Python 2
+        # and Python 3; true division would produce a float, which cannot
+        # be used to repeat a string.
+        bar_len = int(count * self.scale // self.total)
+        return '|%s|' % ('#' * bar_len).ljust(self.scale)
+
+    def __str__(self):
+        """Returns the histogram text, one line per (key, count) pair.
+
+        Each line holds the left-aligned key, the bar, the formatted count
+        and the value of format_utils.NumToPercent(count, total) in
+        parentheses. An empty histogram yields the empty string.
+
+        """
+        hist_lines = []
+        for key, count in self.data:
+            hist_lines.append('%s %s %s (%s)' % (
+                str(key).ljust(self.max_key_len),
+                self._FormatBar(count),
+                self.formatter(count),
+                format_utils.NumToPercent(count, self.total)))
+        return '\n'.join(hist_lines)
+
+    def GetKeys(self):
+        """Returns the keys of the histogram, in data order."""
+        return [key for key, _ in self.data]