diff options
Diffstat (limited to 'startop/scripts/app_startup/app_startup_runner.py')
-rwxr-xr-x | startop/scripts/app_startup/app_startup_runner.py | 269 |
1 files changed, 261 insertions, 8 deletions
class DataFrame:
    """Table-like class for storing a 2D cells table with named columns.

    Each row is stored as a dict mapping header -> cell value. A header that
    a row has no entry for reads back as None.
    """

    # Instances are mutable and compare by content, so they are unhashable.
    __hash__ = None

    def __init__(self, data: Dict[str, List[object]] = None):
        """
        Create a new DataFrame from a dictionary (keys = headers,
        values = columns).

        Columns may have different lengths; rows past the end of a shorter
        column simply have no entry for that header.
        """
        if data is None:
            data = {}

        self._headers = list(data)
        self._rows = []

        row_num = 0
        while True:
            # Collect the cells of every column that is long enough.
            row_dict = {header: column[row_num]
                        for header, column in data.items()
                        if len(column) > row_num}
            if not row_dict:
                break  # Every column is exhausted.

            self._append_row(row_dict.keys(), row_dict.values())
            row_num += 1

    def concat_rows(self, other: 'DataFrame') -> None:
        """
        In-place concatenate rows of other into the rows of the
        current DataFrame.

        None is added in pre-existing cells if new headers
        are introduced.
        """
        other_headers = other.headers

        for row in other._data_only():
            self._append_row(other_headers, row)

    def _append_row(self, headers: List[str], data: List[object]):
        """Append one row (parallel headers/cells) and register unseen headers."""
        self._rows.append(dict(zip(headers, data)))

        for header in headers:
            if header not in self._headers:
                self._headers.append(header)

    def __repr__(self):
        """Render the table as right-aligned text, one line per row."""
        header_list = self._headers_only()

        # Each column is padded to its header's length plus one.
        row_format = u"".join(u"{:>%d}" % (len(header) + 1)
                              for header in header_list)

        lines = [row_format.format(*header_list)]
        for row in self._data_only():
            # str() each cell: None (a blank cell) does not accept a
            # non-empty format spec and would otherwise raise TypeError.
            lines.append(row_format.format(*(str(cell) for cell in row)))

        return "".join(line + "\n" for line in lines)

    def __eq__(self, other):
        """Two DataFrames are equal when headers and all data rows match."""
        if not isinstance(other, self.__class__):
            return NotImplemented
        return (self.headers == other.headers
                and self.data_table == other.data_table)

    @property
    def headers(self) -> List[str]:
        """A copy of the header names, in column order."""
        return list(self._headers_only())

    @property
    def data_table(self) -> List[List[object]]:
        """All data rows (list of row lists), with None in blank cells."""
        return list(self._data_only())

    @property
    def data_table_transposed(self) -> List[List[object]]:
        """The data grouped per column instead of per row."""
        return list(self._transposed_data())

    @property
    def data_row_len(self) -> int:
        """Number of data rows."""
        return len(self._rows)

    def data_row_at(self, idx) -> List[object]:
        """
        Return a single data row at the specified index (0th based).

        Accepts negative indices, e.g. -1 is last row.
        """
        row_dict = self._rows[idx]
        # dict.get yields None for headers this row has no cell for.
        return [row_dict.get(header) for header in self._headers_only()]

    def copy(self) -> 'DataFrame':
        """
        Shallow copy of this DataFrame.
        """
        # Repeating every row exactly once duplicates the table;
        # a count of 0 would drop all rows.
        return self.repeat(count=1)

    def repeat(self, count: int) -> 'DataFrame':
        """
        Returns a new DataFrame where each row of this dataframe is repeated count times.
        A repeat of a row is adjacent to other repeats of that same row.
        """
        df = DataFrame()
        df._headers = self._headers.copy()
        df._rows = [row.copy() for row in self._rows for _ in range(count)]
        return df

    def merge_data_columns(self, other: 'DataFrame'):
        """
        Merge self and another DataFrame by adding the data from other column-wise.
        For any headers that are the same, data from 'other' is preferred.
        """
        for header in other._headers:
            if header not in self._headers:
                self._headers.append(header)

        # Rows that exist only in 'other'; appended after the loop so that
        # self._rows is not mutated while it is being iterated.
        append_rows = []

        for self_dict, other_dict in itertools.zip_longest(self._rows, other._rows):
            if self_dict is None:
                # 'other' has more rows than self.
                self_dict = {}
                append_rows.append(self_dict)

            if other_dict is not None:
                self_dict.update(other_dict)

        self._rows.extend(append_rows)

    def data_row_reduce(self, fnc) -> 'DataFrame':
        """
        Reduces the data row-wise by applying the fnc to each row (column-wise).
        Empty cells (None or the empty string) are skipped; a cell holding 0
        is a real value and is kept.

        fnc(Iterable[object]) -> object
        fnc is applied over every non-empty cell in that column (descending row-wise).

        Example:
          DataFrame({'a':[1,2,3]}).data_row_reduce(sum) == DataFrame({'a':[6]})

        Returns a new single-row DataFrame.
        """
        df = DataFrame()
        df._headers = self._headers.copy()

        def yield_by_column(header_key):
            for row_dict in self._rows:
                val = row_dict.get(header_key)
                if val is None or val == "":
                    continue
                yield val

        df._rows = [{header: fnc(yield_by_column(header))
                     for header in df._headers}]
        return df

    def _headers_only(self):
        return self._headers

    def _data_only(self):
        # The row count is captured when iteration starts, so rows appended
        # during iteration are not visited.
        row_len = len(self._rows)

        for i in range(row_len):
            yield self.data_row_at(i)

    def _transposed_data(self):
        return zip(*self._data_only())


def parse_run_script_csv_file(csv_file: TextIO) -> DataFrame:
    """Parse a CSV file full of integers into a DataFrame.

    The first row supplies the headers; every later row is a data row.
    Non-empty cells are converted with int(); empty cells are kept as ''.
    A data row shorter than the header row is padded with None so that the
    remaining cells stay aligned with their own row. Blank lines are ignored.

    Returns None when there is no header row.

    Raises:
      IndexError: a data row has more cells than there are headers.
      ValueError: a non-empty cell is not an integer.
    """
    csv_reader = csv.reader(csv_file)

    headers = next(csv_reader, None)
    if not headers:
        return None

    columns = {}
    for row in csv_reader:
        if not row:
            continue  # csv.reader yields [] for a blank line.

        if len(row) > len(headers):
            raise IndexError("CSV row has %d cells but only %d headers: %r"
                             % (len(row), len(headers), row))

        for idx, header in enumerate(headers):
            cell = row[idx] if idx < len(row) else None
            columns.setdefault(header, []).append(int(cell) if cell else cell)

    return DataFrame(columns)
def gather_results(commands: Iterable[Tuple[bool, str, 'DataFrame']], key_list: List[str], value_list: List[Tuple[str, ...]]):
    """Yield one labelled DataFrame per successful command.

    commands and value_list are walked in lockstep. For every command that
    passed, the fields of its entry in value_list (read via _asdict()) become
    label columns, repeated once per result row, and the result columns are
    merged onto the right-hand side.

    key_list is only used for debug output.
    """
    _debug_print("gather_results: key_list = ", key_list)

    for ((passed, script_output, run_result_list), values) in itertools.zip_longest(commands, value_list):
        _debug_print("run_result_list = ", run_result_list)
        _debug_print("values = ", values)
        if not passed:
            continue

        if run_result_list is None:
            # parse_run_script_csv_file returns None when the script output
            # had no header row; there is nothing to report for this run.
            continue

        # RunCommandArgs(package='com.whatever', readahead='warm', compiler_filter=None)
        # -> {'package':['com.whatever'], 'readahead':['warm'], 'compiler_filter':[None]}
        values_dict = {k: [v] for k, v in values._asdict().items()}

        values_df = DataFrame(values_dict)
        # project 'values_df' to be same number of rows as run_result_list.
        values_df = values_df.repeat(run_result_list.data_row_len)

        # the results are added as right-hand-side columns onto the existing labels for the table.
        values_df.merge_data_columns(run_result_list)

        yield values_df


def eval_and_save_to_csv(output, annotated_result_values):
    """Write every table in annotated_result_values to output as CSV.

    Each item must expose 'headers' (list of column names) and 'data_table'
    (list of row lists). A header row is written before the first table and
    again whenever a table's headers differ from the ones last written, so
    data rows always sit under the header row that describes them.

    output is flushed after each table so results can be watched live.
    """
    csv_writer = csv.writer(output)
    written_headers = None

    for table in annotated_result_values:
        headers = table.headers
        if headers != written_headers:
            csv_writer.writerow(headers)
            written_headers = headers

        csv_writer.writerows(table.data_table)

        output.flush()  # see the output live.