author | Igor Murashkin <iam@google.com> | 2019-06-06 16:10:16 -0700
committer | Igor Murashkin <iam@google.com> | 2019-06-10 22:17:13 +0000
commit | 0c65d1c5faac197630f919085fc690a716783303 (patch)
tree | 5a0a83ff437cdc95e9729a70984e1c59eab81b73 /startop/scripts/trace_analyzer/trace_analyzer.py
parent | 2cd5a231f9775c07e13312607260b338549e37b1 (diff)
startop: Add script to analyze block I/O from an ftrace file
Adds a simple Python parser that converts a .ftrace file into a
sqlite3 database.
The rest of the analysis logic is done through a series of SQL
commands that build tables/views/select queries (an illustrative
query is sketched below).
Bug: 134705245
Test: trace_analyzer some_ftrace_file.trace tmp_file.db
Change-Id: I25274e25a0ab1f091ae4e6161e6726e006e346a5
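The change splits the work in two: this script only parses the raw trace into sqlite3 tables (raw_ftrace_entries, sched_switches, sched_blocked_reasons), while the block-I/O analysis itself is expressed as SQL on top of them. As a minimal sketch of that second half (illustrative only, not part of this change; 'tmp_file.db' is just the placeholder name from the Test: line, and the query below uses the table/column names defined by the models in the diff), one could rank the kernel callers that most often blocked tasks on I/O:

#!/usr/bin/python3
# Illustrative only -- not part of this change. Queries the sqlite3 database
# produced by trace_analyzer.py; table/column names come from its models.
import sqlite3

conn = sqlite3.connect('tmp_file.db')  # placeholder path from the Test: line
query = """
    SELECT sbr.caller,
           COUNT(*)           AS blocked_count,
           MAX(rfe.timestamp) AS last_seen
    FROM sched_blocked_reasons AS sbr
    JOIN raw_ftrace_entries AS rfe ON rfe.id = sbr.id
    WHERE sbr.iowait = 1
    GROUP BY sbr.caller
    ORDER BY blocked_count DESC
    LIMIT 10
"""
for caller, blocked_count, last_seen in conn.execute(query):
  print(caller, blocked_count, last_seen)
conn.close()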
Diffstat (limited to 'startop/scripts/trace_analyzer/trace_analyzer.py')
-rwxr-xr-x | startop/scripts/trace_analyzer/trace_analyzer.py | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/startop/scripts/trace_analyzer/trace_analyzer.py b/startop/scripts/trace_analyzer/trace_analyzer.py
new file mode 100755
index 000000000000..45429339bea6
--- /dev/null
+++ b/startop/scripts/trace_analyzer/trace_analyzer.py
@@ -0,0 +1,350 @@
+#!/usr/bin/python3
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import sys
+
+from sqlalchemy import create_engine
+from sqlalchemy import Column, Date, Integer, Float, String, ForeignKey
+from sqlalchemy.ext.declarative import declarative_base
+
+from sqlalchemy.orm import sessionmaker
+
+import sqlalchemy
+
+_DEBUG = False
+#_LIMIT = 100000
+_LIMIT = None
+_FLUSH_LIMIT = 10000
+
+Base = declarative_base()
+
+class RawFtraceEntry(Base):
+  __tablename__ = 'raw_ftrace_entries'
+
+  id = Column(Integer, primary_key=True)
+  task_name = Column(String, nullable=True)  # <...> -> None.
+  task_pid = Column(String, nullable=False)
+  tgid = Column(Integer, nullable=True)  # ----- -> None.
+  cpu = Column(Integer, nullable=False)
+  timestamp = Column(Float, nullable=False)
+  function = Column(String, nullable=False)
+  function_args = Column(String, nullable=False)
+
+#  __mapper_args__ = {
+#      'polymorphic_identity':'raw_ftrace_entry',
+#      'polymorphic_on':function
+#  }
+
+  @staticmethod
+  def parse(line):
+    # ' <...>-5521  (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266'
+    m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
+    if not m:
+      return None
+
+    groups = m.groups()
+    # groups example:
+    # ('<...>',
+    #  '5521',
+    #  '-----',
+    #  '003',
+    #  '...1',
+    #  '17148.446877',
+    #  'tracing_mark_write',
+    #  'trace_event_clock_sync: parent_ts=17148.447266')
+    task_name = groups[0]
+    if task_name == '<...>':
+      task_name = None
+
+    task_pid = int(groups[1])
+    tgid = groups[2]
+    if tgid == '-----':
+      tgid = None
+
+    cpu = int(groups[3])
+    # irq_flags = groups[4]
+    timestamp = float(groups[5])
+    function = groups[6]
+    function_args = groups[7]
+
+    return RawFtraceEntry(task_name=task_name, task_pid=task_pid, tgid=tgid, cpu=cpu,
+                          timestamp=timestamp, function=function, function_args=function_args)
+
+  @staticmethod
+  def parse_dict(line):
+    # ' <...>-5521  (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266'
+    m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
+    if not m:
+      return None
+
+    groups = m.groups()
+    # groups example:
+    # ('<...>',
+    #  '5521',
+    #  '-----',
+    #  '003',
+    #  '...1',
+    #  '17148.446877',
+    #  'tracing_mark_write',
+    #  'trace_event_clock_sync: parent_ts=17148.447266')
+    task_name = groups[0]
+    if task_name == '<...>':
+      task_name = None
+
+    task_pid = int(groups[1])
+    tgid = groups[2]
+    if tgid == '-----':
+      tgid = None
+
+    cpu = int(groups[3])
+    # irq_flags = groups[4]
+    timestamp = float(groups[5])
+    function = groups[6]
+    function_args = groups[7]
+
+    return {'task_name': task_name, 'task_pid': task_pid, 'tgid': tgid, 'cpu': cpu,
+            'timestamp': timestamp, 'function': function, 'function_args': function_args}
+
+#class TracingMarkWriteFtraceEntry(RawFtraceEntry):
+#  __tablename__ = 'tracing_mark_write_ftrace_entries'
+#
+#  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
+#  mark_type = Column(String(1), nullable=False)
+#  mark_id = Column(Integer, nullable=False)
+#  message = Column(String)
+#
+##  __mapper_args__ = {
+##      'polymorphic_identity':'tracing_mark_write',
+##  }
+#
+#  @staticmethod
+#  def decode(raw_ftrace_entry):
+#    if raw_ftrace_entry.function != 'tracing_mark_write':
+#      raise ValueError("raw_ftrace_entry must be function 'tracing_mark_write':" + raw_ftrace_entry)
+#
+#    #"B|2446|(Paused)ClearCards|Foo"
+#    match = re.match("([^|]*)\|([^|]*)\|(.*)", raw_ftrace_entry.function_args)
+#
+#    if not match:
+#      return None
+#
+#    # ('B', '2446', '(Paused)ClearCards|Foo')
+#    groups = match.groups()
+#
+#    mark_type = groups[0]
+#    mark_id = int(groups[1])
+#    message = groups[2]
+#
+#    return TracingMarkWriteFtraceEntry(id = raw_ftrace_entry.id,
+#                                       mark_type = mark_type,
+#                                       mark_id = mark_id,
+#                                       message = message)
+
+class SchedSwitch(Base):
+  __tablename__ = 'sched_switches'
+
+  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
+
+  prev_comm = Column(String, nullable=False)
+  prev_pid = Column(Integer, nullable=False)
+  prev_prio = Column(Integer, nullable=False)
+  prev_state = Column(String, nullable=False)
+
+  next_comm = Column(String, nullable=False)
+  next_pid = Column(Integer, nullable=False)
+  next_prio = Column(Integer, nullable=False)
+
+#  __mapper_args__ = {
+#      'polymorphic_identity':'raw_ftrace_entry',
+#      'polymorphic_on':function
+#  }
+
+  @staticmethod
+  def parse_dict(function_args, id = None):
+    # 'prev_comm=kworker/u16:5 prev_pid=13971 prev_prio=120 prev_state=S ==> next_comm=swapper/4 next_pid=0 next_prio=120'
+    m = re.match("prev_comm=(.*) prev_pid=(\d+) prev_prio=(\d+) prev_state=(.*) ==> next_comm=(.*) next_pid=(\d+) next_prio=(\d+) ?", function_args)
+    if not m:
+      return None
+
+    groups = m.groups()
+    # ('kworker/u16:5', '13971', '120', 'S', 'swapper/4', '0', '120')
+    d = {}
+    if id is not None:
+      d['id'] = id
+    d['prev_comm'] = groups[0]
+    d['prev_pid'] = int(groups[1])
+    d['prev_prio'] = int(groups[2])
+    d['prev_state'] = groups[3]
+    d['next_comm'] = groups[4]
+    d['next_pid'] = int(groups[5])
+    d['next_prio'] = int(groups[6])
+
+    return d
+
+class SchedBlockedReason(Base):
+  __tablename__ = 'sched_blocked_reasons'
+
+  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
+
+  pid = Column(Integer, nullable=False)
+  iowait = Column(Integer, nullable=False)
+  caller = Column(String, nullable=False)
+
+  @staticmethod
+  def parse_dict(function_args, id = None):
+    # 'pid=2289 iowait=1 caller=wait_on_page_bit_common+0x2a8/0x5f'
+    m = re.match("pid=(\d+) iowait=(\d+) caller=(.*) ?", function_args)
+    if not m:
+      return None
+
+    groups = m.groups()
+    # ('2289', '1', 'wait_on_page_bit_common+0x2a8/0x5f8')
+    d = {}
+    if id is not None:
+      d['id'] = id
+    d['pid'] = int(groups[0])
+    d['iowait'] = int(groups[1])
+    d['caller'] = groups[2]
+
+    return d
+
+def init_sqlalchemy(db_filename):
+  global _DEBUG
+  engine = create_engine('sqlite:///' + db_filename, echo=_DEBUG)
+
+  # DROP TABLES
+#  Base.metadata.drop_all(engine)
+  # CREATE ... (tables)
+  Base.metadata.create_all(engine)
+
+  Session = sessionmaker(bind=engine)
+  session = Session()
+  return (session, engine)
+
+def insert_pending_entries(engine, kls, lst):
+  if len(lst) > 0:
+    # for some reason, it tries to generate an empty INSERT statement with len=0,
+    # which of course violates the first non-null constraint.
+    try:
+      # Performance-sensitive parsing according to:
+      # https://docs.sqlalchemy.org/en/13/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
+      engine.execute(kls.__table__.insert(), lst)
+      lst.clear()
+    except sqlalchemy.exc.IntegrityError as err:
+      # possibly violating some SQL constraint, print data here.
+      print(err)
+      print(lst)
+      raise
+
+def parse_file(filename, session, engine):
+  global _LIMIT
+  global _FLUSH_LIMIT
+  count = 0
+
+  pending_entries = []
+  pending_sched_switch = []
+  pending_sched_blocked_reasons = []
+
+  def insert_all_pending_entries():
+    insert_pending_entries(engine, RawFtraceEntry, pending_entries)
+    insert_pending_entries(engine, SchedSwitch, pending_sched_switch)
+    insert_pending_entries(engine, SchedBlockedReason, pending_sched_blocked_reasons)
+
+  # use explicit encoding to avoid UnicodeDecodeError.
+  with open(filename, encoding="ISO-8859-1") as f:
+    for l in f:
+
+      if len(l) > 1 and l[0] == '#':
+        continue
+
+      count = count + 1
+
+      if _LIMIT and count >= _LIMIT:
+        break
+
+      raw_ftrace_entry = RawFtraceEntry.parse_dict(l)
+      if not raw_ftrace_entry:
+        print("WARNING: Failed to parse raw ftrace entry: " + l)
+        continue
+
+      pending_entries.append(raw_ftrace_entry)
+
+      if raw_ftrace_entry['function'] == 'sched_switch':
+        sched_switch = SchedSwitch.parse_dict(raw_ftrace_entry['function_args'], count)
+
+        if not sched_switch:
+          print("WARNING: Failed to parse sched_switch: " + l)
+        else:
+          pending_sched_switch.append(sched_switch)
+
+      elif raw_ftrace_entry['function'] == 'sched_blocked_reason':
+        sbr = SchedBlockedReason.parse_dict(raw_ftrace_entry['function_args'], count)
+
+        if not sbr:
+          print("WARNING: Failed to parse sched_blocked_reason: " + l)
+        else:
+          pending_sched_blocked_reasons.append(sbr)
+
+      # Objects are cached in python memory, not yet sent to SQL database.
+      #session.add(raw_ftrace_entry)
+
+      # Send INSERT/UPDATE/etc statements to the underlying SQL database.
+      if count % _FLUSH_LIMIT == 0:
+        # session.flush()
+        #session.bulk_save_objects(pending_entries)
+        #session.bulk_insert_mappings(RawFtraceEntry, pending_entries)
+        insert_all_pending_entries()
+
+  insert_all_pending_entries()
+
+  # Ensure underlying database commits changes from memory to disk.
+  session.commit()
+
+  return count
+
+#def decode_raw_traces(session, engine):
+#  count = 0
+#  global _FLUSH_LIMIT
+#
+#  for tmw in session.query(RawFtraceEntry).filter_by(function = 'tracing_mark_write'):
+#    print(tmw)
+#    decoded = TracingMarkWriteFtraceEntry.decode(tmw)
+#    session.add(decoded)
+#
+#    if count % _FLUSH_LIMIT == 0:
+#      session.flush()
+#
+#  session.commit()
+#
+#  return count
+
+def main(argv):
+  db_filename = sys.argv[1]
+  trace_filename = sys.argv[2]
+
+  session, engine = init_sqlalchemy(db_filename)
+  print("SQL Alchemy db initialized")
+
+  # parse 'raw_ftrace_entries' table
+  count = parse_file(trace_filename, session, engine)
+  print("Count was ", count)
+
+  # create other tables
+#  count = decode_raw_traces(session, engine)
+
+  return 0
+
+if __name__ == '__main__':
+  main(sys.argv)
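For completeness, a minimal driver sketch (an assumption for illustration: trace_analyzer.py is importable as trace_analyzer from the working directory, and the file names are the placeholders from the Test: line; this just mirrors what main() already does):

#!/usr/bin/python3
# Sketch only: drive the parser programmatically instead of via main().
# Assumes trace_analyzer.py is importable from the current directory and
# uses placeholder file names.
from trace_analyzer import init_sqlalchemy, parse_file

session, engine = init_sqlalchemy('tmp_file.db')  # creates the tables if missing
count = parse_file('some_ftrace_file.trace', session, engine)
print('Parsed', count, 'ftrace lines into tmp_file.db')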