diff options
Diffstat (limited to 'startop/scripts/trace_analyzer/trace_analyzer.py')
-rwxr-xr-x | startop/scripts/trace_analyzer/trace_analyzer.py | 333 |
1 files changed, 17 insertions, 316 deletions
diff --git a/startop/scripts/trace_analyzer/trace_analyzer.py b/startop/scripts/trace_analyzer/trace_analyzer.py index 45429339bea6..62ae018a9986 100755 --- a/startop/scripts/trace_analyzer/trace_analyzer.py +++ b/startop/scripts/trace_analyzer/trace_analyzer.py @@ -15,335 +15,36 @@ import re import sys +import argparse -from sqlalchemy import create_engine -from sqlalchemy import Column, Date, Integer, Float, String, ForeignKey -from sqlalchemy.ext.declarative import declarative_base +from lib.trace2db import Trace2Db -from sqlalchemy.orm import sessionmaker - -import sqlalchemy - -_DEBUG = False -#_LIMIT = 100000 -_LIMIT = None -_FLUSH_LIMIT = 10000 - -Base = declarative_base() - -class RawFtraceEntry(Base): - __tablename__ = 'raw_ftrace_entries' - - id = Column(Integer, primary_key=True) - task_name = Column(String, nullable=True) # <...> -> None. - task_pid = Column(String, nullable=False) - tgid = Column(Integer, nullable=True) # ----- -> None. - cpu = Column(Integer, nullable=False) - timestamp = Column(Float, nullable=False) - function = Column(String, nullable=False) - function_args = Column(String, nullable=False) - -# __mapper_args__ = { -# 'polymorphic_identity':'raw_ftrace_entry', -# 'polymorphic_on':function -# } - - @staticmethod - def parse(line): - # ' <...>-5521 (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266' - m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line) - if not m: - return None - - groups = m.groups() - # groups example: - # ('<...>', - # '5521', - # '-----', - # '003', - # '...1', - # '17148.446877', - # 'tracing_mark_write', - # 'trace_event_clock_sync: parent_ts=17148.447266') - task_name = groups[0] - if task_name == '<...>': - task_name = None - - task_pid = int(groups[1]) - tgid = groups[2] - if tgid == '-----': - tgid = None - - cpu = int(groups[3]) - # irq_flags = groups[4] - timestamp = float(groups[5]) - function = groups[6] - function_args = groups[7] - - return RawFtraceEntry(task_name=task_name, task_pid=task_pid, tgid=tgid, cpu=cpu, - timestamp=timestamp, function=function, function_args=function_args) - - @staticmethod - def parse_dict(line): - # ' <...>-5521 (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266' - m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line) - if not m: - return None - - groups = m.groups() - # groups example: - # ('<...>', - # '5521', - # '-----', - # '003', - # '...1', - # '17148.446877', - # 'tracing_mark_write', - # 'trace_event_clock_sync: parent_ts=17148.447266') - task_name = groups[0] - if task_name == '<...>': - task_name = None - - task_pid = int(groups[1]) - tgid = groups[2] - if tgid == '-----': - tgid = None - - cpu = int(groups[3]) - # irq_flags = groups[4] - timestamp = float(groups[5]) - function = groups[6] - function_args = groups[7] - - return {'task_name': task_name, 'task_pid': task_pid, 'tgid': tgid, 'cpu': cpu, 'timestamp': timestamp, 'function': function, 'function_args': function_args} - -#class TracingMarkWriteFtraceEntry(RawFtraceEntry): -# __tablename__ = 'tracing_mark_write_ftrace_entries' +# This script requires 'sqlalchemy' to access the sqlite3 database. # -# id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True) -# mark_type = Column(String(1), nullable=False) -# mark_id = Column(Integer, nullable=False) -# message = Column(String) +# $> sudo apt-get install python3-pip +# $> pip3 install --user sqlalchemy # -## __mapper_args__ = { -## 'polymorphic_identity':'tracing_mark_write', -## } -# -# @staticmethod -# def decode(raw_ftrace_entry): -# if raw_ftrace_entry.function != 'tracing_mark_write': -# raise ValueError("raw_ftrace_entry must be function 'tracing_mark_write':" + raw_ftrace_entry) -# -# #"B|2446|(Paused)ClearCards|Foo" -# match = re.match("([^|]*)\|([^|]*)\|(.*)", raw_ftrace_entry.function_args) -# -# if not match: -# return None -# -# # ('B', '2446', '(Paused)ClearCards|Foo') -# groups = match.groups() -# -# mark_type = groups[0] -# mark_id = int(groups[1]) -# message = groups[2] -# -# return TracingMarkWriteFtraceEntry(id = raw_ftrace_entry.id, -# mark_type = mark_type, -# mark_id = mark_id, -# message = message) - -class SchedSwitch(Base): - __tablename__ = 'sched_switches' - - id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True) - - prev_comm = Column(String, nullable=False) - prev_pid = Column(Integer, nullable=False) - prev_prio = Column(Integer, nullable=False) - prev_state = Column(String, nullable=False) - - next_comm = Column(String, nullable=False) - next_pid = Column(Integer, nullable=False) - next_prio = Column(Integer, nullable=False) - -# __mapper_args__ = { -# 'polymorphic_identity':'raw_ftrace_entry', -# 'polymorphic_on':function -# } - - @staticmethod - def parse_dict(function_args, id = None): - # 'prev_comm=kworker/u16:5 prev_pid=13971 prev_prio=120 prev_state=S ==> next_comm=swapper/4 next_pid=0 next_prio=120' - m = re.match("prev_comm=(.*) prev_pid=(\d+) prev_prio=(\d+) prev_state=(.*) ==> next_comm=(.*) next_pid=(\d+) next_prio=(\d+) ?", function_args) - if not m: - return None - - groups = m.groups() - # ('kworker/u16:5', '13971', '120', 'S', 'swapper/4', '0', '120') - d = {} - if id is not None: - d['id'] = id - d['prev_comm'] = groups[0] - d['prev_pid'] = int(groups[1]) - d['prev_prio'] = int(groups[2]) - d['prev_state'] = groups[3] - d['next_comm'] = groups[4] - d['next_pid'] = int(groups[5]) - d['next_prio'] = int(groups[6]) - - return d - -class SchedBlockedReason(Base): - __tablename__ = 'sched_blocked_reasons' - - id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True) - pid = Column(Integer, nullable=False) - iowait = Column(Integer, nullable=False) - caller = Column(String, nullable=False) - - @staticmethod - def parse_dict(function_args, id = None): - # 'pid=2289 iowait=1 caller=wait_on_page_bit_common+0x2a8/0x5f' - m = re.match("pid=(\d+) iowait=(\d+) caller=(.*) ?", function_args) - if not m: - return None - - groups = m.groups() - # ('2289', '1', 'wait_on_page_bit_common+0x2a8/0x5f8') - d = {} - if id is not None: - d['id'] = id - d['pid'] = int(groups[0]) - d['iowait'] = int(groups[1]) - d['caller'] = groups[2] - - return d - -def init_sqlalchemy(db_filename): - global _DEBUG - engine = create_engine('sqlite:///' + db_filename, echo=_DEBUG) - - # DROP TABLES -# Base.metadata.drop_all(engine) - # CREATE ... (tables) - Base.metadata.create_all(engine) - - Session = sessionmaker(bind=engine) - session = Session() - return (session, engine) - -def insert_pending_entries(engine, kls, lst): - if len(lst) > 0: - # for some reason, it tries to generate an empty INSERT statement with len=0, - # which of course violates the first non-null constraint. - try: - # Performance-sensitive parsing according to: - # https://docs.sqlalchemy.org/en/13/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow - engine.execute(kls.__table__.insert(), lst) - lst.clear() - except sqlalchemy.exc.IntegrityError as err: - # possibly violating some SQL constraint, print data here. - print(err) - print(lst) - raise - -def parse_file(filename, session, engine): - global _LIMIT - global _FLUSH_LIMIT - count = 0 - - pending_entries = [] - pending_sched_switch = [] - pending_sched_blocked_reasons = [] - - def insert_all_pending_entries(): - insert_pending_entries(engine, RawFtraceEntry, pending_entries) - insert_pending_entries(engine, SchedSwitch, pending_sched_switch) - insert_pending_entries(engine, SchedBlockedReason, pending_sched_blocked_reasons) - - # use explicit encoding to avoid UnicodeDecodeError. - with open(filename, encoding="ISO-8859-1") as f: - for l in f: - - if len(l) > 1 and l[0] == '#': - continue - - count = count + 1 - - if _LIMIT and count >= _LIMIT: - break - - raw_ftrace_entry = RawFtraceEntry.parse_dict(l) - if not raw_ftrace_entry: - print("WARNING: Failed to parse raw ftrace entry: " + l) - continue - - pending_entries.append(raw_ftrace_entry) - - if raw_ftrace_entry['function'] == 'sched_switch': - sched_switch = SchedSwitch.parse_dict(raw_ftrace_entry['function_args'], count) - - if not sched_switch: - print("WARNING: Failed to parse sched_switch: " + l) - else: - pending_sched_switch.append(sched_switch) - - elif raw_ftrace_entry['function'] == 'sched_blocked_reason': - sbr = SchedBlockedReason.parse_dict(raw_ftrace_entry['function_args'], count) - - if not sbr: - print("WARNING: Failed to parse sched_blocked_reason: " + l) - else: - pending_sched_blocked_reasons.append(sbr) - - # Objects are cached in python memory, not yet sent to SQL database. - #session.add(raw_ftrace_entry) - - # Send INSERT/UPDATE/etc statements to the underlying SQL database. - if count % _FLUSH_LIMIT == 0: - # session.flush() - #session.bulk_save_objects(pending_entries) - #session.bulk_insert_mappings(RawFtraceEntry, pending_entries) - insert_all_pending_entries() - - insert_all_pending_entries() - - # Ensure underlying database commits changes from memory to disk. - session.commit() - - return count +def main(argv): + parser = argparse.ArgumentParser(description='Convert ftrace/systrace file into sqlite3 db.') + parser.add_argument('db_filename', metavar='sql_filename.db', type=str, + help='path to sqlite3 db filename') + parser.add_argument('trace_filename', metavar='systrace.ftrace', type=str, + help='path to ftrace/systrace filename') + parser.add_argument('--limit', type=int, help='limit the number of entries parsed [for debugging]') -#def decode_raw_traces(session, engine): -# count = 0 -# global _FLUSH_LIMIT -# -# for tmw in session.query(RawFtraceEntry).filter_by(function = 'tracing_mark_write'): -# print(tmw) -# decoded = TracingMarkWriteFtraceEntry.decode(tmw) -# session.add(decoded) -# -# if count % _FLUSH_LIMIT == 0: -# session.flush() -# -# session.commit() -# -# return count + args = parser.parse_args() -def main(argv): - db_filename = sys.argv[1] - trace_filename = sys.argv[2] + db_filename = args.db_filename + trace_filename = args.trace_filename - session, engine = init_sqlalchemy(db_filename) + trace2db = Trace2Db(db_filename) print("SQL Alchemy db initialized") # parse 'raw_ftrace_entries' table - count = parse_file(trace_filename, session, engine) + count = trace2db.parse_file_into_db(trace_filename, limit=args.limit) print("Count was ", count) - # create other tables -# count = decode_raw_traces(session, engine) - return 0 if __name__ == '__main__': |