author    | Igor Murashkin <iam@google.com> | 2019-06-18 16:03:20 -0700
committer | Igor Murashkin <iam@google.com> | 2019-06-26 21:19:27 +0000
commit    | 06d017ee1913b20d5ebbb9c31e4c6061d993067e (patch)
tree      | 86bcd6facf165e7b70e5900e05f41c70a0dd2df1 /startop/scripts/trace_analyzer/trace_analyzer.py
parent    | 42439214ebdf1b6de12683790ea325164fda37fc (diff)
startop: Add iorap compiler written in python
The compiler will be used for experimentation purposes, since it's both easier
to develop in Python and it accepts ftrace, making it very easy to prototype
complicated, experimental changes that we aren't yet sure are worth porting to
the on-device C++/perfetto compiler.
This 'new' compiler accepts ftrace/systrace files as input,
then generates an in-memory sqlite3 database (using the trace_analyzer
source code), and finally code-generates a TraceFile.pb protobuf.
(Also refactor trace_analyzer into a library, and update it to
parse systrace.html files)
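
To illustrate the input format, below is a minimal sketch of parsing one raw
ftrace line. The sample line and the regular expression are copied from the
RawFtraceEntry parser that this change moves out of trace_analyzer.py into the
library; the parse_ftrace_line helper name is illustrative only and not part
of this change.

    import re

    # Sample systrace/ftrace line, copied from the comment in RawFtraceEntry.parse:
    LINE = (' <...>-5521  (-----) [003] ...1 17148.446877: '
            'tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266')

    # Regex copied from the pre-refactor parser; the groups are task name, pid,
    # tgid, cpu, irq flags, timestamp, function name and function args.
    FTRACE_LINE = re.compile(
        r'\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)')

    def parse_ftrace_line(line):  # illustrative helper, not part of this change
      m = FTRACE_LINE.match(line)
      if not m:
        return None
      task_name, task_pid, tgid, cpu, _irq_flags, timestamp, function, function_args = m.groups()
      return {'task_name': None if task_name == '<...>' else task_name,
              'task_pid': int(task_pid),
              'tgid': None if tgid == '-----' else tgid,
              'cpu': int(cpu),
              'timestamp': float(timestamp),
              'function': function,
              'function_args': function_args}

    print(parse_ftrace_line(LINE)['function'])  # tracing_mark_write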
Limitations: perfetto_trace.pb files are not currently accepted because their
'ofs' fields are missing (see bug#135555191)
Test: py.test-3 frameworks/base/startop/scripts
Test: ./compiler.py -i tmp_sargo/textcache -t tmp_sargo/trace.html -o tmp/output.pb
Test: ./compiler.py -i tmp_sargo/textcache -t tmp_sargo/trace.html -o tmp/output.pb -f '^/data'
Test: ./trace_analyzer music_run.trace tmp_dbs/12345.db
Bug: 135557978
Bug: 134789969
Change-Id: Ic8295900ee9e634b4cfd8cf99b671ae08d2ea4f7
Diffstat (limited to 'startop/scripts/trace_analyzer/trace_analyzer.py')
-rwxr-xr-x | startop/scripts/trace_analyzer/trace_analyzer.py | 333
1 file changed, 17 insertions, 316 deletions
diff --git a/startop/scripts/trace_analyzer/trace_analyzer.py b/startop/scripts/trace_analyzer/trace_analyzer.py
index 45429339bea6..62ae018a9986 100755
--- a/startop/scripts/trace_analyzer/trace_analyzer.py
+++ b/startop/scripts/trace_analyzer/trace_analyzer.py
@@ -15,335 +15,36 @@
 import re
 import sys
+import argparse

-from sqlalchemy import create_engine
-from sqlalchemy import Column, Date, Integer, Float, String, ForeignKey
-from sqlalchemy.ext.declarative import declarative_base
+from lib.trace2db import Trace2Db

-from sqlalchemy.orm import sessionmaker
-
-import sqlalchemy
-
-_DEBUG = False
-#_LIMIT = 100000
-_LIMIT = None
-_FLUSH_LIMIT = 10000
-
-Base = declarative_base()
-
-class RawFtraceEntry(Base):
-  __tablename__ = 'raw_ftrace_entries'
-
-  id = Column(Integer, primary_key=True)
-  task_name = Column(String, nullable=True)  # <...> -> None.
-  task_pid = Column(String, nullable=False)
-  tgid = Column(Integer, nullable=True)  # ----- -> None.
-  cpu = Column(Integer, nullable=False)
-  timestamp = Column(Float, nullable=False)
-  function = Column(String, nullable=False)
-  function_args = Column(String, nullable=False)
-
-#  __mapper_args__ = {
-#      'polymorphic_identity':'raw_ftrace_entry',
-#      'polymorphic_on':function
-#  }
-
-  @staticmethod
-  def parse(line):
-    # ' <...>-5521 (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266'
-    m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
-    if not m:
-      return None
-
-    groups = m.groups()
-    # groups example:
-    # ('<...>',
-    #  '5521',
-    #  '-----',
-    #  '003',
-    #  '...1',
-    #  '17148.446877',
-    #  'tracing_mark_write',
-    #  'trace_event_clock_sync: parent_ts=17148.447266')
-    task_name = groups[0]
-    if task_name == '<...>':
-      task_name = None
-
-    task_pid = int(groups[1])
-    tgid = groups[2]
-    if tgid == '-----':
-      tgid = None
-
-    cpu = int(groups[3])
-    # irq_flags = groups[4]
-    timestamp = float(groups[5])
-    function = groups[6]
-    function_args = groups[7]
-
-    return RawFtraceEntry(task_name=task_name, task_pid=task_pid, tgid=tgid, cpu=cpu,
-                          timestamp=timestamp, function=function, function_args=function_args)
-
-  @staticmethod
-  def parse_dict(line):
-    # ' <...>-5521 (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266'
-    m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
-    if not m:
-      return None
-
-    groups = m.groups()
-    # groups example:
-    # ('<...>',
-    #  '5521',
-    #  '-----',
-    #  '003',
-    #  '...1',
-    #  '17148.446877',
-    #  'tracing_mark_write',
-    #  'trace_event_clock_sync: parent_ts=17148.447266')
-    task_name = groups[0]
-    if task_name == '<...>':
-      task_name = None
-
-    task_pid = int(groups[1])
-    tgid = groups[2]
-    if tgid == '-----':
-      tgid = None
-
-    cpu = int(groups[3])
-    # irq_flags = groups[4]
-    timestamp = float(groups[5])
-    function = groups[6]
-    function_args = groups[7]
-
-    return {'task_name': task_name, 'task_pid': task_pid, 'tgid': tgid, 'cpu': cpu, 'timestamp': timestamp, 'function': function, 'function_args': function_args}
-
-#class TracingMarkWriteFtraceEntry(RawFtraceEntry):
-#  __tablename__ = 'tracing_mark_write_ftrace_entries'
+# This script requires 'sqlalchemy' to access the sqlite3 database.
 #
-#  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
-#  mark_type = Column(String(1), nullable=False)
-#  mark_id = Column(Integer, nullable=False)
-#  message = Column(String)
+# $> sudo apt-get install python3-pip
+# $> pip3 install --user sqlalchemy
 #
-##  __mapper_args__ = {
-##    'polymorphic_identity':'tracing_mark_write',
-##  }
-#
-#  @staticmethod
-#  def decode(raw_ftrace_entry):
-#    if raw_ftrace_entry.function != 'tracing_mark_write':
-#      raise ValueError("raw_ftrace_entry must be function 'tracing_mark_write':" + raw_ftrace_entry)
-#
-#    #"B|2446|(Paused)ClearCards|Foo"
-#    match = re.match("([^|]*)\|([^|]*)\|(.*)", raw_ftrace_entry.function_args)
-#
-#    if not match:
-#      return None
-#
-#    # ('B', '2446', '(Paused)ClearCards|Foo')
-#    groups = match.groups()
-#
-#    mark_type = groups[0]
-#    mark_id = int(groups[1])
-#    message = groups[2]
-#
-#    return TracingMarkWriteFtraceEntry(id = raw_ftrace_entry.id,
-#                                       mark_type = mark_type,
-#                                       mark_id = mark_id,
-#                                       message = message)
-
-class SchedSwitch(Base):
-  __tablename__ = 'sched_switches'
-
-  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
-
-  prev_comm = Column(String, nullable=False)
-  prev_pid = Column(Integer, nullable=False)
-  prev_prio = Column(Integer, nullable=False)
-  prev_state = Column(String, nullable=False)
-
-  next_comm = Column(String, nullable=False)
-  next_pid = Column(Integer, nullable=False)
-  next_prio = Column(Integer, nullable=False)
-
-#  __mapper_args__ = {
-#      'polymorphic_identity':'raw_ftrace_entry',
-#      'polymorphic_on':function
-#  }
-
-  @staticmethod
-  def parse_dict(function_args, id = None):
-    # 'prev_comm=kworker/u16:5 prev_pid=13971 prev_prio=120 prev_state=S ==> next_comm=swapper/4 next_pid=0 next_prio=120'
-    m = re.match("prev_comm=(.*) prev_pid=(\d+) prev_prio=(\d+) prev_state=(.*) ==> next_comm=(.*) next_pid=(\d+) next_prio=(\d+) ?", function_args)
-    if not m:
-      return None
-
-    groups = m.groups()
-    # ('kworker/u16:5', '13971', '120', 'S', 'swapper/4', '0', '120')
-    d = {}
-    if id is not None:
-      d['id'] = id
-    d['prev_comm'] = groups[0]
-    d['prev_pid'] = int(groups[1])
-    d['prev_prio'] = int(groups[2])
-    d['prev_state'] = groups[3]
-    d['next_comm'] = groups[4]
-    d['next_pid'] = int(groups[5])
-    d['next_prio'] = int(groups[6])
-
-    return d
-
-class SchedBlockedReason(Base):
-  __tablename__ = 'sched_blocked_reasons'
-
-  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
-  pid = Column(Integer, nullable=False)
-  iowait = Column(Integer, nullable=False)
-  caller = Column(String, nullable=False)
-
-  @staticmethod
-  def parse_dict(function_args, id = None):
-    # 'pid=2289 iowait=1 caller=wait_on_page_bit_common+0x2a8/0x5f'
-    m = re.match("pid=(\d+) iowait=(\d+) caller=(.*) ?", function_args)
-    if not m:
-      return None
-
-    groups = m.groups()
-    # ('2289', '1', 'wait_on_page_bit_common+0x2a8/0x5f8')
-    d = {}
-    if id is not None:
-      d['id'] = id
-    d['pid'] = int(groups[0])
-    d['iowait'] = int(groups[1])
-    d['caller'] = groups[2]
-
-    return d
-
-def init_sqlalchemy(db_filename):
-  global _DEBUG
-  engine = create_engine('sqlite:///' + db_filename, echo=_DEBUG)
-
-  # DROP TABLES
-#  Base.metadata.drop_all(engine)
-  # CREATE ... (tables)
-  Base.metadata.create_all(engine)
-
-  Session = sessionmaker(bind=engine)
-  session = Session()
-  return (session, engine)
-
-def insert_pending_entries(engine, kls, lst):
-  if len(lst) > 0:
-    # for some reason, it tries to generate an empty INSERT statement with len=0,
-    # which of course violates the first non-null constraint.
-    try:
-      # Performance-sensitive parsing according to:
-      # https://docs.sqlalchemy.org/en/13/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
-      engine.execute(kls.__table__.insert(), lst)
-      lst.clear()
-    except sqlalchemy.exc.IntegrityError as err:
-      # possibly violating some SQL constraint, print data here.
-      print(err)
-      print(lst)
-      raise
-
-def parse_file(filename, session, engine):
-  global _LIMIT
-  global _FLUSH_LIMIT
-  count = 0
-
-  pending_entries = []
-  pending_sched_switch = []
-  pending_sched_blocked_reasons = []
-
-  def insert_all_pending_entries():
-    insert_pending_entries(engine, RawFtraceEntry, pending_entries)
-    insert_pending_entries(engine, SchedSwitch, pending_sched_switch)
-    insert_pending_entries(engine, SchedBlockedReason, pending_sched_blocked_reasons)
-
-  # use explicit encoding to avoid UnicodeDecodeError.
-  with open(filename, encoding="ISO-8859-1") as f:
-    for l in f:
-
-      if len(l) > 1 and l[0] == '#':
-        continue
-
-      count = count + 1
-
-      if _LIMIT and count >= _LIMIT:
-        break
-
-      raw_ftrace_entry = RawFtraceEntry.parse_dict(l)
-      if not raw_ftrace_entry:
-        print("WARNING: Failed to parse raw ftrace entry: " + l)
-        continue
-
-      pending_entries.append(raw_ftrace_entry)
-
-      if raw_ftrace_entry['function'] == 'sched_switch':
-        sched_switch = SchedSwitch.parse_dict(raw_ftrace_entry['function_args'], count)
-
-        if not sched_switch:
-          print("WARNING: Failed to parse sched_switch: " + l)
-        else:
-          pending_sched_switch.append(sched_switch)
-
-      elif raw_ftrace_entry['function'] == 'sched_blocked_reason':
-        sbr = SchedBlockedReason.parse_dict(raw_ftrace_entry['function_args'], count)
-
-        if not sbr:
-          print("WARNING: Failed to parse sched_blocked_reason: " + l)
-        else:
-          pending_sched_blocked_reasons.append(sbr)
-
-      # Objects are cached in python memory, not yet sent to SQL database.
-      #session.add(raw_ftrace_entry)
-
-      # Send INSERT/UPDATE/etc statements to the underlying SQL database.
-      if count % _FLUSH_LIMIT == 0:
-        # session.flush()
-        #session.bulk_save_objects(pending_entries)
-        #session.bulk_insert_mappings(RawFtraceEntry, pending_entries)
-        insert_all_pending_entries()
-
-  insert_all_pending_entries()
-
-  # Ensure underlying database commits changes from memory to disk.
-  session.commit()
-
-  return count
+def main(argv):
+  parser = argparse.ArgumentParser(description='Convert ftrace/systrace file into sqlite3 db.')
+  parser.add_argument('db_filename', metavar='sql_filename.db', type=str,
+                      help='path to sqlite3 db filename')
+  parser.add_argument('trace_filename', metavar='systrace.ftrace', type=str,
+                      help='path to ftrace/systrace filename')
+  parser.add_argument('--limit', type=int, help='limit the number of entries parsed [for debugging]')

-#def decode_raw_traces(session, engine):
-#  count = 0
-#  global _FLUSH_LIMIT
-#
-#  for tmw in session.query(RawFtraceEntry).filter_by(function = 'tracing_mark_write'):
-#    print(tmw)
-#    decoded = TracingMarkWriteFtraceEntry.decode(tmw)
-#    session.add(decoded)
-#
-#    if count % _FLUSH_LIMIT == 0:
-#      session.flush()
-#
-#  session.commit()
-#
-#  return count
+  args = parser.parse_args()

-def main(argv):
-  db_filename = sys.argv[1]
-  trace_filename = sys.argv[2]
+  db_filename = args.db_filename
+  trace_filename = args.trace_filename

-  session, engine = init_sqlalchemy(db_filename)
+  trace2db = Trace2Db(db_filename)

   print("SQL Alchemy db initialized")

   # parse 'raw_ftrace_entries' table
-  count = parse_file(trace_filename, session, engine)
+  count = trace2db.parse_file_into_db(trace_filename, limit=args.limit)

   print("Count was ", count)

-  # create other tables
-#  count = decode_raw_traces(session, engine)
-
   return 0

 if __name__ == '__main__':
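
For reference, a minimal sketch of driving the refactored library directly
instead of through the CLI above. Trace2Db, parse_file_into_db (including its
limit keyword and entry-count return value), and the raw_ftrace_entries table
name are taken from the code in this change; the file paths and the final
sqlite3 sanity check are illustrative assumptions.

    #!/usr/bin/env python3
    # Build a sqlite3 db from an ftrace/systrace text file using the refactored
    # Trace2Db library, then sanity-check the row count of one generated table.
    # Assumes it runs next to trace_analyzer.py so that lib.trace2db is importable.
    import sqlite3

    from lib.trace2db import Trace2Db

    DB_FILENAME = 'out.db'              # illustrative path
    TRACE_FILENAME = 'systrace.ftrace'  # illustrative path

    trace2db = Trace2Db(DB_FILENAME)
    # Same call the new main() makes; returns the number of parsed ftrace entries.
    count = trace2db.parse_file_into_db(TRACE_FILENAME, limit=100000)
    print('parsed entries:', count)

    # The table name matches the schema that previously lived in this script.
    with sqlite3.connect(DB_FILENAME) as conn:
      rows = conn.execute('SELECT COUNT(*) FROM raw_ftrace_entries').fetchone()[0]
      print('rows in raw_ftrace_entries:', rows)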