author    | Igor Murashkin <iam@google.com> | 2019-06-18 16:03:20 -0700
committer | Igor Murashkin <iam@google.com> | 2019-06-26 21:19:27 +0000
commit    | 06d017ee1913b20d5ebbb9c31e4c6061d993067e (patch)
tree      | 86bcd6facf165e7b70e5900e05f41c70a0dd2df1 /startop/scripts/trace_analyzer/trace_analyzer.py
parent    | 42439214ebdf1b6de12683790ea325164fda37fc (diff)
startop: Add iorap compiler written in python
The compiler will be used for experimentation purposes, since it's both easier
to develop in Python and it accepts ftrace, making it very easy to prototype
complicated, experimental changes that we aren't yet sure are worth porting to
the on-device C++/perfetto compiler.
This 'new' compiler accepts ftrace/systrace files as input,
then generates an in-memory sqlite3 database (using the trace_analyzer
source code), and finally code-generates a TraceFile.pb protobuf.
(Also refactor trace_analyzer into a library, and update it to
parse systrace.html files)
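
To illustrate the input format, below is a minimal sketch of parsing one raw
ftrace line. The sample line and the regular expression are copied from the
RawFtraceEntry parser that this change moves out of trace_analyzer.py into the
library; the parse_ftrace_line helper name is illustrative only and not part
of this change.

    import re

    # Sample systrace/ftrace line, copied from the comment in RawFtraceEntry.parse:
    LINE = (' <...>-5521  (-----) [003] ...1 17148.446877: '
            'tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266')

    # Regex copied from the pre-refactor parser; the groups are task name, pid,
    # tgid, cpu, irq flags, timestamp, function name and function args.
    FTRACE_LINE = re.compile(
        r'\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)')

    def parse_ftrace_line(line):  # illustrative helper, not part of this change
      m = FTRACE_LINE.match(line)
      if not m:
        return None
      task_name, task_pid, tgid, cpu, _irq_flags, timestamp, function, function_args = m.groups()
      return {'task_name': None if task_name == '<...>' else task_name,
              'task_pid': int(task_pid),
              'tgid': None if tgid == '-----' else tgid,
              'cpu': int(cpu),
              'timestamp': float(timestamp),
              'function': function,
              'function_args': function_args}

    print(parse_ftrace_line(LINE)['function'])  # tracing_mark_write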
Limitations: perfetto_trace.pb files are not currently accepted because their
'ofs' fields are missing (see bug#135555191)
Test: py.test-3 frameworks/base/startop/scripts
Test: ./compiler.py -i tmp_sargo/textcache -t tmp_sargo/trace.html -o tmp/output.pb
Test: ./compiler.py -i tmp_sargo/textcache -t tmp_sargo/trace.html -o tmp/output.pb -f '^/data'
Test: ./trace_analyzer music_run.trace tmp_dbs/12345.db
Bug: 135557978
Bug: 134789969
Change-Id: Ic8295900ee9e634b4cfd8cf99b671ae08d2ea4f7
Diffstat (limited to 'startop/scripts/trace_analyzer/trace_analyzer.py')
-rwxr-xr-x | startop/scripts/trace_analyzer/trace_analyzer.py | 333
1 file changed, 17 insertions, 316 deletions
diff --git a/startop/scripts/trace_analyzer/trace_analyzer.py b/startop/scripts/trace_analyzer/trace_analyzer.py
index 45429339bea6..62ae018a9986 100755
--- a/startop/scripts/trace_analyzer/trace_analyzer.py
+++ b/startop/scripts/trace_analyzer/trace_analyzer.py
@@ -15,335 +15,36 @@
 import re
 import sys
+import argparse

-from sqlalchemy import create_engine
-from sqlalchemy import Column, Date, Integer, Float, String, ForeignKey
-from sqlalchemy.ext.declarative import declarative_base
+from lib.trace2db import Trace2Db

-from sqlalchemy.orm import sessionmaker
-
-import sqlalchemy
-
-_DEBUG = False
-#_LIMIT = 100000
-_LIMIT = None
-_FLUSH_LIMIT = 10000
-
-Base = declarative_base()
-
-class RawFtraceEntry(Base):
-  __tablename__ = 'raw_ftrace_entries'
-
-  id = Column(Integer, primary_key=True)
-  task_name = Column(String, nullable=True)  # <...> -> None.
-  task_pid = Column(String, nullable=False)
-  tgid = Column(Integer, nullable=True)  # ----- -> None.
-  cpu = Column(Integer, nullable=False)
-  timestamp = Column(Float, nullable=False)
-  function = Column(String, nullable=False)
-  function_args = Column(String, nullable=False)
-
-#  __mapper_args__ = {
-#      'polymorphic_identity':'raw_ftrace_entry',
-#      'polymorphic_on':function
-#  }
-
-  @staticmethod
-  def parse(line):
-    # ' <...>-5521 (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266'
-    m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
-    if not m:
-      return None
-
-    groups = m.groups()
-    # groups example:
-    # ('<...>',
-    #  '5521',
-    #  '-----',
-    #  '003',
-    #  '...1',
-    #  '17148.446877',
-    #  'tracing_mark_write',
-    #  'trace_event_clock_sync: parent_ts=17148.447266')
-    task_name = groups[0]
-    if task_name == '<...>':
-      task_name = None
-
-    task_pid = int(groups[1])
-    tgid = groups[2]
-    if tgid == '-----':
-      tgid = None
-
-    cpu = int(groups[3])
-    # irq_flags = groups[4]
-    timestamp = float(groups[5])
-    function = groups[6]
-    function_args = groups[7]
-
-    return RawFtraceEntry(task_name=task_name, task_pid=task_pid, tgid=tgid, cpu=cpu,
-                          timestamp=timestamp, function=function, function_args=function_args)
-
-  @staticmethod
-  def parse_dict(line):
-    # ' <...>-5521 (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266'
-    m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
-    if not m:
-      return None
-
-    groups = m.groups()
-    # groups example:
-    # ('<...>',
-    #  '5521',
-    #  '-----',
-    #  '003',
-    #  '...1',
-    #  '17148.446877',
-    #  'tracing_mark_write',
-    #  'trace_event_clock_sync: parent_ts=17148.447266')
-    task_name = groups[0]
-    if task_name == '<...>':
-      task_name = None
-
-    task_pid = int(groups[1])
-    tgid = groups[2]
-    if tgid == '-----':
-      tgid = None
-
-    cpu = int(groups[3])
-    # irq_flags = groups[4]
-    timestamp = float(groups[5])
-    function = groups[6]
-    function_args = groups[7]
-
-    return {'task_name': task_name, 'task_pid': task_pid, 'tgid': tgid, 'cpu': cpu, 'timestamp': timestamp, 'function': function, 'function_args': function_args}
-
-#class TracingMarkWriteFtraceEntry(RawFtraceEntry):
-#  __tablename__ = 'tracing_mark_write_ftrace_entries'
+# This script requires 'sqlalchemy' to access the sqlite3 database.
 #
-#  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
-#  mark_type = Column(String(1), nullable=False)
-#  mark_id = Column(Integer, nullable=False)
-#  message = Column(String)
+# $> sudo apt-get install python3-pip
+# $> pip3 install --user sqlalchemy
 #
-##  __mapper_args__ = {
-##    'polymorphic_identity':'tracing_mark_write',
-##  }
-#
-#  @staticmethod
-#  def decode(raw_ftrace_entry):
-#    if raw_ftrace_entry.function != 'tracing_mark_write':
-#      raise ValueError("raw_ftrace_entry must be function 'tracing_mark_write':" + raw_ftrace_entry)
-#
-#    #"B|2446|(Paused)ClearCards|Foo"
-#    match = re.match("([^|]*)\|([^|]*)\|(.*)", raw_ftrace_entry.function_args)
-#
-#    if not match:
-#      return None
-#
-#    # ('B', '2446', '(Paused)ClearCards|Foo')
-#    groups = match.groups()
-#
-#    mark_type = groups[0]
-#    mark_id = int(groups[1])
-#    message = groups[2]
-#
-#    return TracingMarkWriteFtraceEntry(id = raw_ftrace_entry.id,
-#                                       mark_type = mark_type,
-#                                       mark_id = mark_id,
-#                                       message = message)
-
-class SchedSwitch(Base):
-  __tablename__ = 'sched_switches'
-
-  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
-
-  prev_comm = Column(String, nullable=False)
-  prev_pid = Column(Integer, nullable=False)
-  prev_prio = Column(Integer, nullable=False)
-  prev_state = Column(String, nullable=False)
-
-  next_comm = Column(String, nullable=False)
-  next_pid = Column(Integer, nullable=False)
-  next_prio = Column(Integer, nullable=False)
-
-#  __mapper_args__ = {
-#      'polymorphic_identity':'raw_ftrace_entry',
-#      'polymorphic_on':function
-#  }
-
-  @staticmethod
-  def parse_dict(function_args, id = None):
-    # 'prev_comm=kworker/u16:5 prev_pid=13971 prev_prio=120 prev_state=S ==> next_comm=swapper/4 next_pid=0 next_prio=120'
-    m = re.match("prev_comm=(.*) prev_pid=(\d+) prev_prio=(\d+) prev_state=(.*) ==> next_comm=(.*) next_pid=(\d+) next_prio=(\d+) ?", function_args)
-    if not m:
-      return None
-
-    groups = m.groups()
-    # ('kworker/u16:5', '13971', '120', 'S', 'swapper/4', '0', '120')
-    d = {}
-    if id is not None:
-      d['id'] = id
-    d['prev_comm'] = groups[0]
-    d['prev_pid'] = int(groups[1])
-    d['prev_prio'] = int(groups[2])
-    d['prev_state'] = groups[3]
-    d['next_comm'] = groups[4]
-    d['next_pid'] = int(groups[5])
-    d['next_prio'] = int(groups[6])
-
-    return d
-
-class SchedBlockedReason(Base):
-  __tablename__ = 'sched_blocked_reasons'
-
-  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
-  pid = Column(Integer, nullable=False)
-  iowait = Column(Integer, nullable=False)
-  caller = Column(String, nullable=False)
-
-  @staticmethod
-  def parse_dict(function_args, id = None):
-    # 'pid=2289 iowait=1 caller=wait_on_page_bit_common+0x2a8/0x5f'
-    m = re.match("pid=(\d+) iowait=(\d+) caller=(.*) ?", function_args)
-    if not m:
-      return None
-
-    groups = m.groups()
-    # ('2289', '1', 'wait_on_page_bit_common+0x2a8/0x5f8')
-    d = {}
-    if id is not None:
-      d['id'] = id
-    d['pid'] = int(groups[0])
-    d['iowait'] = int(groups[1])
-    d['caller'] = groups[2]
-
-    return d
-
-def init_sqlalchemy(db_filename):
-  global _DEBUG
-  engine = create_engine('sqlite:///' + db_filename, echo=_DEBUG)
-
-  # DROP TABLES
-#  Base.metadata.drop_all(engine)
-  # CREATE ... (tables)
-  Base.metadata.create_all(engine)
-
-  Session = sessionmaker(bind=engine)
-  session = Session()
-  return (session, engine)
-
-def insert_pending_entries(engine, kls, lst):
-  if len(lst) > 0:
-    # for some reason, it tries to generate an empty INSERT statement with len=0,
-    # which of course violates the first non-null constraint.
-    try:
-      # Performance-sensitive parsing according to:
-      # https://docs.sqlalchemy.org/en/13/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
-      engine.execute(kls.__table__.insert(), lst)
-      lst.clear()
-    except sqlalchemy.exc.IntegrityError as err:
-      # possibly violating some SQL constraint, print data here.
-      print(err)
-      print(lst)
-      raise
-
-def parse_file(filename, session, engine):
-  global _LIMIT
-  global _FLUSH_LIMIT
-  count = 0
-
-  pending_entries = []
-  pending_sched_switch = []
-  pending_sched_blocked_reasons = []
-
-  def insert_all_pending_entries():
-    insert_pending_entries(engine, RawFtraceEntry, pending_entries)
-    insert_pending_entries(engine, SchedSwitch, pending_sched_switch)
-    insert_pending_entries(engine, SchedBlockedReason, pending_sched_blocked_reasons)
-
-  # use explicit encoding to avoid UnicodeDecodeError.
-  with open(filename, encoding="ISO-8859-1") as f:
-    for l in f:
-
-      if len(l) > 1 and l[0] == '#':
-        continue
-
-      count = count + 1
-
-      if _LIMIT and count >= _LIMIT:
-        break
-
-      raw_ftrace_entry = RawFtraceEntry.parse_dict(l)
-      if not raw_ftrace_entry:
-        print("WARNING: Failed to parse raw ftrace entry: " + l)
-        continue
-
-      pending_entries.append(raw_ftrace_entry)
-
-      if raw_ftrace_entry['function'] == 'sched_switch':
-        sched_switch = SchedSwitch.parse_dict(raw_ftrace_entry['function_args'], count)
-
-        if not sched_switch:
-          print("WARNING: Failed to parse sched_switch: " + l)
-        else:
-          pending_sched_switch.append(sched_switch)
-
-      elif raw_ftrace_entry['function'] == 'sched_blocked_reason':
-        sbr = SchedBlockedReason.parse_dict(raw_ftrace_entry['function_args'], count)
-
-        if not sbr:
-          print("WARNING: Failed to parse sched_blocked_reason: " + l)
-        else:
-          pending_sched_blocked_reasons.append(sbr)
-
-      # Objects are cached in python memory, not yet sent to SQL database.
-      #session.add(raw_ftrace_entry)
-
-      # Send INSERT/UPDATE/etc statements to the underlying SQL database.
-      if count % _FLUSH_LIMIT == 0:
-        # session.flush()
-        #session.bulk_save_objects(pending_entries)
-        #session.bulk_insert_mappings(RawFtraceEntry, pending_entries)
-        insert_all_pending_entries()
-
-  insert_all_pending_entries()
-
-  # Ensure underlying database commits changes from memory to disk.
-  session.commit()
-
-  return count
+def main(argv):
+  parser = argparse.ArgumentParser(description='Convert ftrace/systrace file into sqlite3 db.')
+  parser.add_argument('db_filename', metavar='sql_filename.db', type=str,
+                      help='path to sqlite3 db filename')
+  parser.add_argument('trace_filename', metavar='systrace.ftrace', type=str,
+                      help='path to ftrace/systrace filename')
+  parser.add_argument('--limit', type=int, help='limit the number of entries parsed [for debugging]')

-#def decode_raw_traces(session, engine):
-#  count = 0
-#  global _FLUSH_LIMIT
-#
-#  for tmw in session.query(RawFtraceEntry).filter_by(function = 'tracing_mark_write'):
-#    print(tmw)
-#    decoded = TracingMarkWriteFtraceEntry.decode(tmw)
-#    session.add(decoded)
-#
-#    if count % _FLUSH_LIMIT == 0:
-#      session.flush()
-#
-#  session.commit()
-#
-#  return count
+  args = parser.parse_args()

-def main(argv):
-  db_filename = sys.argv[1]
-  trace_filename = sys.argv[2]
+  db_filename = args.db_filename
+  trace_filename = args.trace_filename

-  session, engine = init_sqlalchemy(db_filename)
+  trace2db = Trace2Db(db_filename)

   print("SQL Alchemy db initialized")

   # parse 'raw_ftrace_entries' table
-  count = parse_file(trace_filename, session, engine)
+  count = trace2db.parse_file_into_db(trace_filename, limit=args.limit)

   print("Count was ", count)

-  # create other tables
-#  count = decode_raw_traces(session, engine)
-
   return 0

 if __name__ == '__main__':
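
For reference, a minimal sketch of driving the refactored library directly
instead of through the CLI above. Trace2Db, parse_file_into_db (including its
limit keyword and entry-count return value), and the raw_ftrace_entries table
name are taken from the code in this change; the file paths and the final
sqlite3 sanity check are illustrative assumptions.

    #!/usr/bin/env python3
    # Build a sqlite3 db from an ftrace/systrace text file using the refactored
    # Trace2Db library, then sanity-check the row count of one generated table.
    # Assumes it runs next to trace_analyzer.py so that lib.trace2db is importable.
    import sqlite3

    from lib.trace2db import Trace2Db

    DB_FILENAME = 'out.db'              # illustrative path
    TRACE_FILENAME = 'systrace.ftrace'  # illustrative path

    trace2db = Trace2Db(DB_FILENAME)
    # Same call the new main() makes; returns the number of parsed ftrace entries.
    count = trace2db.parse_file_into_db(TRACE_FILENAME, limit=100000)
    print('parsed entries:', count)

    # The table name matches the schema that previously lived in this script.
    with sqlite3.connect(DB_FILENAME) as conn:
      rows = conn.execute('SELECT COUNT(*) FROM raw_ftrace_entries').fetchone()[0]
      print('rows in raw_ftrace_entries:', rows)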