path: root/startop/scripts
author    Igor Murashkin <iam@google.com>    2019-06-18 16:03:20 -0700
committer Igor Murashkin <iam@google.com>    2019-06-26 21:19:27 +0000
commit    06d017ee1913b20d5ebbb9c31e4c6061d993067e (patch)
tree      86bcd6facf165e7b70e5900e05f41c70a0dd2df1 /startop/scripts
parent    42439214ebdf1b6de12683790ea325164fda37fc (diff)
startop: Add iorap compiler written in python

The compiler will be used for experimentation purposes, since it's both
easier to develop in Python and it accepts ftrace, making it very easy to
write complicated experimental changes that we aren't sure are worth it
yet for the on-device C++/perfetto compiler.

This 'new' compiler accepts ftrace/systrace files as input, then
generates an in-memory sqlite3 database (using the trace_analyzer source
code), and finally code-generates a TraceFile.pb protobuf.

(Also refactor trace_analyzer into a library, and update it to parse
systrace.html files.)

Limitations: currently does not accept perfetto_trace.pb files due to
'ofs' fields missing (see bug#135555191).

Test: py.test-3 frameworks/base/startop/scripts
Test: ./compiler.py -i tmp_sargo/textcache -t tmp_sargo/trace.html -o tmp/output.pb
Test: ./compiler.py -i tmp_sargo/textcache -t tmp_sargo/trace.html -o tmp/output.pb -f '^/data'
Test: ./trace_analyzer music_run.trace tmp_dbs/12345.db
Bug: 135557978
Bug: 134789969
Change-Id: Ic8295900ee9e634b4cfd8cf99b671ae08d2ea4f7
Diffstat (limited to 'startop/scripts')
-rwxr-xr-x  startop/scripts/iorap/compiler.py                   | 244
-rw-r--r--  startop/scripts/iorap/generated/TraceFile_pb2.py    | 259
-rwxr-xr-x  startop/scripts/iorap/generated/codegen_protos      |  35
-rw-r--r--  startop/scripts/iorap/lib/inode2filename.py         |  94
-rwxr-xr-x  startop/scripts/iorap/lib/inode2filename_test.py    |  83
-rw-r--r--  startop/scripts/trace_analyzer/lib/trace2db.py      | 343
-rwxr-xr-x  startop/scripts/trace_analyzer/lib/trace2db_test.py | 202
-rwxr-xr-x  startop/scripts/trace_analyzer/trace_analyzer.py    | 333
8 files changed, 1277 insertions, 316 deletions
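
As an aside, a minimal sketch (not part of this change) of how a consumer might read back the TraceFile.pb that compiler.py emits. The dump helper and paths are illustrative; it assumes it is run from startop/scripts/iorap so the generated/ package below is importable, and it only uses the message fields defined in TraceFile_pb2.py.

#!/usr/bin/env python3
# Illustrative sketch: print the contents of a TraceFile.pb written by compiler.py.
import sys

from generated.TraceFile_pb2 import TraceFile

def dump_trace_file(path):  # hypothetical helper, for illustration only.
  trace_file = TraceFile()
  with open(path, 'rb') as f:
    trace_file.ParseFromString(f.read())
  # The index maps integer ids to file names; the list holds per-file runs.
  id_to_name = {e.id: e.file_name for e in trace_file.index.entries}
  for entry in trace_file.list.entries:
    print("%s: offset=%d length=%d"
          % (id_to_name[entry.index_id], entry.file_offset, entry.file_length))

if __name__ == '__main__':
  dump_trace_file(sys.argv[1])  # e.g. tmp/output.pb from the Test: lines above.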
diff --git a/startop/scripts/iorap/compiler.py b/startop/scripts/iorap/compiler.py
new file mode 100755
index 000000000000..79149601acb0
--- /dev/null
+++ b/startop/scripts/iorap/compiler.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python3
+
+#
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Dependencies:
+#
+# $> sudo apt-get install python3-pip
+# $> pip3 install --user protobuf sqlalchemy
+# (sqlite3 ships with the Python standard library; it does not need pip.)
+#
+
+import collections
+import optparse
+import os
+import re
+import sys
+
+from typing import Iterable
+
+from lib.inode2filename import Inode2Filename
+from generated.TraceFile_pb2 import *
+
+# Make the sibling trace_analyzer/ directory importable so we can reuse its lib/.
+parent_dir_name = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+sys.path.append(parent_dir_name + "/trace_analyzer")
+from lib.trace2db import Trace2Db, MmFilemapAddToPageCache
+
+_PAGE_SIZE = 4096  # Size of a memory page in bytes; see 'adb shell getconf PAGESIZE'.
+
+class PageRun:
+ """
+ Intermediate representation for a run of one or more pages.
+ """
+ def __init__(self, device_number: int, inode: int, offset: int, length: int):
+ self.device_number = device_number
+ self.inode = inode
+ self.offset = offset
+ self.length = length
+
+ def __str__(self):
+ return "PageRun(device_number=%d, inode=%d, offset=%d, length=%d)" \
+ %(self.device_number, self.inode, self.offset, self.length)
+
+def debug_print(msg):
+ #print(msg)
+ pass
+
+UNDER_LAUNCH = False
+
+def page_cache_entries_to_runs(page_cache_entries: Iterable[MmFilemapAddToPageCache]):
+ runs = [
+ PageRun(device_number=pg_entry.dev, inode=pg_entry.ino, offset=pg_entry.ofs,
+ length=_PAGE_SIZE)
+ for pg_entry in page_cache_entries
+ ]
+
+ for r in runs:
+ debug_print(r)
+
+ print("Stats: Page runs totaling byte length: %d" %(len(runs) * _PAGE_SIZE))
+
+ return runs
+
+def optimize_page_runs(page_runs):
+ new_entries = []
+ last_entry = None
+ for pg_entry in page_runs:
+ if last_entry:
+ if pg_entry.device_number == last_entry.device_number and pg_entry.inode == last_entry.inode:
+ # we are dealing with a run for the same exact file as a previous run.
+ if pg_entry.offset == last_entry.offset + last_entry.length:
+ # trivially contiguous entries. merge them together.
+ last_entry.length += pg_entry.length
+ continue
+ # Default: Add the run without merging it to a previous run.
+ last_entry = pg_entry
+ new_entries.append(pg_entry)
+ return new_entries
+
+def is_filename_matching_filter(file_name, filters=[]):
+ """
+ Allowlist-style regular expression filters: file entries that match are kept.
+
+ :return: True iff file_name has an RE match in one of the filters.
+ """
+ for filt in filters:
+ res = re.search(filt, file_name)
+ if res:
+ return True
+
+ return False
+
+def build_protobuf(page_runs, inode2filename, filters=[]):
+ trace_file = TraceFile()
+ trace_file_index = trace_file.index
+
+ file_id_counter = 0
+ file_id_map = {} # filename -> id
+
+ stats_length_total = 0
+ filename_stats = {} # filename -> total size
+
+ skipped_inode_map = {}
+ filtered_entry_map = {} # filename -> count
+
+ for pg_entry in page_runs:
+ fn = inode2filename.resolve(pg_entry.device_number, pg_entry.inode)
+ if not fn:
+ skipped_inode_map[pg_entry.inode] = skipped_inode_map.get(pg_entry.inode, 0) + 1
+ continue
+
+ filename = fn
+
+ if filters and not is_filename_matching_filter(filename, filters):
+ filtered_entry_map[filename] = filtered_entry_map.get(filename, 0) + 1
+ continue
+
+ file_id = file_id_map.get(filename)
+ if file_id is None:  # 0 is a valid id; 'not file_id' would re-add the first file.
+ file_id = file_id_counter
+ file_id_map[filename] = file_id_counter
+ file_id_counter = file_id_counter + 1
+
+ file_index_entry = trace_file_index.entries.add()
+ file_index_entry.id = file_id
+ file_index_entry.file_name = filename
+
+ # already in the file index, add the file entry.
+ file_entry = trace_file.list.entries.add()
+ file_entry.index_id = file_id
+ file_entry.file_length = pg_entry.length
+ stats_length_total += file_entry.file_length
+ file_entry.file_offset = pg_entry.offset
+
+ filename_stats[filename] = filename_stats.get(filename, 0) + file_entry.file_length
+
+ for inode, count in skipped_inode_map.items():
+ print("WARNING: Skip inode %s because it's not in inode map (%d entries)" %(inode, count))
+
+ print("Stats: Sum of lengths %d" %(stats_length_total))
+
+ if filters:
+ print("Filter: %d total files removed." %(len(filtered_entry_map)))
+
+ for fn, count in filtered_entry_map.items():
+ print("Filter: File '%s' removed '%d' entries." %(fn, count))
+
+ for filename, file_size in filename_stats.items():
+ print("%s,%s" %(filename, file_size))
+
+ return trace_file
+
+def query_add_to_page_cache(trace2db: Trace2Db):
+ # SELECT * FROM tbl ORDER BY id;
+ return trace2db.session.query(MmFilemapAddToPageCache).order_by(MmFilemapAddToPageCache.id).all()
+
+def main(argv):
+ parser = optparse.OptionParser(usage="Usage: %prog [options]", description="Compile systrace file into TraceFile.pb")
+ parser.add_option('-i', dest='inode_data_file', metavar='FILE',
+ help='Read cached inode data from a file saved earlier with pagecache.py -d')
+ parser.add_option('-t', dest='trace_file', metavar='FILE',
+ help='Path to systrace file (trace.html) that will be parsed')
+
+ parser.add_option('--db', dest='sql_db', metavar='FILE',
+ help='Path to intermediate sqlite3 database [default: in-memory].')
+
+ parser.add_option('-f', dest='filter', action="append", default=[],
+ help="Add file filter. All file entries not matching one of the filters are discarded.")
+
+ parser.add_option('-l', dest='launch_lock', action="store_true", default=False,
+ help="Exclude all events not inside launch_lock")
+
+ parser.add_option('-o', dest='output_file', metavar='FILE',
+ help='Output protobuf file')
+
+ options, categories = parser.parse_args(argv[1:])
+
+ # TODO: OptionParser should have some flags to make these mandatory.
+ if not options.inode_data_file:
+ parser.error("-i is required")
+ if not options.trace_file:
+ parser.error("-t is required")
+ if not options.output_file:
+ parser.error("-o is required")
+
+ if options.launch_lock:
+ # TODO: launch-lock filtering is not implemented yet; UNDER_LAUNCH is never set.
+ print("WARNING: Launch lock flag (-l) is accepted but not implemented yet; ignoring it.")
+
+
+ inode_table = Inode2Filename.new_from_filename(options.inode_data_file)
+
+ sql_db_path = ":memory:"
+ if options.sql_db:
+ sql_db_path = options.sql_db
+
+ trace2db = Trace2Db(sql_db_path)
+ # Speed optimization: Skip any entries that aren't mm_filemap_add_to_pagecache.
+ trace2db.set_raw_ftrace_entry_filter(\
+ lambda entry: entry['function'] == 'mm_filemap_add_to_page_cache')
+ # TODO: parse multiple trace files here.
+ parse_count = trace2db.parse_file_into_db(options.trace_file)
+
+ mm_filemap_add_to_page_cache_rows = query_add_to_page_cache(trace2db)
+ print("DONE. Parsed %d entries into sql db." %(len(mm_filemap_add_to_page_cache_rows)))
+
+ page_runs = page_cache_entries_to_runs(mm_filemap_add_to_page_cache_rows)
+ print("DONE. Converted %d entries" %(len(page_runs)))
+
+ # TODO: flags to select optimizations.
+ optimized_page_runs = optimize_page_runs(page_runs)
+ print("DONE. Optimized down to %d entries" %(len(optimized_page_runs)))
+
+ print("Build protobuf...")
+ trace_file = build_protobuf(optimized_page_runs, inode_table, options.filter)
+
+ print("Write protobuf to file...")
+ output_file = open(options.output_file, 'wb')
+ output_file.write(trace_file.SerializeToString())
+ output_file.close()
+
+ print("DONE")
+
+ # TODO: Silent running mode [no output except on error] for build runs.
+
+ return 0
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv))
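
To make the run-merging concrete, a small example (not part of the commit) of optimize_page_runs. It assumes compiler.py is importable as a module (run from startop/scripts/iorap with protobuf installed), which the __main__ guard above permits.

# Illustrative only: two adjacent 4096-byte runs of the same (device, inode)
# merge into one 8192-byte run; a run for a different inode stays separate.
from compiler import PageRun, optimize_page_runs

runs = [
  PageRun(device_number=64774, inode=0x7580, offset=0, length=4096),
  PageRun(device_number=64774, inode=0x7580, offset=4096, length=4096),
  PageRun(device_number=64774, inode=0x9a64, offset=0, length=4096),
]
merged = optimize_page_runs(runs)
assert len(merged) == 2
assert merged[0].length == 8192  # the two contiguous runs were merged.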
diff --git a/startop/scripts/iorap/generated/TraceFile_pb2.py b/startop/scripts/iorap/generated/TraceFile_pb2.py
new file mode 100644
index 000000000000..f005bed427ca
--- /dev/null
+++ b/startop/scripts/iorap/generated/TraceFile_pb2.py
@@ -0,0 +1,259 @@
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: TraceFile.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf import descriptor_pb2
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+ name='TraceFile.proto',
+ package='iorap.serialize.proto',
+ syntax='proto2',
+ serialized_pb=_b('\n\x0fTraceFile.proto\x12\x15iorap.serialize.proto\"u\n\tTraceFile\x12\x34\n\x05index\x18\x01 \x02(\x0b\x32%.iorap.serialize.proto.TraceFileIndex\x12\x32\n\x04list\x18\x02 \x02(\x0b\x32$.iorap.serialize.proto.TraceFileList\"M\n\x0eTraceFileIndex\x12;\n\x07\x65ntries\x18\x01 \x03(\x0b\x32*.iorap.serialize.proto.TraceFileIndexEntry\"4\n\x13TraceFileIndexEntry\x12\n\n\x02id\x18\x01 \x02(\x03\x12\x11\n\tfile_name\x18\x02 \x02(\t\"G\n\rTraceFileList\x12\x36\n\x07\x65ntries\x18\x01 \x03(\x0b\x32%.iorap.serialize.proto.TraceFileEntry\"L\n\x0eTraceFileEntry\x12\x10\n\x08index_id\x18\x01 \x02(\x03\x12\x13\n\x0b\x66ile_offset\x18\x02 \x02(\x03\x12\x13\n\x0b\x66ile_length\x18\x03 \x02(\x03\x42\x1c\n\x18\x63om.google.android.iorapH\x03')
+)
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+
+
+
+_TRACEFILE = _descriptor.Descriptor(
+ name='TraceFile',
+ full_name='iorap.serialize.proto.TraceFile',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='index', full_name='iorap.serialize.proto.TraceFile.index', index=0,
+ number=1, type=11, cpp_type=10, label=2,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ options=None),
+ _descriptor.FieldDescriptor(
+ name='list', full_name='iorap.serialize.proto.TraceFile.list', index=1,
+ number=2, type=11, cpp_type=10, label=2,
+ has_default_value=False, default_value=None,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ options=None),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ options=None,
+ is_extendable=False,
+ syntax='proto2',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=42,
+ serialized_end=159,
+)
+
+
+_TRACEFILEINDEX = _descriptor.Descriptor(
+ name='TraceFileIndex',
+ full_name='iorap.serialize.proto.TraceFileIndex',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='entries', full_name='iorap.serialize.proto.TraceFileIndex.entries', index=0,
+ number=1, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ options=None),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ options=None,
+ is_extendable=False,
+ syntax='proto2',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=161,
+ serialized_end=238,
+)
+
+
+_TRACEFILEINDEXENTRY = _descriptor.Descriptor(
+ name='TraceFileIndexEntry',
+ full_name='iorap.serialize.proto.TraceFileIndexEntry',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='id', full_name='iorap.serialize.proto.TraceFileIndexEntry.id', index=0,
+ number=1, type=3, cpp_type=2, label=2,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ options=None),
+ _descriptor.FieldDescriptor(
+ name='file_name', full_name='iorap.serialize.proto.TraceFileIndexEntry.file_name', index=1,
+ number=2, type=9, cpp_type=9, label=2,
+ has_default_value=False, default_value=_b("").decode('utf-8'),
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ options=None),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ options=None,
+ is_extendable=False,
+ syntax='proto2',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=240,
+ serialized_end=292,
+)
+
+
+_TRACEFILELIST = _descriptor.Descriptor(
+ name='TraceFileList',
+ full_name='iorap.serialize.proto.TraceFileList',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='entries', full_name='iorap.serialize.proto.TraceFileList.entries', index=0,
+ number=1, type=11, cpp_type=10, label=3,
+ has_default_value=False, default_value=[],
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ options=None),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ options=None,
+ is_extendable=False,
+ syntax='proto2',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=294,
+ serialized_end=365,
+)
+
+
+_TRACEFILEENTRY = _descriptor.Descriptor(
+ name='TraceFileEntry',
+ full_name='iorap.serialize.proto.TraceFileEntry',
+ filename=None,
+ file=DESCRIPTOR,
+ containing_type=None,
+ fields=[
+ _descriptor.FieldDescriptor(
+ name='index_id', full_name='iorap.serialize.proto.TraceFileEntry.index_id', index=0,
+ number=1, type=3, cpp_type=2, label=2,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ options=None),
+ _descriptor.FieldDescriptor(
+ name='file_offset', full_name='iorap.serialize.proto.TraceFileEntry.file_offset', index=1,
+ number=2, type=3, cpp_type=2, label=2,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ options=None),
+ _descriptor.FieldDescriptor(
+ name='file_length', full_name='iorap.serialize.proto.TraceFileEntry.file_length', index=2,
+ number=3, type=3, cpp_type=2, label=2,
+ has_default_value=False, default_value=0,
+ message_type=None, enum_type=None, containing_type=None,
+ is_extension=False, extension_scope=None,
+ options=None),
+ ],
+ extensions=[
+ ],
+ nested_types=[],
+ enum_types=[
+ ],
+ options=None,
+ is_extendable=False,
+ syntax='proto2',
+ extension_ranges=[],
+ oneofs=[
+ ],
+ serialized_start=367,
+ serialized_end=443,
+)
+
+_TRACEFILE.fields_by_name['index'].message_type = _TRACEFILEINDEX
+_TRACEFILE.fields_by_name['list'].message_type = _TRACEFILELIST
+_TRACEFILEINDEX.fields_by_name['entries'].message_type = _TRACEFILEINDEXENTRY
+_TRACEFILELIST.fields_by_name['entries'].message_type = _TRACEFILEENTRY
+DESCRIPTOR.message_types_by_name['TraceFile'] = _TRACEFILE
+DESCRIPTOR.message_types_by_name['TraceFileIndex'] = _TRACEFILEINDEX
+DESCRIPTOR.message_types_by_name['TraceFileIndexEntry'] = _TRACEFILEINDEXENTRY
+DESCRIPTOR.message_types_by_name['TraceFileList'] = _TRACEFILELIST
+DESCRIPTOR.message_types_by_name['TraceFileEntry'] = _TRACEFILEENTRY
+
+TraceFile = _reflection.GeneratedProtocolMessageType('TraceFile', (_message.Message,), dict(
+ DESCRIPTOR = _TRACEFILE,
+ __module__ = 'TraceFile_pb2'
+ # @@protoc_insertion_point(class_scope:iorap.serialize.proto.TraceFile)
+ ))
+_sym_db.RegisterMessage(TraceFile)
+
+TraceFileIndex = _reflection.GeneratedProtocolMessageType('TraceFileIndex', (_message.Message,), dict(
+ DESCRIPTOR = _TRACEFILEINDEX,
+ __module__ = 'TraceFile_pb2'
+ # @@protoc_insertion_point(class_scope:iorap.serialize.proto.TraceFileIndex)
+ ))
+_sym_db.RegisterMessage(TraceFileIndex)
+
+TraceFileIndexEntry = _reflection.GeneratedProtocolMessageType('TraceFileIndexEntry', (_message.Message,), dict(
+ DESCRIPTOR = _TRACEFILEINDEXENTRY,
+ __module__ = 'TraceFile_pb2'
+ # @@protoc_insertion_point(class_scope:iorap.serialize.proto.TraceFileIndexEntry)
+ ))
+_sym_db.RegisterMessage(TraceFileIndexEntry)
+
+TraceFileList = _reflection.GeneratedProtocolMessageType('TraceFileList', (_message.Message,), dict(
+ DESCRIPTOR = _TRACEFILELIST,
+ __module__ = 'TraceFile_pb2'
+ # @@protoc_insertion_point(class_scope:iorap.serialize.proto.TraceFileList)
+ ))
+_sym_db.RegisterMessage(TraceFileList)
+
+TraceFileEntry = _reflection.GeneratedProtocolMessageType('TraceFileEntry', (_message.Message,), dict(
+ DESCRIPTOR = _TRACEFILEENTRY,
+ __module__ = 'TraceFile_pb2'
+ # @@protoc_insertion_point(class_scope:iorap.serialize.proto.TraceFileEntry)
+ ))
+_sym_db.RegisterMessage(TraceFileEntry)
+
+
+DESCRIPTOR.has_options = True
+DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\030com.google.android.iorapH\003'))
+# @@protoc_insertion_point(module_scope)
diff --git a/startop/scripts/iorap/generated/codegen_protos b/startop/scripts/iorap/generated/codegen_protos
new file mode 100755
index 000000000000..5688711ec25d
--- /dev/null
+++ b/startop/scripts/iorap/generated/codegen_protos
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# Copyright 2019, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+APROTOC="${APROTOC:-$(which aprotoc)}"
+
+IORAP_SERIALIZE_DIR="${DIR}/../../../../../../system/iorap/src/serialize"
+IORAP_PROTOS=($IORAP_SERIALIZE_DIR/*.proto)
+
+# Note: test the variable itself; '$?' here would reflect the array assignment above.
+if [[ -z "$APROTOC" ]]; then
+ echo "Fatal: Missing aprotoc. Set APROTOC=... or run build/envsetup.sh and lunch?" >&2
+ exit 1
+fi
+
+if ! [[ -d $IORAP_SERIALIZE_DIR ]]; then
+ echo "Fatal: Directory '$IORAP_SERIALIZE_DIR' does not exist." >&2
+ exit 1
+fi
+
+# codegen the .py files into the same directory as this script.
+echo "$APROTOC" --proto_path="$IORAP_SERIALIZE_DIR" --python_out="$DIR" "${IORAP_PROTOS[@]}"
+"$APROTOC" --proto_path="$IORAP_SERIALIZE_DIR" --python_out="$DIR" "${IORAP_PROTOS[@]}"
diff --git a/startop/scripts/iorap/lib/inode2filename.py b/startop/scripts/iorap/lib/inode2filename.py
new file mode 100644
index 000000000000..2e713936319a
--- /dev/null
+++ b/startop/scripts/iorap/lib/inode2filename.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+
+#
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Any, Callable, Dict, Generic, Iterable, List, NamedTuple, TextIO, Tuple, TypeVar, Optional, Union
+
+import re
+
+class Inode2Filename:
+ """
+ Parses a text file of the format
+ "uint(dev_t) uint(ino_t) int(file_size) string(filepath)\\n"*
+
+ Lines not matching this format are ignored.
+ """
+
+ def __init__(self, inode_data_file: TextIO):
+ """
+ Create an Inode2Filename that reads cached inode data from a file saved earlier
+ (e.g. with pagecache.py -d or with inode2filename --format=textcache)
+
+ :param inode_data_file: a file object (e.g. created with open or StringIO).
+
+ Lifetime: inode_data_file is only used during the construction of the object.
+ """
+ self._inode_table = Inode2Filename.build_inode_lookup_table(inode_data_file)
+
+ @classmethod
+ def new_from_filename(cls, textcache_filename: str) -> 'Inode2Filename':
+ """
+ Create an Inode2Filename that reads cached inode data from a file saved earlier
+ (e.g. with pagecache.py -d or with inode2filename --format=textcache)
+
+ :param textcache_filename: path to textcache
+ """
+ with open(textcache_filename) as inode_data_file:
+ return cls(inode_data_file)
+
+ @staticmethod
+ def build_inode_lookup_table(inode_data_file: TextIO) -> Dict[Tuple[int, int], Tuple[str, str]]:
+ """
+ :return: map { (device_int, inode_int) -> (filename_str, size_str) }
+ """
+ inode2filename = {}
+ for line in inode_data_file:
+ # stat -c "%d %i %s %n"
+ # device number, inode number, total size in bytes, file name
+ result = re.match('([0-9]+)d? ([0-9]+) -?([0-9]+) (.*)', line)
+ if result:
+ inode2filename[(int(result.group(1)), int(result.group(2)))] = \
+ (result.group(4), result.group(3))
+
+ return inode2filename
+
+ def resolve(self, dev_t: int, ino_t: int) -> Optional[str]:
+ """
+ Return a filename (str) from a (dev_t, ino_t) inode pair.
+
+ Returns None if the lookup fails.
+ """
+ maybe_result = self._inode_table.get((dev_t, ino_t))
+
+ if not maybe_result:
+ return None
+
+ return maybe_result[0] # filename str
+
+ def __len__(self) -> int:
+ """
+ :return: the number of inode entries parsed from the file.
+ """
+ return len(self._inode_table)
+
+ def __repr__(self) -> str:
+ """
+ :return: string representation for debugging/test failures.
+ """
+ return "Inode2Filename%s" %(repr(self._inode_table))
+
+ # end of class.
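
A quick usage sketch for the class above (illustrative only; the device/inode numbers and path are made up, and lib/ is assumed to be on sys.path):

# Illustrative only: build a table from one line of 'stat -c "%d %i %s %n"'
# output, then resolve it back.
import io

from inode2filename import Inode2Filename

textcache = io.StringIO("64774 32768 4096 /system/framework/framework.jar\n")
table = Inode2Filename(textcache)

assert len(table) == 1
assert table.resolve(64774, 32768) == "/system/framework/framework.jar"
assert table.resolve(1, 2) is None  # unknown (dev_t, ino_t) pairs yield None.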
diff --git a/startop/scripts/iorap/lib/inode2filename_test.py b/startop/scripts/iorap/lib/inode2filename_test.py
new file mode 100755
index 000000000000..1224c61da641
--- /dev/null
+++ b/startop/scripts/iorap/lib/inode2filename_test.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+#
+# Copyright 2019, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Unit tests for inode2filename module.
+
+Install:
+ $> sudo apt-get install python3-pytest ## OR
+ $> pip install -U pytest
+See also https://docs.pytest.org/en/latest/getting-started.html
+
+Usage:
+ $> ./inode2filename_test.py
+ $> pytest inode2filename_test.py
+ $> python -m pytest inode2filename_test.py
+
+See also https://docs.pytest.org/en/latest/usage.html
+"""
+
+# global imports
+from contextlib import contextmanager
+import io
+import shlex
+import sys
+import typing
+
+# pip imports
+import pytest
+
+# local imports
+from inode2filename import *
+
+def create_inode2filename(*contents):
+ buf = io.StringIO()
+
+ for c in contents:
+ buf.write(c)
+ buf.write("\n")
+
+ buf.seek(0)
+
+ i2f = Inode2Filename(buf)
+
+ buf.close()
+
+ return i2f
+
+def test_inode2filename():
+ a = create_inode2filename("")
+ assert len(a) == 0
+ assert a.resolve(1, 2) is None
+
+ a = create_inode2filename("1 2 3 foo.bar")
+ assert len(a) == 1
+ assert a.resolve(1, 2) == "foo.bar"
+ assert a.resolve(4, 5) is None
+
+ a = create_inode2filename("1 2 3 foo.bar", "4 5 6 bar.baz")
+ assert len(a) == 2
+ assert a.resolve(1, 2) == "foo.bar"
+ assert a.resolve(4, 5) == "bar.baz"
+
+ a = create_inode2filename("1567d 8910 -1 /a/b/c/", "4 5 6 bar.baz")
+ assert len(a) == 2
+ assert a.resolve(1567, 8910) == "/a/b/c/"
+ assert a.resolve(4, 5) == "bar.baz"
+
+if __name__ == '__main__':
+ pytest.main()
diff --git a/startop/scripts/trace_analyzer/lib/trace2db.py b/startop/scripts/trace_analyzer/lib/trace2db.py
new file mode 100644
index 000000000000..f60d6ab5245b
--- /dev/null
+++ b/startop/scripts/trace_analyzer/lib/trace2db.py
@@ -0,0 +1,343 @@
+#!/usr/bin/python3
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import sys
+
+from sqlalchemy import create_engine
+from sqlalchemy import Column, Date, Integer, Float, String, ForeignKey
+from sqlalchemy.ext.declarative import declarative_base
+
+from sqlalchemy.orm import sessionmaker
+
+import sqlalchemy
+
+from typing import Optional, Tuple
+
+_DEBUG = False # print sql commands to console
+_FLUSH_LIMIT = 10000 # how many entries are parsed before flushing to DB from memory
+
+Base = declarative_base()
+
+class RawFtraceEntry(Base):
+ __tablename__ = 'raw_ftrace_entries'
+
+ id = Column(Integer, primary_key=True)
+ task_name = Column(String, nullable=True) # <...> -> None.
+ task_pid = Column(String, nullable=False)
+ tgid = Column(Integer, nullable=True) # ----- -> None.
+ cpu = Column(Integer, nullable=False)
+ timestamp = Column(Float, nullable=False)
+ function = Column(String, nullable=False)
+ function_args = Column(String, nullable=False)
+
+ @staticmethod
+ def parse_dict(line):
+ # ' <...>-5521 (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266'
+ m = re.match(r'\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
+ if not m:
+ return None
+
+ groups = m.groups()
+ # groups example:
+ # ('<...>',
+ # '5521',
+ # '-----',
+ # '003',
+ # '...1',
+ # '17148.446877',
+ # 'tracing_mark_write',
+ # 'trace_event_clock_sync: parent_ts=17148.447266')
+ task_name = groups[0]
+ if task_name == '<...>':
+ task_name = None
+
+ task_pid = int(groups[1])
+ tgid = groups[2]
+ if tgid == '-----':
+ tgid = None
+
+ cpu = int(groups[3])
+ # irq_flags = groups[4]
+ timestamp = float(groups[5])
+ function = groups[6]
+ function_args = groups[7]
+
+ return {'task_name': task_name, 'task_pid': task_pid, 'tgid': tgid, 'cpu': cpu, 'timestamp': timestamp, 'function': function, 'function_args': function_args}
+
+class SchedSwitch(Base):
+ __tablename__ = 'sched_switches'
+
+ id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
+
+ prev_comm = Column(String, nullable=False)
+ prev_pid = Column(Integer, nullable=False)
+ prev_prio = Column(Integer, nullable=False)
+ prev_state = Column(String, nullable=False)
+
+ next_comm = Column(String, nullable=False)
+ next_pid = Column(Integer, nullable=False)
+ next_prio = Column(Integer, nullable=False)
+
+ @staticmethod
+ def parse_dict(function_args, id = None):
+ # 'prev_comm=kworker/u16:5 prev_pid=13971 prev_prio=120 prev_state=S ==> next_comm=swapper/4 next_pid=0 next_prio=120'
+ m = re.match("prev_comm=(.*) prev_pid=(\d+) prev_prio=(\d+) prev_state=(.*) ==> next_comm=(.*) next_pid=(\d+) next_prio=(\d+) ?", function_args)
+ if not m:
+ return None
+
+ groups = m.groups()
+ # ('kworker/u16:5', '13971', '120', 'S', 'swapper/4', '0', '120')
+ d = {}
+ if id is not None:
+ d['id'] = id
+ d['prev_comm'] = groups[0]
+ d['prev_pid'] = int(groups[1])
+ d['prev_prio'] = int(groups[2])
+ d['prev_state'] = groups[3]
+ d['next_comm'] = groups[4]
+ d['next_pid'] = int(groups[5])
+ d['next_prio'] = int(groups[6])
+
+ return d
+
+class SchedBlockedReason(Base):
+ __tablename__ = 'sched_blocked_reasons'
+
+ id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
+
+ pid = Column(Integer, nullable=False)
+ iowait = Column(Integer, nullable=False)
+ caller = Column(String, nullable=False)
+
+ @staticmethod
+ def parse_dict(function_args, id = None):
+ # 'pid=2289 iowait=1 caller=wait_on_page_bit_common+0x2a8/0x5f'
+ m = re.match("pid=(\d+) iowait=(\d+) caller=(.*) ?", function_args)
+ if not m:
+ return None
+
+ groups = m.groups()
+ # ('2289', '1', 'wait_on_page_bit_common+0x2a8/0x5f8')
+ d = {}
+ if id is not None:
+ d['id'] = id
+ d['pid'] = int(groups[0])
+ d['iowait'] = int(groups[1])
+ d['caller'] = groups[2]
+
+ return d
+
+class MmFilemapAddToPageCache(Base):
+ __tablename__ = 'mm_filemap_add_to_page_caches'
+
+ id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
+
+ dev = Column(Integer, nullable=False) # decoded from ${major}:${minor} syntax.
+ dev_major = Column(Integer, nullable=False) # original ${major} value.
+ dev_minor = Column(Integer, nullable=False) # original ${minor} value.
+
+ ino = Column(Integer, nullable=False) # decoded from hex to base 10
+ page = Column(Integer, nullable=False) # decoded from hex to base 10
+
+ pfn = Column(Integer, nullable=False)
+ ofs = Column(Integer, nullable=False)
+
+ @staticmethod
+ def parse_dict(function_args, id = None):
+ # dev 253:6 ino b2c7 page=00000000ec787cd9 pfn=1478539 ofs=4096
+ m = re.match("dev (\d+):(\d+) ino ([0-9a-fA-F]+) page=([0-9a-fA-F]+) pfn=(\d+) ofs=(\d+)", function_args)
+ if not m:
+ return None
+
+ groups = m.groups()
+ # ('253', '6', 'b2c7', '00000000ec787cd9', '1478539', '4096')
+ d = {}
+ if id is not None:
+ d['id'] = id
+
+ device_major = d['dev_major'] = int(groups[0])
+ device_minor = d['dev_minor'] = int(groups[1])
+ d['dev'] = device_major << 8 | device_minor
+ d['ino'] = int(groups[2], 16)
+ d['page'] = int(groups[3], 16)
+ d['pfn'] = int(groups[4])
+ d['ofs'] = int(groups[5])
+
+ return d
+
+class Trace2Db:
+ def __init__(self, db_filename: str):
+ (s, e) = self._init_sqlalchemy(db_filename)
+ self._session = s
+ self._engine = e
+ self._raw_ftrace_entry_filter = lambda x: True
+
+ def set_raw_ftrace_entry_filter(self, flt):
+ """
+ Install a predicate: dict (of parsed RawFtraceEntry fields) -> bool.
+
+ If it returns False, the corresponding RawFtraceEntry is skipped and never added to the database.
+ """
+ self._raw_ftrace_entry_filter = flt
+
+ @staticmethod
+ def _init_sqlalchemy(db_filename: str) -> Tuple[object, object]:
+ engine = create_engine('sqlite:///' + db_filename, echo=_DEBUG)
+
+ # CREATE ... (tables)
+ Base.metadata.create_all(engine)
+
+ Session = sessionmaker(bind=engine)
+ session = Session()
+ return (session, engine)
+
+ def parse_file_into_db(self, filename: str, limit: Optional[int] = None):
+ """
+ Parse the ftrace/systrace at 'filename',
+ inserting the values into the current sqlite database.
+
+ :return: number of RawFtraceEntry inserted.
+ """
+ return parse_file(filename, self._session, self._engine, self._raw_ftrace_entry_filter, limit)
+
+ def parse_file_buf_into_db(self, file_buf, limit: Optional[int] = None):
+ """
+ Parse the ftrace/systrace from the file buffer 'file_buf',
+ inserting the values into the current sqlite database.
+
+ :return: number of RawFtraceEntry inserted.
+ """
+ return parse_file_buf(file_buf, self._session, self._engine, self._raw_ftrace_entry_filter, limit)
+
+
+ @property
+ def session(self):
+ return self._session
+
+def insert_pending_entries(engine, kls, lst):
+ if len(lst) > 0:
+ # sqlalchemy generates an empty INSERT statement for an empty list,
+ # which would violate the first NOT NULL constraint, so guard on len > 0.
+ try:
+ # Performance-sensitive parsing according to:
+ # https://docs.sqlalchemy.org/en/13/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
+ engine.execute(kls.__table__.insert(), lst)
+ lst.clear()
+ except sqlalchemy.exc.IntegrityError as err:
+ # possibly violating some SQL constraint, print data here.
+ print(err)
+ print(lst)
+ raise
+
+def parse_file(filename: str, *args, **kwargs) -> int:
+ # use explicit encoding to avoid UnicodeDecodeError.
+ with open(filename, encoding="ISO-8859-1") as f:
+ return parse_file_buf(f, *args, **kwargs)
+
+def parse_file_buf(filebuf, session, engine, raw_ftrace_entry_filter, limit=None) -> int:
+ count = 0
+
+ pending_entries = []
+ pending_sched_switch = []
+ pending_sched_blocked_reasons = []
+ pending_mm_filemap_add_to_pagecaches = []
+
+ def insert_all_pending_entries():
+ insert_pending_entries(engine, RawFtraceEntry, pending_entries)
+ insert_pending_entries(engine, SchedSwitch, pending_sched_switch)
+ insert_pending_entries(engine, SchedBlockedReason, pending_sched_blocked_reasons)
+ insert_pending_entries(engine, MmFilemapAddToPageCache, pending_mm_filemap_add_to_pagecaches)
+
+ # For trace.html files produced by systrace, the actual ftrace data is in
+ # the second <script class="trace-data"> block; the first block holds a process dump.
+ parsing_trace_data = 0
+ parsing_systrace_file = False
+
+ f = filebuf
+ for l in f:
+ if parsing_trace_data == 0 and l == "<!DOCTYPE html>\n":
+ parsing_systrace_file = True
+ continue
+ if parsing_trace_data != 2 and parsing_systrace_file:
+ if l == ' <script class="trace-data" type="application/text">\n':
+ parsing_trace_data = parsing_trace_data + 1
+ continue
+
+ if parsing_systrace_file and parsing_trace_data != 2:
+ continue
+ elif parsing_systrace_file and parsing_trace_data == 2 and l == " </script>\n":
+ # the rest of this file is just random html
+ break
+
+ # now parsing the ftrace data.
+ if len(l) > 1 and l[0] == '#':
+ continue
+
+ count = count + 1
+
+ if limit and count >= limit:
+ break
+
+ raw_ftrace_entry = RawFtraceEntry.parse_dict(l)
+ if not raw_ftrace_entry:
+ print("WARNING: Failed to parse raw ftrace entry: " + l)
+ continue
+
+ if not raw_ftrace_entry_filter(raw_ftrace_entry):
+ # Skip processing raw ftrace entries that don't match a filter.
+ # This is an optimization for when Trace2Db is used programmatically
+ # to avoid having an overly large database.
+ continue
+
+ pending_entries.append(raw_ftrace_entry)
+
+ if raw_ftrace_entry['function'] == 'sched_switch':
+ sched_switch = SchedSwitch.parse_dict(raw_ftrace_entry['function_args'], count)
+
+ if not sched_switch:
+ print("WARNING: Failed to parse sched_switch: " + l)
+ else:
+ pending_sched_switch.append(sched_switch)
+
+ elif raw_ftrace_entry['function'] == 'sched_blocked_reason':
+ sbr = SchedBlockedReason.parse_dict(raw_ftrace_entry['function_args'], count)
+
+ if not sbr:
+ print("WARNING: Failed to parse sched_blocked_reason: " + l)
+ else:
+ pending_sched_blocked_reasons.append(sbr)
+
+ elif raw_ftrace_entry['function'] == 'mm_filemap_add_to_page_cache':
+ d = MmFilemapAddToPageCache.parse_dict(raw_ftrace_entry['function_args'], count)
+ if not d:
+ print("WARNING: Failed to parse mm_filemap_add_to_page_cache: " + l)
+ else:
+ pending_mm_filemap_add_to_pagecaches.append(d)
+
+ # Objects are cached in python memory, not yet sent to SQL database.
+
+ # Send INSERT/UPDATE/etc statements to the underlying SQL database.
+ if count % _FLUSH_LIMIT == 0:
+ insert_all_pending_entries()
+
+ insert_all_pending_entries()
+
+ # Ensure underlying database commits changes from memory to disk.
+ session.commit()
+
+ return count
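
A short sketch of the line-level parsers above in action (illustrative only; the sample line is one quoted in the comments, lib/ is assumed to be on sys.path, and sqlalchemy must be installed to import the module):

# Illustrative only: parse one raw ftrace line, then decode its
# mm_filemap_add_to_page_cache arguments.
from trace2db import MmFilemapAddToPageCache, RawFtraceEntry

line = ('MediaStoreImpor-27212 (27176) [000] .... 16136.595194: '
        'mm_filemap_add_to_page_cache: dev 253:6 ino 7580 '
        'page=0000000060e990c7 pfn=677646 ofs=159744')

entry = RawFtraceEntry.parse_dict(line)
assert entry['function'] == 'mm_filemap_add_to_page_cache'

fields = MmFilemapAddToPageCache.parse_dict(entry['function_args'])
assert fields['dev'] == (253 << 8) | 6  # major/minor packed into one int.
assert fields['ino'] == 0x7580          # inode is hex-decoded.
assert fields['ofs'] == 159744          # byte offset into the file.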
diff --git a/startop/scripts/trace_analyzer/lib/trace2db_test.py b/startop/scripts/trace_analyzer/lib/trace2db_test.py
new file mode 100755
index 000000000000..b67cffa51cde
--- /dev/null
+++ b/startop/scripts/trace_analyzer/lib/trace2db_test.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+#
+# Copyright 2019, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Unit tests for the trace2db module.
+
+Install:
+ $> sudo apt-get install python3-pytest ## OR
+ $> pip install -U pytest
+See also https://docs.pytest.org/en/latest/getting-started.html
+
+Usage:
+ $> ./trace2db_test.py
+ $> pytest trace2db_test.py
+ $> python -m pytest trace2db_test.py
+
+See also https://docs.pytest.org/en/latest/usage.html
+"""
+
+# global imports
+from contextlib import contextmanager
+import io
+import shlex
+import sys
+import typing
+
+from copy import deepcopy
+
+# pip imports
+import pytest
+
+# local imports
+from trace2db import *
+
+# This pretty-prints the raw dictionary of the sqlalchemy object if it fails.
+class EqualsSqlAlchemyObject:
+ # For convenience to write shorter tests, we also add 'ignore_fields' which allow us to specify
+ # which fields to ignore when doing the comparison.
+ def __init__(self_, self, ignore_fields=[]):
+ self_.self = self
+ self_.ignore_fields = ignore_fields
+
+ # Do field-by-field comparison.
+ # It seems that SQLAlchemy does not implement __eq__ itself so we have to do it ourselves.
+ def __eq__(self_, other):
+ if isinstance(other, EqualsSqlAlchemyObject):
+ other = other.self
+
+ self = self_.self
+
+ classes_match = isinstance(other, self.__class__)
+ a, b = deepcopy(self.__dict__), deepcopy(other.__dict__)
+
+ # Compare our attributes for equality, ignoring SQLAlchemy-internal state.
+
+ a.pop('_sa_instance_state', None)
+ b.pop('_sa_instance_state', None)
+
+ for f in self_.ignore_fields:
+ a.pop(f, None)
+ b.pop(f, None)
+
+ attrs_match = (a == b)
+ return classes_match and attrs_match
+
+ def __repr__(self):
+ return repr(self.self.__dict__)
+
+
+def assert_eq_ignore_id(left, right):
+ # This pretty-prints the raw dictionary of the sqlalchemy object if it fails.
+ # It does field-by-field comparison, but ignores the 'id' field.
+ assert EqualsSqlAlchemyObject(left, ignore_fields=['id']) == EqualsSqlAlchemyObject(right)
+
+def parse_trace_file_to_db(*contents):
+ """
+ Make temporary in-memory sqlite3 database by parsing the string contents as a trace.
+
+ :return: Trace2Db instance
+ """
+ buf = io.StringIO()
+
+ for c in contents:
+ buf.write(c)
+ buf.write("\n")
+
+ buf.seek(0)
+
+ t2d = Trace2Db(":memory:")
+ t2d.parse_file_buf_into_db(buf)
+
+ buf.close()
+
+ return t2d
+
+def test_ftrace_mm_filemap_add_to_pagecache():
+ test_contents = """
+MediaStoreImpor-27212 (27176) [000] .... 16136.595194: mm_filemap_add_to_page_cache: dev 253:6 ino 7580 page=0000000060e990c7 pfn=677646 ofs=159744
+MediaStoreImpor-27212 (27176) [000] .... 16136.595920: mm_filemap_add_to_page_cache: dev 253:6 ino 7580 page=0000000048e2e156 pfn=677645 ofs=126976
+MediaStoreImpor-27212 (27176) [000] .... 16136.597793: mm_filemap_add_to_page_cache: dev 253:6 ino 7580 page=0000000051eabfb2 pfn=677644 ofs=122880
+MediaStoreImpor-27212 (27176) [000] .... 16136.597815: mm_filemap_add_to_page_cache: dev 253:6 ino 7580 page=00000000ce7cd606 pfn=677643 ofs=131072
+MediaStoreImpor-27212 (27176) [000] .... 16136.603732: mm_filemap_add_to_page_cache: dev 253:6 ino 1 page=000000008ffd3030 pfn=730119 ofs=186482688
+MediaStoreImpor-27212 (27176) [000] .... 16136.604126: mm_filemap_add_to_page_cache: dev 253:6 ino b1d8 page=0000000098d4d2e2 pfn=829676 ofs=0
+ <...>-27197 (-----) [002] .... 16136.613471: mm_filemap_add_to_page_cache: dev 253:6 ino 7580 page=00000000aca88a97 pfn=743346 ofs=241664
+ <...>-27197 (-----) [002] .... 16136.615979: mm_filemap_add_to_page_cache: dev 253:6 ino 7580 page=00000000351f2bc1 pfn=777799 ofs=106496
+ <...>-27224 (-----) [006] .... 16137.400090: mm_filemap_add_to_page_cache: dev 253:6 ino 712d page=000000006ff7ffdb pfn=754861 ofs=0
+ <...>-1396 (-----) [000] .... 16137.451660: mm_filemap_add_to_page_cache: dev 253:6 ino 1 page=00000000ba0cbb34 pfn=769173 ofs=187191296
+ <...>-1396 (-----) [000] .... 16137.453020: mm_filemap_add_to_page_cache: dev 253:6 ino b285 page=00000000f6ef038e pfn=820291 ofs=0
+ <...>-1396 (-----) [000] .... 16137.453067: mm_filemap_add_to_page_cache: dev 253:6 ino b285 page=0000000083ebc446 pfn=956463 ofs=4096
+ <...>-1396 (-----) [000] .... 16137.453101: mm_filemap_add_to_page_cache: dev 253:6 ino b285 page=000000009dc2cd25 pfn=822813 ofs=8192
+ <...>-1396 (-----) [000] .... 16137.453113: mm_filemap_add_to_page_cache: dev 253:6 ino b285 page=00000000a11167fb pfn=928650 ofs=12288
+ <...>-1396 (-----) [000] .... 16137.453126: mm_filemap_add_to_page_cache: dev 253:6 ino b285 page=00000000c1c3311b pfn=621110 ofs=16384
+ <...>-1396 (-----) [000] .... 16137.453139: mm_filemap_add_to_page_cache: dev 253:6 ino b285 page=000000009aa78342 pfn=689370 ofs=20480
+ <...>-1396 (-----) [000] .... 16137.453151: mm_filemap_add_to_page_cache: dev 253:6 ino b285 page=0000000082cddcd6 pfn=755584 ofs=24576
+ <...>-1396 (-----) [000] .... 16137.453162: mm_filemap_add_to_page_cache: dev 253:6 ino b285 page=00000000b0249bc7 pfn=691431 ofs=28672
+ <...>-1396 (-----) [000] .... 16137.453183: mm_filemap_add_to_page_cache: dev 253:6 ino b285 page=000000006a776ff0 pfn=795084 ofs=32768
+ <...>-1396 (-----) [000] .... 16137.453203: mm_filemap_add_to_page_cache: dev 253:6 ino b285 page=000000001a4918a7 pfn=806998 ofs=36864
+ <...>-2578 (-----) [002] .... 16137.561871: mm_filemap_add_to_page_cache: dev 253:6 ino 1 page=00000000d65af9d2 pfn=719246 ofs=187015168
+ <...>-2578 (-----) [002] .... 16137.562846: mm_filemap_add_to_page_cache: dev 253:6 ino b25a page=000000002f6ba74f pfn=864982 ofs=0
+ <...>-2578 (-----) [000] .... 16138.104500: mm_filemap_add_to_page_cache: dev 253:6 ino 1 page=00000000f888d0f6 pfn=805812 ofs=192794624
+ <...>-2578 (-----) [000] .... 16138.105836: mm_filemap_add_to_page_cache: dev 253:6 ino b7dd page=000000003749523b pfn=977196 ofs=0
+ <...>-27215 (-----) [001] .... 16138.256881: mm_filemap_add_to_page_cache: dev 253:6 ino 758f page=000000001b375de1 pfn=755928 ofs=0
+ <...>-27215 (-----) [001] .... 16138.257526: mm_filemap_add_to_page_cache: dev 253:6 ino 7591 page=000000004e039481 pfn=841534 ofs=0
+ NonUserFacing6-5246 ( 1322) [005] .... 16138.356491: mm_filemap_add_to_page_cache: dev 253:6 ino 1 page=00000000d65af9d2 pfn=719246 ofs=161890304
+ NonUserFacing6-5246 ( 1322) [005] .... 16138.357538: mm_filemap_add_to_page_cache: dev 253:6 ino 9a64 page=000000002f6ba74f pfn=864982 ofs=0
+ NonUserFacing6-5246 ( 1322) [005] .... 16138.357581: mm_filemap_add_to_page_cache: dev 253:6 ino 9a64 page=000000006e0f8322 pfn=797894 ofs=4096
+ <...>-27197 (-----) [005] .... 16140.143224: mm_filemap_add_to_page_cache: dev 253:6 ino 7580 page=00000000a42527c6 pfn=1076669 ofs=32768
+ """
+
+ t2d = parse_trace_file_to_db(test_contents)
+ session = t2d.session
+
+ first_row = session.query(MmFilemapAddToPageCache).order_by(MmFilemapAddToPageCache.id).first()
+
+ #dev 253:6 ino 7580 page=0000000060e990c7 pfn=677646 ofs=159744
+ assert_eq_ignore_id(MmFilemapAddToPageCache(dev=64774, dev_major=253, dev_minor=6,
+ ino=0x7580, page=0x0000000060e990c7, pfn=677646, ofs=159744), first_row)
+
+ second_to_last_row = session.query(MmFilemapAddToPageCache).filter(MmFilemapAddToPageCache.page.in_([0x000000006e0f8322])).first()
+
+ # dev 253:6 ino 9a64 page=000000006e0f8322 pfn=797894 ofs=4096
+ assert_eq_ignore_id(MmFilemapAddToPageCache(dev=64774, dev_major=253, dev_minor=6,
+ ino=0x9a64, page=0x000000006e0f8322, pfn=797894, ofs=4096), second_to_last_row)
+
+def test_systrace_mm_filemap_add_to_pagecache():
+ test_contents = """
+<!DOCTYPE html>
+<html>
+<head i18n-values="dir:textdirection;">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<meta charset="utf-8"/>
+<title>Android System Trace</title>
+ <script class="trace-data" type="application/text">
+PROCESS DUMP
+USER PID PPID VSZ RSS WCHAN PC S NAME COMM
+root 1 0 62148 5976 0 0 S init [init]
+root 2 0 0 0 0 0 S [kthreadd] [kthreadd]
+ </script>
+
+ <script class="trace-data" type="application/text">
+MediaStoreImpor-27212 (27176) [000] .... 16136.595194: mm_filemap_add_to_page_cache: dev 253:6 ino 7580 page=0000000060e990c7 pfn=677646 ofs=159744
+NonUserFacing6-5246 ( 1322) [005] .... 16138.357581: mm_filemap_add_to_page_cache: dev 253:6 ino 9a64 page=000000006e0f8322 pfn=797894 ofs=4096
+ </script>
+
+ <script class="trace-data" type="application/text">
+{"traceEvents": [{"category": "process_argv", "name": "process_argv", "args": {"argv": ["/mnt/ssd3/workspace/master/external/chromium-trace/systrace.py", "-t", "5", "pagecache"]}, "pid": 160383, "ts": 1037300940509.7991, "tid": 139628672526080, "ph": "M"}, {"category": "python", "name": "clock_sync", "args": {"issue_ts": 1037307346185.212, "sync_id": "9a7e4fe3-89ad-441f-8226-8fe533fe973e"}, "pid": 160383, "ts": 1037307351643.906, "tid": 139628726089536, "ph": "c"}], "metadata": {"clock-domain": "SYSTRACE"}}
+ </script>
+<!-- END TRACE -->
+ """
+
+ t2d = parse_trace_file_to_db(test_contents)
+ session = t2d.session
+
+ first_row = session.query(MmFilemapAddToPageCache).order_by(MmFilemapAddToPageCache.id).first()
+
+ #dev 253:6 ino 7580 page=0000000060e990c7 pfn=677646 ofs=159744
+ assert_eq_ignore_id(MmFilemapAddToPageCache(dev=64774, dev_major=253, dev_minor=6,
+ ino=0x7580, page=0x0000000060e990c7, pfn=677646, ofs=159744), first_row)
+
+ second_to_last_row = session.query(MmFilemapAddToPageCache).filter(MmFilemapAddToPageCache.page.in_([0x000000006e0f8322])).first()
+
+ # dev 253:6 ino 9a64 page=000000006e0f8322 pfn=797894 ofs=4096
+ assert_eq_ignore_id(MmFilemapAddToPageCache(dev=64774, dev_major=253, dev_minor=6,
+ ino=0x9a64, page=0x000000006e0f8322, pfn=797894, ofs=4096), second_to_last_row)
+
+
+if __name__ == '__main__':
+ pytest.main()
diff --git a/startop/scripts/trace_analyzer/trace_analyzer.py b/startop/scripts/trace_analyzer/trace_analyzer.py
index 45429339bea6..62ae018a9986 100755
--- a/startop/scripts/trace_analyzer/trace_analyzer.py
+++ b/startop/scripts/trace_analyzer/trace_analyzer.py
@@ -15,335 +15,36 @@
import re
import sys
+import argparse
-from sqlalchemy import create_engine
-from sqlalchemy import Column, Date, Integer, Float, String, ForeignKey
-from sqlalchemy.ext.declarative import declarative_base
+from lib.trace2db import Trace2Db
-from sqlalchemy.orm import sessionmaker
-
-import sqlalchemy
-
-_DEBUG = False
-#_LIMIT = 100000
-_LIMIT = None
-_FLUSH_LIMIT = 10000
-
-Base = declarative_base()
-
-class RawFtraceEntry(Base):
- __tablename__ = 'raw_ftrace_entries'
-
- id = Column(Integer, primary_key=True)
- task_name = Column(String, nullable=True) # <...> -> None.
- task_pid = Column(String, nullable=False)
- tgid = Column(Integer, nullable=True) # ----- -> None.
- cpu = Column(Integer, nullable=False)
- timestamp = Column(Float, nullable=False)
- function = Column(String, nullable=False)
- function_args = Column(String, nullable=False)
-
-# __mapper_args__ = {
-# 'polymorphic_identity':'raw_ftrace_entry',
-# 'polymorphic_on':function
-# }
-
- @staticmethod
- def parse(line):
- # ' <...>-5521 (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266'
- m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
- if not m:
- return None
-
- groups = m.groups()
- # groups example:
- # ('<...>',
- # '5521',
- # '-----',
- # '003',
- # '...1',
- # '17148.446877',
- # 'tracing_mark_write',
- # 'trace_event_clock_sync: parent_ts=17148.447266')
- task_name = groups[0]
- if task_name == '<...>':
- task_name = None
-
- task_pid = int(groups[1])
- tgid = groups[2]
- if tgid == '-----':
- tgid = None
-
- cpu = int(groups[3])
- # irq_flags = groups[4]
- timestamp = float(groups[5])
- function = groups[6]
- function_args = groups[7]
-
- return RawFtraceEntry(task_name=task_name, task_pid=task_pid, tgid=tgid, cpu=cpu,
- timestamp=timestamp, function=function, function_args=function_args)
-
- @staticmethod
- def parse_dict(line):
- # ' <...>-5521 (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266'
- m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
- if not m:
- return None
-
- groups = m.groups()
- # groups example:
- # ('<...>',
- # '5521',
- # '-----',
- # '003',
- # '...1',
- # '17148.446877',
- # 'tracing_mark_write',
- # 'trace_event_clock_sync: parent_ts=17148.447266')
- task_name = groups[0]
- if task_name == '<...>':
- task_name = None
-
- task_pid = int(groups[1])
- tgid = groups[2]
- if tgid == '-----':
- tgid = None
-
- cpu = int(groups[3])
- # irq_flags = groups[4]
- timestamp = float(groups[5])
- function = groups[6]
- function_args = groups[7]
-
- return {'task_name': task_name, 'task_pid': task_pid, 'tgid': tgid, 'cpu': cpu, 'timestamp': timestamp, 'function': function, 'function_args': function_args}
-
-#class TracingMarkWriteFtraceEntry(RawFtraceEntry):
-# __tablename__ = 'tracing_mark_write_ftrace_entries'
+# This script requires 'sqlalchemy' to access the sqlite3 database.
#
-# id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
-# mark_type = Column(String(1), nullable=False)
-# mark_id = Column(Integer, nullable=False)
-# message = Column(String)
+# $> sudo apt-get install python3-pip
+# $> pip3 install --user sqlalchemy
#
-## __mapper_args__ = {
-## 'polymorphic_identity':'tracing_mark_write',
-## }
-#
-# @staticmethod
-# def decode(raw_ftrace_entry):
-# if raw_ftrace_entry.function != 'tracing_mark_write':
-# raise ValueError("raw_ftrace_entry must be function 'tracing_mark_write':" + raw_ftrace_entry)
-#
-# #"B|2446|(Paused)ClearCards|Foo"
-# match = re.match("([^|]*)\|([^|]*)\|(.*)", raw_ftrace_entry.function_args)
-#
-# if not match:
-# return None
-#
-# # ('B', '2446', '(Paused)ClearCards|Foo')
-# groups = match.groups()
-#
-# mark_type = groups[0]
-# mark_id = int(groups[1])
-# message = groups[2]
-#
-# return TracingMarkWriteFtraceEntry(id = raw_ftrace_entry.id,
-# mark_type = mark_type,
-# mark_id = mark_id,
-# message = message)
-
-class SchedSwitch(Base):
- __tablename__ = 'sched_switches'
-
- id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
-
- prev_comm = Column(String, nullable=False)
- prev_pid = Column(Integer, nullable=False)
- prev_prio = Column(Integer, nullable=False)
- prev_state = Column(String, nullable=False)
-
- next_comm = Column(String, nullable=False)
- next_pid = Column(Integer, nullable=False)
- next_prio = Column(Integer, nullable=False)
-
-# __mapper_args__ = {
-# 'polymorphic_identity':'raw_ftrace_entry',
-# 'polymorphic_on':function
-# }
-
- @staticmethod
- def parse_dict(function_args, id = None):
- # 'prev_comm=kworker/u16:5 prev_pid=13971 prev_prio=120 prev_state=S ==> next_comm=swapper/4 next_pid=0 next_prio=120'
- m = re.match("prev_comm=(.*) prev_pid=(\d+) prev_prio=(\d+) prev_state=(.*) ==> next_comm=(.*) next_pid=(\d+) next_prio=(\d+) ?", function_args)
- if not m:
- return None
-
- groups = m.groups()
- # ('kworker/u16:5', '13971', '120', 'S', 'swapper/4', '0', '120')
- d = {}
- if id is not None:
- d['id'] = id
- d['prev_comm'] = groups[0]
- d['prev_pid'] = int(groups[1])
- d['prev_prio'] = int(groups[2])
- d['prev_state'] = groups[3]
- d['next_comm'] = groups[4]
- d['next_pid'] = int(groups[5])
- d['next_prio'] = int(groups[6])
-
- return d
-
-class SchedBlockedReason(Base):
- __tablename__ = 'sched_blocked_reasons'
-
- id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
- pid = Column(Integer, nullable=False)
- iowait = Column(Integer, nullable=False)
- caller = Column(String, nullable=False)
-
- @staticmethod
- def parse_dict(function_args, id = None):
- # 'pid=2289 iowait=1 caller=wait_on_page_bit_common+0x2a8/0x5f'
- m = re.match("pid=(\d+) iowait=(\d+) caller=(.*) ?", function_args)
- if not m:
- return None
-
- groups = m.groups()
- # ('2289', '1', 'wait_on_page_bit_common+0x2a8/0x5f8')
- d = {}
- if id is not None:
- d['id'] = id
- d['pid'] = int(groups[0])
- d['iowait'] = int(groups[1])
- d['caller'] = groups[2]
-
- return d
-
-def init_sqlalchemy(db_filename):
- global _DEBUG
- engine = create_engine('sqlite:///' + db_filename, echo=_DEBUG)
-
- # DROP TABLES
-# Base.metadata.drop_all(engine)
- # CREATE ... (tables)
- Base.metadata.create_all(engine)
-
- Session = sessionmaker(bind=engine)
- session = Session()
- return (session, engine)
-
-def insert_pending_entries(engine, kls, lst):
- if len(lst) > 0:
- # for some reason, it tries to generate an empty INSERT statement with len=0,
- # which of course violates the first non-null constraint.
- try:
- # Performance-sensitive parsing according to:
- # https://docs.sqlalchemy.org/en/13/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
- engine.execute(kls.__table__.insert(), lst)
- lst.clear()
- except sqlalchemy.exc.IntegrityError as err:
- # possibly violating some SQL constraint, print data here.
- print(err)
- print(lst)
- raise
-
-def parse_file(filename, session, engine):
- global _LIMIT
- global _FLUSH_LIMIT
- count = 0
-
- pending_entries = []
- pending_sched_switch = []
- pending_sched_blocked_reasons = []
-
- def insert_all_pending_entries():
- insert_pending_entries(engine, RawFtraceEntry, pending_entries)
- insert_pending_entries(engine, SchedSwitch, pending_sched_switch)
- insert_pending_entries(engine, SchedBlockedReason, pending_sched_blocked_reasons)
-
- # use explicit encoding to avoid UnicodeDecodeError.
- with open(filename, encoding="ISO-8859-1") as f:
- for l in f:
-
- if len(l) > 1 and l[0] == '#':
- continue
-
- count = count + 1
-
- if _LIMIT and count >= _LIMIT:
- break
-
- raw_ftrace_entry = RawFtraceEntry.parse_dict(l)
- if not raw_ftrace_entry:
- print("WARNING: Failed to parse raw ftrace entry: " + l)
- continue
-
- pending_entries.append(raw_ftrace_entry)
-
- if raw_ftrace_entry['function'] == 'sched_switch':
- sched_switch = SchedSwitch.parse_dict(raw_ftrace_entry['function_args'], count)
-
- if not sched_switch:
- print("WARNING: Failed to parse sched_switch: " + l)
- else:
- pending_sched_switch.append(sched_switch)
-
- elif raw_ftrace_entry['function'] == 'sched_blocked_reason':
- sbr = SchedBlockedReason.parse_dict(raw_ftrace_entry['function_args'], count)
-
- if not sbr:
- print("WARNING: Failed to parse sched_blocked_reason: " + l)
- else:
- pending_sched_blocked_reasons.append(sbr)
-
- # Objects are cached in python memory, not yet sent to SQL database.
- #session.add(raw_ftrace_entry)
-
- # Send INSERT/UPDATE/etc statements to the underlying SQL database.
- if count % _FLUSH_LIMIT == 0:
- # session.flush()
- #session.bulk_save_objects(pending_entries)
- #session.bulk_insert_mappings(RawFtraceEntry, pending_entries)
- insert_all_pending_entries()
-
- insert_all_pending_entries()
-
- # Ensure underlying database commits changes from memory to disk.
- session.commit()
-
- return count
+def main(argv):
+ parser = argparse.ArgumentParser(description='Convert ftrace/systrace file into sqlite3 db.')
+ parser.add_argument('db_filename', metavar='sql_filename.db', type=str,
+ help='path to sqlite3 db filename')
+ parser.add_argument('trace_filename', metavar='systrace.ftrace', type=str,
+ help='path to ftrace/systrace filename')
+ parser.add_argument('--limit', type=int, help='limit the number of entries parsed [for debugging]')
-#def decode_raw_traces(session, engine):
-# count = 0
-# global _FLUSH_LIMIT
-#
-# for tmw in session.query(RawFtraceEntry).filter_by(function = 'tracing_mark_write'):
-# print(tmw)
-# decoded = TracingMarkWriteFtraceEntry.decode(tmw)
-# session.add(decoded)
-#
-# if count % _FLUSH_LIMIT == 0:
-# session.flush()
-#
-# session.commit()
-#
-# return count
+ args = parser.parse_args(argv[1:])
-def main(argv):
- db_filename = sys.argv[1]
- trace_filename = sys.argv[2]
+ db_filename = args.db_filename
+ trace_filename = args.trace_filename
- session, engine = init_sqlalchemy(db_filename)
+ trace2db = Trace2Db(db_filename)
print("SQL Alchemy db initialized")
# parse 'raw_ftrace_entries' table
- count = parse_file(trace_filename, session, engine)
+ count = trace2db.parse_file_into_db(trace_filename, limit=args.limit)
print("Count was ", count)
- # create other tables
-# count = decode_raw_traces(session, engine)
-
return 0
if __name__ == '__main__':
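
Once trace_analyzer.py has written a database, it can be inspected with the stdlib sqlite3 module; a hedged sketch (the table and column names come from lib/trace2db.py, and 'out.db' is an illustrative path):

# Illustrative only: summarize the most frequent ftrace functions in a db
# produced by trace_analyzer.py.
import sqlite3

conn = sqlite3.connect('out.db')  # hypothetical output of trace_analyzer.py.
query = ('SELECT function, COUNT(*) AS cnt FROM raw_ftrace_entries '
         'GROUP BY function ORDER BY cnt DESC LIMIT 5')
for function, count in conn.execute(query):
  print('%s: %d' % (function, count))
conn.close()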