diff options
author | Kweku Adams <kwekua@google.com> | 2018-02-05 16:45:13 -0800 |
---|---|---|
committer | Kweku Adams <kwekua@google.com> | 2018-03-07 21:50:12 +0000 |
commit | eadd123d68850cb27aa6d030ade6190e30991b19 (patch) | |
tree | 28ee59bef758929559dde7b34a9dbe962f1196c9 | |
parent | 0efaf2088d8e3ba8ec560aa5eb597ef456b5213e (diff) |
Dumping stack traces to proto.
Bug: 72177715
Test: flash device and check incident.proto output
Change-Id: Id2a15e0fc62b66efe875949af97f0eb651c7e322
(cherry picked from commit 5c804e2b9893c7892900148229cf19fa6268e7dc)
-rw-r--r-- | Android.bp | 1 | ||||
-rw-r--r-- | cmds/incidentd/Android.mk | 10 | ||||
-rw-r--r-- | cmds/incidentd/incidentd.rc | 1 | ||||
-rw-r--r-- | cmds/incidentd/src/FdBuffer.cpp | 29 | ||||
-rw-r--r-- | cmds/incidentd/src/FdBuffer.h | 6 | ||||
-rw-r--r-- | cmds/incidentd/src/Section.cpp | 150 | ||||
-rw-r--r-- | cmds/incidentd/src/Section.h | 16 | ||||
-rw-r--r-- | cmds/incidentd/src/incidentd_util.cpp | 9 | ||||
-rw-r--r-- | cmds/incidentd/src/incidentd_util.h | 7 | ||||
-rw-r--r-- | core/proto/android/os/backtrace.proto | 36 | ||||
-rw-r--r-- | core/proto/android/os/incident.proto | 17 | ||||
-rw-r--r-- | libs/incident/proto/android/section.proto | 3 | ||||
-rw-r--r-- | tools/incident_section_gen/main.cpp | 2 |
13 files changed, 278 insertions, 9 deletions
diff --git a/Android.bp b/Android.bp index 1b9210cc39f2..1caa4977a168 100644 --- a/Android.bp +++ b/Android.bp @@ -813,6 +813,7 @@ gensrcs { ], srcs: [ + "core/proto/android/os/backtrace.proto", "core/proto/android/os/batterytype.proto", "core/proto/android/os/cpufreq.proto", "core/proto/android/os/cpuinfo.proto", diff --git a/cmds/incidentd/Android.mk b/cmds/incidentd/Android.mk index 3a47fe1946c2..008a1bf8397e 100644 --- a/cmds/incidentd/Android.mk +++ b/cmds/incidentd/Android.mk @@ -15,8 +15,10 @@ LOCAL_PATH:= $(call my-dir) # proto files used in incidentd to generate cppstream proto headers. -PROTO_FILES:= frameworks/base/core/proto/android/util/log.proto \ - frameworks/base/core/proto/android/os/data.proto +PROTO_FILES:= \ + frameworks/base/core/proto/android/os/backtrace.proto \ + frameworks/base/core/proto/android/os/data.proto \ + frameworks/base/core/proto/android/util/log.proto # ========= # # incidentd # @@ -46,6 +48,8 @@ LOCAL_SHARED_LIBRARIES := \ libbase \ libbinder \ libcutils \ + libdebuggerd_client \ + libdumputils \ libincident \ liblog \ libprotobuf-cpp-lite \ @@ -119,6 +123,8 @@ LOCAL_SHARED_LIBRARIES := \ libbase \ libbinder \ libcutils \ + libdebuggerd_client \ + libdumputils \ libincident \ liblog \ libprotobuf-cpp-lite \ diff --git a/cmds/incidentd/incidentd.rc b/cmds/incidentd/incidentd.rc index 6dd811452e9e..9c16a1c52e89 100644 --- a/cmds/incidentd/incidentd.rc +++ b/cmds/incidentd/incidentd.rc @@ -16,6 +16,7 @@ service incidentd /system/bin/incidentd class main user incidentd group incidentd log readproc + capabilities KILL SYS_PTRACE on post-fs-data # Create directory for incidentd diff --git a/cmds/incidentd/src/FdBuffer.cpp b/cmds/incidentd/src/FdBuffer.cpp index 64da6773686a..35701446e9d9 100644 --- a/cmds/incidentd/src/FdBuffer.cpp +++ b/cmds/incidentd/src/FdBuffer.cpp @@ -87,6 +87,35 @@ status_t FdBuffer::read(int fd, int64_t timeout) { return NO_ERROR; } +status_t FdBuffer::readFully(int fd) { + mStartTime = uptimeMillis(); + + while (true) { + if (mBuffer.size() >= MAX_BUFFER_COUNT * BUFFER_SIZE) { + // Don't let it get too big. + mTruncated = true; + VLOG("Truncating data"); + break; + } + if (mBuffer.writeBuffer() == NULL) return NO_MEMORY; + + ssize_t amt = + TEMP_FAILURE_RETRY(::read(fd, mBuffer.writeBuffer(), mBuffer.currentToWrite())); + if (amt < 0) { + VLOG("Fail to read %d: %s", fd, strerror(errno)); + return -errno; + } else if (amt == 0) { + VLOG("Done reading %zu bytes", mBuffer.size()); + // We're done. + break; + } + mBuffer.wp()->move(amt); + } + + mFinishTime = uptimeMillis(); + return NO_ERROR; +} + status_t FdBuffer::readProcessedDataInStream(int fd, int toFd, int fromFd, int64_t timeoutMs, const bool isSysfs) { struct pollfd pfds[] = { diff --git a/cmds/incidentd/src/FdBuffer.h b/cmds/incidentd/src/FdBuffer.h index 66a3de154c51..34ebcf50905d 100644 --- a/cmds/incidentd/src/FdBuffer.h +++ b/cmds/incidentd/src/FdBuffer.h @@ -41,6 +41,12 @@ public: status_t read(int fd, int64_t timeoutMs); /** + * Read the data until we hit eof. + * Returns NO_ERROR if there were no errors. + */ + status_t readFully(int fd); + + /** * Read processed results by streaming data to a parsing process, e.g. incident helper. * The parsing process provides IO fds which are 'toFd' and 'fromFd'. The function * reads original data in 'fd' and writes to parsing process through 'toFd', then it reads diff --git a/cmds/incidentd/src/Section.cpp b/cmds/incidentd/src/Section.cpp index 46243c052ac1..6dd76a8d0421 100644 --- a/cmds/incidentd/src/Section.cpp +++ b/cmds/incidentd/src/Section.cpp @@ -18,13 +18,19 @@ #include "Section.h" +#include <dirent.h> +#include <errno.h> #include <wait.h> #include <mutex> +#include <set> #include <android-base/file.h> +#include <android-base/stringprintf.h> #include <android/util/protobuf.h> #include <binder/IServiceManager.h> +#include <debuggerd/client.h> +#include <dumputils/dump_utils.h> #include <log/log_event_list.h> #include <log/log_read.h> #include <log/logprint.h> @@ -33,6 +39,7 @@ #include "FdBuffer.h" #include "Privacy.h" #include "PrivacyBuffer.h" +#include "frameworks/base/core/proto/android/os/backtrace.proto.h" #include "frameworks/base/core/proto/android/os/data.proto.h" #include "frameworks/base/core/proto/android/util/log.proto.h" #include "incidentd_util.h" @@ -95,6 +102,7 @@ static status_t write_section_header(int fd, int sectionId, size_t size) { return WriteFully(fd, buf, p - buf) ? NO_ERROR : -errno; } +// Reads data from FdBuffer and writes it to the requests file descriptor. static status_t write_report_requests(const int id, const FdBuffer& buffer, ReportRequestSet* requests) { status_t err = -EBADF; @@ -387,6 +395,7 @@ status_t GZipSection::Execute(ReportRequestSet* requests) const { return NO_ERROR; } + // ================================================================================ struct WorkerThreadData : public virtual RefBase { const WorkerThreadSection* section; @@ -413,7 +422,8 @@ WorkerThreadData::WorkerThreadData(const WorkerThreadSection* sec) WorkerThreadData::~WorkerThreadData() {} // ================================================================================ -WorkerThreadSection::WorkerThreadSection(int id) : Section(id) {} +WorkerThreadSection::WorkerThreadSection(int id, const int64_t timeoutMs) + : Section(id, timeoutMs) {} WorkerThreadSection::~WorkerThreadSection() {} @@ -594,7 +604,7 @@ status_t CommandSection::Execute(ReportRequestSet* requests) const { return readStatus; } - // TODO: wait for command here has one trade-off: the failed status of command won't be detected + // Waiting for command here has one trade-off: the failed status of command won't be detected // until buffer timeout, but it has advatage on starting the data stream earlier. status_t cmdStatus = wait_child(cmdPid); status_t ihStatus = wait_child(ihPid); @@ -694,7 +704,6 @@ static inline int32_t get4LE(uint8_t const* src) { } status_t LogSection::BlockingCall(int pipeWriteFd) const { - status_t err = NO_ERROR; // Open log buffer and getting logs since last retrieved time if any. unique_ptr<logger_list, void (*)(logger_list*)> loggers( gLastLogsRetrieved.find(mLogID) == gLastLogsRetrieved.end() @@ -705,15 +714,16 @@ status_t LogSection::BlockingCall(int pipeWriteFd) const { if (android_logger_open(loggers.get(), mLogID) == NULL) { ALOGW("LogSection %s: Can't get logger.", this->name.string()); - return err; + return NO_ERROR; } log_msg msg; log_time lastTimestamp(0); + status_t err = NO_ERROR; ProtoOutputStream proto; while (true) { // keeps reading until logd buffer is fully read. - status_t err = android_logger_list_read(loggers.get(), &msg); + err = android_logger_list_read(loggers.get(), &msg); // err = 0 - no content, unexpected connection drop or EOF. // err = +ive number - size of retrieved data from logger // err = -ive number, OS supplied error _except_ for -EAGAIN @@ -814,3 +824,133 @@ status_t LogSection::BlockingCall(int pipeWriteFd) const { proto.flush(pipeWriteFd); return err; } + +// ================================================================================ + +TombstoneSection::TombstoneSection(int id, const char* type, const int64_t timeoutMs) + : WorkerThreadSection(id, timeoutMs), mType(type) { + name += "tombstone "; + name += type; +} + +TombstoneSection::~TombstoneSection() {} + +status_t TombstoneSection::BlockingCall(int pipeWriteFd) const { + std::unique_ptr<DIR, decltype(&closedir)> proc(opendir("/proc"), closedir); + if (proc.get() == nullptr) { + ALOGE("opendir /proc failed: %s\n", strerror(errno)); + return -errno; + } + + const std::set<int> hal_pids = get_interesting_hal_pids(); + + ProtoOutputStream proto; + struct dirent* d; + status_t err = NO_ERROR; + while ((d = readdir(proc.get()))) { + int pid = atoi(d->d_name); + if (pid <= 0) { + continue; + } + + const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid); + std::string exe; + if (!android::base::Readlink(link_name, &exe)) { + ALOGE("Can't read '%s': %s\n", link_name.c_str(), strerror(errno)); + continue; + } + + bool is_java_process; + if (exe == "/system/bin/app_process32" || exe == "/system/bin/app_process64") { + if (mType != "java") continue; + // Don't bother dumping backtraces for the zygote. + if (IsZygote(pid)) { + VLOG("Skipping Zygote"); + continue; + } + + is_java_process = true; + } else if (should_dump_native_traces(exe.c_str())) { + if (mType != "native") continue; + is_java_process = false; + } else if (hal_pids.find(pid) != hal_pids.end()) { + if (mType != "hal") continue; + is_java_process = false; + } else { + // Probably a native process we don't care about, continue. + VLOG("Skipping %d", pid); + continue; + } + + Fpipe dumpPipe; + if (!dumpPipe.init()) { + ALOGW("TombstoneSection '%s' failed to setup dump pipe", this->name.string()); + err = -errno; + break; + } + + const uint64_t start = Nanotime(); + pid_t child = fork(); + if (child < 0) { + ALOGE("Failed to fork child process"); + break; + } else if (child == 0) { + // This is the child process. + close(dumpPipe.readFd()); + const int ret = dump_backtrace_to_file_timeout( + pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace, + is_java_process ? 5 : 20, dumpPipe.writeFd()); + if (ret == -1) { + if (errno == 0) { + ALOGW("Dumping failed for pid '%d', likely due to a timeout\n", pid); + } else { + ALOGE("Dumping failed for pid '%d': %s\n", pid, strerror(errno)); + } + } + if (close(dumpPipe.writeFd()) != 0) { + ALOGW("TombstoneSection '%s' failed to close dump pipe writeFd: %d", + this->name.string(), errno); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); + } + close(dumpPipe.writeFd()); + // Parent process. + // Read from the pipe concurrently to avoid blocking the child. + FdBuffer buffer; + err = buffer.readFully(dumpPipe.readFd()); + if (err != NO_ERROR) { + ALOGW("TombstoneSection '%s' failed to read stack dump: %d", this->name.string(), err); + if (close(dumpPipe.readFd()) != 0) { + ALOGW("TombstoneSection '%s' failed to close dump pipe readFd: %s", + this->name.string(), strerror(errno)); + } + break; + } + + auto dump = std::make_unique<char[]>(buffer.size()); + auto iterator = buffer.data(); + int i = 0; + while (iterator.hasNext()) { + dump[i] = iterator.next(); + i++; + } + long long token = proto.start(android::os::BackTraceProto::TRACES); + proto.write(android::os::BackTraceProto::Stack::PID, pid); + proto.write(android::os::BackTraceProto::Stack::DUMP, dump.get(), i); + proto.write(android::os::BackTraceProto::Stack::DUMP_DURATION_NS, + static_cast<long long>(Nanotime() - start)); + proto.end(token); + + if (close(dumpPipe.readFd()) != 0) { + ALOGW("TombstoneSection '%s' failed to close dump pipe readFd: %d", this->name.string(), + errno); + err = -errno; + break; + } + } + + proto.flush(pipeWriteFd); + return err; +} diff --git a/cmds/incidentd/src/Section.h b/cmds/incidentd/src/Section.h index 8294be133bcb..19ef7ee74475 100644 --- a/cmds/incidentd/src/Section.h +++ b/cmds/incidentd/src/Section.h @@ -103,7 +103,7 @@ private: */ class WorkerThreadSection : public Section { public: - WorkerThreadSection(int id); + WorkerThreadSection(int id, const int64_t timeoutMs = REMOTE_CALL_TIMEOUT_MS); virtual ~WorkerThreadSection(); virtual status_t Execute(ReportRequestSet* requests) const; @@ -161,4 +161,18 @@ private: bool mBinary; }; +/** + * Section that gets data from tombstoned. + */ +class TombstoneSection : public WorkerThreadSection { +public: + TombstoneSection(int id, const char* type, const int64_t timeoutMs = 30000 /* 30 seconds */); + virtual ~TombstoneSection(); + + virtual status_t BlockingCall(int pipeWriteFd) const; + +private: + std::string mType; +}; + #endif // SECTIONS_H diff --git a/cmds/incidentd/src/incidentd_util.cpp b/cmds/incidentd/src/incidentd_util.cpp index c095f2bcf144..c869c7a8d1d4 100644 --- a/cmds/incidentd/src/incidentd_util.cpp +++ b/cmds/incidentd/src/incidentd_util.cpp @@ -80,6 +80,7 @@ pid_t fork_execute_cmd(const char* cmd, char* const argv[], Fpipe* input, Fpipe* close(output->writeFd()); return pid; } + // ================================================================================ const char** varargs(const char* first, va_list rest) { va_list copied_rest; @@ -101,3 +102,11 @@ const char** varargs(const char* first, va_list rest) { ret[numOfArgs] = NULL; return ret; } + +// ================================================================================ +const uint64_t NANOS_PER_SEC = 1000000000; +uint64_t Nanotime() { + timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return static_cast<uint64_t>(ts.tv_sec * NANOS_PER_SEC + ts.tv_nsec); +} diff --git a/cmds/incidentd/src/incidentd_util.h b/cmds/incidentd/src/incidentd_util.h index db7ec82d83f4..3f7df91e7e50 100644 --- a/cmds/incidentd/src/incidentd_util.h +++ b/cmds/incidentd/src/incidentd_util.h @@ -60,4 +60,9 @@ pid_t fork_execute_cmd(const char* cmd, char* const argv[], Fpipe* input, Fpipe* */ const char** varargs(const char* first, va_list rest); -#endif // INCIDENTD_UTIL_H
\ No newline at end of file +/** + * Returns the current monotonic clock time in nanoseconds. + */ +uint64_t Nanotime(); + +#endif // INCIDENTD_UTIL_H diff --git a/core/proto/android/os/backtrace.proto b/core/proto/android/os/backtrace.proto new file mode 100644 index 000000000000..ba81386bb4aa --- /dev/null +++ b/core/proto/android/os/backtrace.proto @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto2"; +package android.os; + +option java_multiple_files = true; + +import "frameworks/base/libs/incident/proto/android/privacy.proto"; + +message BackTraceProto { + option (android.msg_privacy).dest = DEST_AUTOMATIC; + + message Stack { + option (android.msg_privacy).dest = DEST_AUTOMATIC; + + optional int32 pid = 1; + optional string dump = 2; + // Time it took to dump the stacktrace. + optional int64 dump_duration_ns = 3; + } + repeated Stack traces = 1; +} diff --git a/core/proto/android/os/incident.proto b/core/proto/android/os/incident.proto index 7326829b584b..bb9568bfcd8b 100644 --- a/core/proto/android/os/incident.proto +++ b/core/proto/android/os/incident.proto @@ -17,6 +17,7 @@ syntax = "proto2"; option java_multiple_files = true; +import "frameworks/base/core/proto/android/os/backtrace.proto"; import "frameworks/base/core/proto/android/os/batterytype.proto"; import "frameworks/base/core/proto/android/os/cpufreq.proto"; import "frameworks/base/core/proto/android/os/cpuinfo.proto"; @@ -115,6 +116,22 @@ message IncidentProto { (section).args = "LOG_ID_KERNEL" ]; + // Stack dumps + optional android.os.BackTraceProto native_traces = 1200 [ + (section).type = SECTION_TOMBSTONE, + (section).args = "native" + ]; + + optional android.os.BackTraceProto hal_traces = 1201 [ + (section).type = SECTION_TOMBSTONE, + (section).args = "hal" + ]; + + optional android.os.BackTraceProto java_traces = 1202 [ + (section).type = SECTION_TOMBSTONE, + (section).args = "java" + ]; + // Linux services optional ProcrankProto procrank = 2000 [ (section).type = SECTION_NONE, // disable procrank until figure out permission diff --git a/libs/incident/proto/android/section.proto b/libs/incident/proto/android/section.proto index ef6a8ff6bcea..b3ed393394d4 100644 --- a/libs/incident/proto/android/section.proto +++ b/libs/incident/proto/android/section.proto @@ -43,6 +43,9 @@ enum SectionType { // incidentd read file and gzip the data in bytes field SECTION_GZIP = 5; + + // incidentd calls tombstoned for annotated field + SECTION_TOMBSTONE = 6; } message SectionFlags { diff --git a/tools/incident_section_gen/main.cpp b/tools/incident_section_gen/main.cpp index e396a638927e..8219150d3029 100644 --- a/tools/incident_section_gen/main.cpp +++ b/tools/incident_section_gen/main.cpp @@ -427,6 +427,8 @@ static bool generateSectionListCpp(Descriptor const* descriptor) { printf(" new GZipSection(%d,", field->number()); splitAndPrint(s.args()); printf(" NULL),\n"); + case SECTION_TOMBSTONE: + printf(" new TombstoneSection(%d, \"%s\"),\n", field->number(), s.args().c_str()); break; } } |