diff options
Diffstat (limited to 'cmds/incidentd/src/Section.cpp')
-rw-r--r-- | cmds/incidentd/src/Section.cpp | 336 |
1 files changed, 229 insertions, 107 deletions
diff --git a/cmds/incidentd/src/Section.cpp b/cmds/incidentd/src/Section.cpp index 6cbfd47bb660..e56ed39ea32e 100644 --- a/cmds/incidentd/src/Section.cpp +++ b/cmds/incidentd/src/Section.cpp @@ -20,9 +20,9 @@ #include <dirent.h> #include <errno.h> - #include <mutex> #include <set> +#include <thread> #include <android-base/file.h> #include <android-base/properties.h> @@ -35,12 +35,14 @@ #include <log/log_event_list.h> #include <log/logprint.h> #include <private/android_logger.h> +#include <sys/mman.h> #include "FdBuffer.h" #include "Privacy.h" #include "frameworks/base/core/proto/android/os/backtrace.proto.h" #include "frameworks/base/core/proto/android/os/data.proto.h" #include "frameworks/base/core/proto/android/util/log.proto.h" +#include "frameworks/base/core/proto/android/util/textdump.proto.h" #include "incidentd_util.h" namespace android { @@ -104,7 +106,6 @@ status_t FileSection::Execute(ReportWriter* writer) const { return NO_ERROR; } - FdBuffer buffer; Fpipe p2cPipe; Fpipe c2pPipe; // initiate pipes to pass data to/from incident_helper @@ -120,6 +121,7 @@ status_t FileSection::Execute(ReportWriter* writer) const { } // parent process + FdBuffer buffer; status_t readStatus = buffer.readProcessedDataInStream(fd.get(), std::move(p2cPipe.writeFd()), std::move(c2pPipe.readFd()), this->timeoutMs, mIsSysfs); @@ -134,7 +136,7 @@ status_t FileSection::Execute(ReportWriter* writer) const { status_t ihStatus = wait_child(pid); if (ihStatus != NO_ERROR) { ALOGW("[%s] abnormal child process: %s", this->name.string(), strerror(-ihStatus)); - return ihStatus; + return OK; // Not a fatal error. } return writer->writeSection(buffer); @@ -233,7 +235,7 @@ struct WorkerThreadData : public virtual RefBase { Fpipe pipe; // Lock protects these fields - mutex lock; + std::mutex lock; bool workerDone; status_t workerError; @@ -260,78 +262,42 @@ void sigpipe_handler(int signum) { } } -static void* worker_thread_func(void* cookie) { - // Don't crash the service if we write to a closed pipe (which can happen if - // dumping times out). - signal(SIGPIPE, sigpipe_handler); - - WorkerThreadData* data = (WorkerThreadData*)cookie; - status_t err = data->section->BlockingCall(data->pipe.writeFd().get()); - - { - unique_lock<mutex> lock(data->lock); - data->workerDone = true; - data->workerError = err; - } - - data->pipe.writeFd().reset(); - data->decStrong(data->section); - // data might be gone now. don't use it after this point in this thread. - return NULL; -} - status_t WorkerThreadSection::Execute(ReportWriter* writer) const { - status_t err = NO_ERROR; - pthread_t thread; - pthread_attr_t attr; - bool workerDone = false; - FdBuffer buffer; - - // Data shared between this thread and the worker thread. + // Create shared data and pipe. Don't put data on the stack since this thread may exit early. sp<WorkerThreadData> data = new WorkerThreadData(this); - - // Create the pipe if (!data->pipe.init()) { return -errno; } - - // Create the thread - err = pthread_attr_init(&attr); - if (err != 0) { - return -err; - } - // TODO: Do we need to tweak thread priority? - err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - if (err != 0) { - pthread_attr_destroy(&attr); - return -err; - } - - // The worker thread needs a reference and we can't let the count go to zero - // if that thread is slow to start. data->incStrong(this); - - err = pthread_create(&thread, &attr, worker_thread_func, (void*)data.get()); - pthread_attr_destroy(&attr); - if (err != 0) { + std::thread([data, this]() { + // Don't crash the service if writing to a closed pipe (may happen if dumping times out) + signal(SIGPIPE, sigpipe_handler); + status_t err = data->section->BlockingCall(data->pipe.writeFd()); + { + std::scoped_lock<std::mutex> lock(data->lock); + data->workerDone = true; + data->workerError = err; + // unique_fd is not thread safe. If we don't lock it, reset() may pause half way while + // the other thread executes to the end, calling ~Fpipe, which is a race condition. + data->pipe.writeFd().reset(); + } data->decStrong(this); - return -err; - } + }).detach(); // Loop reading until either the timeout or the worker side is done (i.e. eof). + status_t err = NO_ERROR; + bool workerDone = false; + FdBuffer buffer; err = buffer.read(data->pipe.readFd().get(), this->timeoutMs); if (err != NO_ERROR) { ALOGE("[%s] reader failed with error '%s'", this->name.string(), strerror(-err)); } - // Done with the read fd. The worker thread closes the write one so - // we never race and get here first. - data->pipe.readFd().reset(); - // If the worker side is finished, then return its error (which may overwrite // our possible error -- but it's more interesting anyway). If not, then we timed out. { - unique_lock<mutex> lock(data->lock); + std::scoped_lock<std::mutex> lock(data->lock); + data->pipe.close(); if (data->workerError != NO_ERROR) { err = data->workerError; ALOGE("[%s] worker failed with error '%s'", this->name.string(), strerror(-err)); @@ -390,7 +356,6 @@ CommandSection::CommandSection(int id, const char* command, ...) : Section(id) { CommandSection::~CommandSection() { free(mCommand); } status_t CommandSection::Execute(ReportWriter* writer) const { - FdBuffer buffer; Fpipe cmdPipe; Fpipe ihPipe; @@ -411,6 +376,7 @@ status_t CommandSection::Execute(ReportWriter* writer) const { } cmdPipe.writeFd().reset(); + FdBuffer buffer; status_t readStatus = buffer.read(ihPipe.readFd().get(), this->timeoutMs); writer->setSectionStats(buffer); if (readStatus != NO_ERROR || buffer.timedOut()) { @@ -457,7 +423,7 @@ DumpsysSection::DumpsysSection(int id, const char* service, ...) DumpsysSection::~DumpsysSection() {} -status_t DumpsysSection::BlockingCall(int pipeWriteFd) const { +status_t DumpsysSection::BlockingCall(unique_fd& pipeWriteFd) const { // checkService won't wait for the service to show up like getService will. sp<IBinder> service = defaultServiceManager()->checkService(mService); @@ -466,19 +432,120 @@ status_t DumpsysSection::BlockingCall(int pipeWriteFd) const { return NAME_NOT_FOUND; } - service->dump(pipeWriteFd, mArgs); + service->dump(pipeWriteFd.get(), mArgs); return NO_ERROR; } // ================================================================================ +TextDumpsysSection::TextDumpsysSection(int id, const char* service, ...) + :Section(id), mService(service) { + name = "dumpsys "; + name += service; + + va_list args; + va_start(args, service); + while (true) { + const char* arg = va_arg(args, const char*); + if (arg == NULL) { + break; + } + mArgs.add(String16(arg)); + name += " "; + name += arg; + } + va_end(args); +} + +TextDumpsysSection::~TextDumpsysSection() {} + +status_t TextDumpsysSection::Execute(ReportWriter* writer) const { + // checkService won't wait for the service to show up like getService will. + sp<IBinder> service = defaultServiceManager()->checkService(mService); + if (service == NULL) { + ALOGW("TextDumpsysSection: Can't lookup service: %s", String8(mService).string()); + return NAME_NOT_FOUND; + } + + // Create pipe + Fpipe dumpPipe; + if (!dumpPipe.init()) { + ALOGW("[%s] failed to setup pipe", this->name.string()); + return -errno; + } + + // Run dumping thread + const uint64_t start = Nanotime(); + std::thread worker([write_fd = std::move(dumpPipe.writeFd()), service = std::move(service), + this]() mutable { + // Don't crash the service if writing to a closed pipe (may happen if dumping times out) + signal(SIGPIPE, sigpipe_handler); + status_t err = service->dump(write_fd.get(), this->mArgs); + if (err != OK) { + ALOGW("[%s] dump thread failed. Error: %s", this->name.string(), strerror(-err)); + } + write_fd.reset(); + }); + + // Collect dump content + FdBuffer buffer; + ProtoOutputStream proto; + proto.write(TextDumpProto::COMMAND, std::string(name.string())); + proto.write(TextDumpProto::DUMP_DURATION_NS, int64_t(Nanotime() - start)); + buffer.write(proto.data()); + + sp<EncodedBuffer> internalBuffer = buffer.data(); + internalBuffer->writeHeader((uint32_t)TextDumpProto::CONTENT, WIRE_TYPE_LENGTH_DELIMITED); + size_t editPos = internalBuffer->wp()->pos(); + internalBuffer->wp()->move(8); // reserve 8 bytes for the varint of the data size + size_t dataBeginPos = internalBuffer->wp()->pos(); + + status_t readStatus = buffer.read(dumpPipe.readFd(), this->timeoutMs); + dumpPipe.readFd().reset(); + writer->setSectionStats(buffer); + if (readStatus != OK || buffer.timedOut()) { + ALOGW("[%s] failed to read from dumpsys: %s, timedout: %s", this->name.string(), + strerror(-readStatus), buffer.timedOut() ? "true" : "false"); + worker.detach(); + return readStatus; + } + worker.join(); // wait for worker to finish + + // Revisit the actual size from dumpsys and edit the internal buffer accordingly. + size_t dumpSize = buffer.size() - dataBeginPos; + internalBuffer->wp()->rewind()->move(editPos); + internalBuffer->writeRawVarint32(dumpSize); + internalBuffer->copy(dataBeginPos, dumpSize); + + return writer->writeSection(buffer); +} + +// ================================================================================ // initialization only once in Section.cpp. map<log_id_t, log_time> LogSection::gLastLogsRetrieved; -LogSection::LogSection(int id, log_id_t logID) : WorkerThreadSection(id), mLogID(logID) { - name = "logcat "; - name += android_log_id_to_name(logID); - switch (logID) { +LogSection::LogSection(int id, const char* logID, ...) : WorkerThreadSection(id), mLogMode(logModeBase) { + name = "logcat -b "; + name += logID; + + va_list args; + va_start(args, logID); + mLogID = android_name_to_log_id(logID); + while(true) { + const char* arg = va_arg(args, const char*); + if (arg == NULL) { + break; + } + if (!strcmp(arg, "-L")) { + // Read from last logcat buffer + mLogMode = mLogMode | ANDROID_LOG_PSTORE; + } + name += " "; + name += arg; + } + va_end(args); + + switch (mLogID) { case LOG_ID_EVENTS: case LOG_ID_STATS: case LOG_ID_SECURITY: @@ -507,42 +574,51 @@ static inline int32_t get4LE(uint8_t const* src) { return src[0] | (src[1] << 8) | (src[2] << 16) | (src[3] << 24); } -status_t LogSection::BlockingCall(int pipeWriteFd) const { +status_t LogSection::BlockingCall(unique_fd& pipeWriteFd) const { + // heap profile shows that liblog malloc & free significant amount of memory in this process. + // Hence forking a new process to prevent memory fragmentation. + pid_t pid = fork(); + if (pid < 0) { + ALOGW("[%s] failed to fork", this->name.string()); + return errno; + } + if (pid > 0) { + return wait_child(pid, this->timeoutMs); + } // Open log buffer and getting logs since last retrieved time if any. unique_ptr<logger_list, void (*)(logger_list*)> loggers( gLastLogsRetrieved.find(mLogID) == gLastLogsRetrieved.end() - ? android_logger_list_alloc(ANDROID_LOG_NONBLOCK, 0, 0) - : android_logger_list_alloc_time(ANDROID_LOG_NONBLOCK, - gLastLogsRetrieved[mLogID], 0), + ? android_logger_list_alloc(mLogMode, 0, 0) + : android_logger_list_alloc_time(mLogMode, gLastLogsRetrieved[mLogID], 0), android_logger_list_free); if (android_logger_open(loggers.get(), mLogID) == NULL) { ALOGE("[%s] Can't get logger.", this->name.string()); - return -1; + _exit(EXIT_FAILURE); } log_msg msg; log_time lastTimestamp(0); ProtoOutputStream proto; + status_t err = OK; while (true) { // keeps reading until logd buffer is fully read. - status_t err = android_logger_list_read(loggers.get(), &msg); - // err = 0 - no content, unexpected connection drop or EOF. - // err = +ive number - size of retrieved data from logger - // err = -ive number, OS supplied error _except_ for -EAGAIN - // err = -EAGAIN, graceful indication for ANDRODI_LOG_NONBLOCK that this is the end of data. - if (err <= 0) { - if (err != -EAGAIN) { + status_t status = android_logger_list_read(loggers.get(), &msg); + // status = 0 - no content, unexpected connection drop or EOF. + // status = +ive number - size of retrieved data from logger + // status = -ive number, OS supplied error _except_ for -EAGAIN + // status = -EAGAIN, graceful indication for ANDRODI_LOG_NONBLOCK that this is the end. + if (status <= 0) { + if (status != -EAGAIN) { ALOGW("[%s] fails to read a log_msg.\n", this->name.string()); + err = -status; } - // dump previous logs and don't consider this error a failure. break; } if (mBinary) { // remove the first uint32 which is tag's index in event log tags android_log_context context = create_android_log_parser(msg.msg() + sizeof(uint32_t), msg.len() - sizeof(uint32_t)); - ; android_log_list_element elem; lastTimestamp.tv_sec = msg.entry.sec; @@ -602,9 +678,10 @@ status_t LogSection::BlockingCall(int pipeWriteFd) const { } } else { AndroidLogEntry entry; - err = android_log_processLogBuffer(&msg.entry, &entry); - if (err != NO_ERROR) { + status = android_log_processLogBuffer(&msg.entry, &entry); + if (status != OK) { ALOGW("[%s] fails to process to an entry.\n", this->name.string()); + err = status; break; } lastTimestamp.tv_sec = entry.tv_sec; @@ -623,17 +700,24 @@ status_t LogSection::BlockingCall(int pipeWriteFd) const { trimTail(entry.message, entry.messageLen)); proto.end(token); } + if (!proto.flush(pipeWriteFd.get())) { + if (errno == EPIPE) { + ALOGW("[%s] wrote to a broken pipe\n", this->name.string()); + } + err = errno; + break; + } + proto.clear(); } gLastLogsRetrieved[mLogID] = lastTimestamp; - if (!proto.flush(pipeWriteFd) && errno == EPIPE) { - ALOGE("[%s] wrote to a broken pipe\n", this->name.string()); - return EPIPE; - } - return NO_ERROR; + _exit(err); } // ================================================================================ +const int LINK_NAME_LEN = 64; +const int EXE_NAME_LEN = 1024; + TombstoneSection::TombstoneSection(int id, const char* type, const int64_t timeoutMs) : WorkerThreadSection(id, timeoutMs), mType(type) { name = "tombstone "; @@ -642,7 +726,7 @@ TombstoneSection::TombstoneSection(int id, const char* type, const int64_t timeo TombstoneSection::~TombstoneSection() {} -status_t TombstoneSection::BlockingCall(int pipeWriteFd) const { +status_t TombstoneSection::BlockingCall(unique_fd& pipeWriteFd) const { std::unique_ptr<DIR, decltype(&closedir)> proc(opendir("/proc"), closedir); if (proc.get() == nullptr) { ALOGE("opendir /proc failed: %s\n", strerror(errno)); @@ -651,25 +735,37 @@ status_t TombstoneSection::BlockingCall(int pipeWriteFd) const { const std::set<int> hal_pids = get_interesting_hal_pids(); - ProtoOutputStream proto; + auto pooledBuffer = get_buffer_from_pool(); + ProtoOutputStream proto(pooledBuffer); + // dumpBufferSize should be a multiple of page size (4 KB) to reduce memory fragmentation + size_t dumpBufferSize = 64 * 1024; // 64 KB is enough for most tombstone dump + char* dumpBuffer = (char*)mmap(NULL, dumpBufferSize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); struct dirent* d; + char link_name[LINK_NAME_LEN]; + char exe_name[EXE_NAME_LEN]; status_t err = NO_ERROR; while ((d = readdir(proc.get()))) { int pid = atoi(d->d_name); if (pid <= 0) { continue; } - - const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid); - std::string exe; - if (!android::base::Readlink(link_name, &exe)) { - ALOGE("Section %s: Can't read '%s': %s\n", name.string(), - link_name.c_str(), strerror(errno)); + snprintf(link_name, LINK_NAME_LEN, "/proc/%d/exe", pid); + struct stat fileStat; + if (stat(link_name, &fileStat) != OK) { continue; } + ssize_t exe_name_len = readlink(link_name, exe_name, EXE_NAME_LEN); + if (exe_name_len < 0 || exe_name_len >= EXE_NAME_LEN) { + ALOGE("[%s] Can't read '%s': %s", name.string(), link_name, strerror(errno)); + continue; + } + // readlink(2) does not put a null terminator at the end + exe_name[exe_name_len] = '\0'; bool is_java_process; - if (exe == "/system/bin/app_process32" || exe == "/system/bin/app_process64") { + if (strncmp(exe_name, "/system/bin/app_process32", LINK_NAME_LEN) == 0 || + strncmp(exe_name, "/system/bin/app_process64", LINK_NAME_LEN) == 0) { if (mType != "java") continue; // Don't bother dumping backtraces for the zygote. if (IsZygote(pid)) { @@ -678,7 +774,7 @@ status_t TombstoneSection::BlockingCall(int pipeWriteFd) const { } is_java_process = true; - } else if (should_dump_native_traces(exe.c_str())) { + } else if (should_dump_native_traces(exe_name)) { if (mType != "native") continue; is_java_process = false; } else if (hal_pids.find(pid) != hal_pids.end()) { @@ -734,32 +830,58 @@ status_t TombstoneSection::BlockingCall(int pipeWriteFd) const { ALOGE("[%s] child had an issue: %s\n", this->name.string(), strerror(-cStatus)); } - auto dump = std::make_unique<char[]>(buffer.size()); + // Resize dump buffer + if (dumpBufferSize < buffer.size()) { + munmap(dumpBuffer, dumpBufferSize); + while(dumpBufferSize < buffer.size()) dumpBufferSize = dumpBufferSize << 1; + dumpBuffer = (char*)mmap(NULL, dumpBufferSize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + } sp<ProtoReader> reader = buffer.data()->read(); int i = 0; while (reader->hasNext()) { - dump[i] = reader->next(); + dumpBuffer[i] = reader->next(); i++; } uint64_t token = proto.start(android::os::BackTraceProto::TRACES); proto.write(android::os::BackTraceProto::Stack::PID, pid); - proto.write(android::os::BackTraceProto::Stack::DUMP, dump.get(), i); + proto.write(android::os::BackTraceProto::Stack::DUMP, dumpBuffer, i); proto.write(android::os::BackTraceProto::Stack::DUMP_DURATION_NS, static_cast<long long>(Nanotime() - start)); proto.end(token); dumpPipe.readFd().reset(); - } - - if (!proto.flush(pipeWriteFd) && errno == EPIPE) { - ALOGE("[%s] wrote to a broken pipe\n", this->name.string()); - if (err != NO_ERROR) { - return EPIPE; + if (!proto.flush(pipeWriteFd.get())) { + if (errno == EPIPE) { + ALOGE("[%s] wrote to a broken pipe\n", this->name.string()); + } + err = errno; + break; } + proto.clear(); } - + munmap(dumpBuffer, dumpBufferSize); + return_buffer_to_pool(pooledBuffer); return err; } +// ================================================================================ +BringYourOwnSection::BringYourOwnSection(int id, const char* customName, const uid_t callingUid, + const sp<IIncidentDumpCallback>& callback) + : WorkerThreadSection(id, REMOTE_CALL_TIMEOUT_MS), uid(callingUid), mCallback(callback) { + name = "registered "; + name += customName; +} + +BringYourOwnSection::~BringYourOwnSection() {} + +status_t BringYourOwnSection::BlockingCall(unique_fd& pipeWriteFd) const { + android::os::ParcelFileDescriptor pfd(std::move(pipeWriteFd)); + if(mCallback != nullptr) { + mCallback->onDumpSection(pfd); + } + return NO_ERROR; +} + } // namespace incidentd } // namespace os } // namespace android |