diff options
-rw-r--r-- | pixelstats/MmMetricsReporter.cpp | 691 | ||||
-rw-r--r-- | pixelstats/SysfsCollector.cpp | 59 | ||||
-rw-r--r-- | pixelstats/include/pixelstats/MmMetricsReporter.h | 67 | ||||
-rw-r--r-- | pixelstats/include/pixelstats/SysfsCollector.h | 1 | ||||
-rw-r--r-- | pixelstats/pixelatoms.proto | 90 |
5 files changed, 874 insertions, 34 deletions
diff --git a/pixelstats/MmMetricsReporter.cpp b/pixelstats/MmMetricsReporter.cpp index 558fd2d..874cc5b 100644 --- a/pixelstats/MmMetricsReporter.cpp +++ b/pixelstats/MmMetricsReporter.cpp @@ -18,6 +18,7 @@ #include <aidl/android/frameworks/stats/IStats.h> #include <android-base/file.h> +#include <android-base/parsedouble.h> #include <android-base/parseint.h> #include <android-base/properties.h> #include <android-base/stringprintf.h> @@ -53,13 +54,13 @@ const std::vector<MmMetricsReporter::MmMetricsInfo> MmMetricsReporter::kMmMetric {"nr_anon_pages", PixelMmMetricsPerHour::kAnonPagesFieldNumber, false}, {"nr_file_pages", PixelMmMetricsPerHour::kFilePagesFieldNumber, false}, {"nr_slab_reclaimable", PixelMmMetricsPerHour::kSlabReclaimableFieldNumber, false}, + {"nr_slab_unreclaimable", PixelMmMetricsPerHour::kSlabUnreclaimableFieldNumber, false}, {"nr_zspages", PixelMmMetricsPerHour::kZspagesFieldNumber, false}, {"nr_unevictable", PixelMmMetricsPerHour::kUnevictableFieldNumber, false}, }; const std::vector<MmMetricsReporter::MmMetricsInfo> MmMetricsReporter::kMmMetricsPerDayInfo = { {"workingset_refault", PixelMmMetricsPerDay::kWorkingsetRefaultFieldNumber, true}, - {"workingset_refault_file", PixelMmMetricsPerDay::kWorkingsetRefaultFieldNumber, true}, {"pswpin", PixelMmMetricsPerDay::kPswpinFieldNumber, true}, {"pswpout", PixelMmMetricsPerDay::kPswpoutFieldNumber, true}, {"allocstall_dma", PixelMmMetricsPerDay::kAllocstallDmaFieldNumber, true}, @@ -78,6 +79,18 @@ const std::vector<MmMetricsReporter::MmMetricsInfo> MmMetricsReporter::kMmMetric {"pgalloc_costly_order", PixelMmMetricsPerDay::kPgallocHighFieldNumber, true}, {"pgcache_hit", PixelMmMetricsPerDay::kPgcacheHitFieldNumber, true}, {"pgcache_miss", PixelMmMetricsPerDay::kPgcacheMissFieldNumber, true}, + {"workingset_refault_file", PixelMmMetricsPerDay::kWorkingsetRefaultFileFieldNumber, true}, + {"workingset_refault_anon", PixelMmMetricsPerDay::kWorkingsetRefaultAnonFieldNumber, true}, + {"compact_success", 
PixelMmMetricsPerDay::kCompactSuccessFieldNumber, true}, + {"compact_fail", PixelMmMetricsPerDay::kCompactFailFieldNumber, true}, + {"kswapd_low_wmark_hit_quickly", PixelMmMetricsPerDay::kKswapdLowWmarkHqFieldNumber, true}, + {"kswapd_high_wmark_hit_quickly", PixelMmMetricsPerDay::kKswapdHighWmarkHqFieldNumber, + true}, + {"thp_file_alloc", PixelMmMetricsPerDay::kThpFileAllocFieldNumber, true}, + {"thp_zero_page_alloc", PixelMmMetricsPerDay::kThpZeroPageAllocFieldNumber, true}, + {"thp_split_page", PixelMmMetricsPerDay::kThpSplitPageFieldNumber, true}, + {"thp_migration_split", PixelMmMetricsPerDay::kThpMigrationSplitFieldNumber, true}, + {"thp_deferred_split_page", PixelMmMetricsPerDay::kThpDeferredSplitPageFieldNumber, true}, }; const std::vector<MmMetricsReporter::MmMetricsInfo> MmMetricsReporter::kCmaStatusInfo = { @@ -146,7 +159,11 @@ MmMetricsReporter::MmMetricsReporter() kIonTotalPoolsPath("/sys/kernel/dma_heap/total_pools_kb"), kIonTotalPoolsPathForLegacy("/sys/kernel/ion/total_pools_kb"), kGpuTotalPages("/sys/kernel/pixel_stat/gpu/mem/total_page_count"), - kPixelStatMm("/sys/kernel/pixel_stat/mm") { + kCompactDuration("/sys/kernel/pixel_stat/mm/compaction/mm_compaction_duration"), + kDirectReclaimBasePath("/sys/kernel/pixel_stat/mm/vmscan/direct_reclaim"), + kPixelStatMm("/sys/kernel/pixel_stat/mm"), + prev_compaction_duration_(kNumCompactionDurationPrevMetrics, 0), + prev_direct_reclaim_(kNumDirectReclaimPrevMetrics, 0) { is_user_build_ = checkUserBuild(); ker_mm_metrics_support_ = checkKernelMMMetricSupport(); } @@ -170,6 +187,90 @@ bool MmMetricsReporter::ReadFileToUint(const char *const path, uint64_t *val) { return true; } +/* + * This function reads whole file and parses tokens separated by <delim> into + * long integers. Useful for direct reclaim & compaction duration sysfs nodes. + * Data write is using all or none policy: It will not write partial data unless + * all data values are good. 
+ * + * path: file to open/read + * data: where to store the results + * start_idx: index into data[] where to start saving the results + * delim: delimiters separating different longs + * skip: how many resulting longs to skip before saving + * nonnegtive: set to true to validate positive numbers + * + * Return value: number of longs actually stored on success. negative + * error codes on errors. + */ +static int ReadFileToLongs(const std::string &path, std::vector<long> *data, int start_idx, + const char *delim, int skip, bool nonnegative = false) { + std::vector<long> out; + enum { err_read_file = -1, err_parse = -2 }; + std::string file_contents; + + if (!ReadFileToString(path, &file_contents)) { + // Don't print this log if the file doesn't exist, since logs will be printed repeatedly. + if (errno != ENOENT) { + ALOGI("Unable to read %s - %s", path.c_str(), strerror(errno)); + } + return err_read_file; + } + + file_contents = android::base::Trim(file_contents); + std::vector<std::string> words = android::base::Tokenize(file_contents, delim); + if (words.size() == 0) + return 0; + + for (auto &w : words) { + if (skip) { + skip--; + continue; + } + long tmp; + if (!android::base::ParseInt(w, &tmp) || (nonnegative && tmp < 0)) + return err_parse; + out.push_back(tmp); + } + + int min_size = std::max(static_cast<int>(out.size()) + start_idx, 0); + if (min_size > data->size()) + data->resize(min_size); + std::copy(out.begin(), out.end(), data->begin() + start_idx); + + return out.size(); +} + +/* + * This function calls ReadFileToLongs, and checks the expected number + * of long integers read. Useful for direct reclaim & compaction duration + * sysfs nodes. + * + * path: file to open/read + * data: where to store the results + * start_idx: index into data[] where to start saving the results + * delim: delimiters separating different longs + * skip: how many resulting longs to skip before saving + * expected_num: number of expected longs to be read. 
+ * nonnegtive: set to true to validate positive numbers + * + * Return value: true if successfully get expected number of long values. + * otherwise false. + */ +static inline bool ReadFileToLongsCheck(const std::string &path, std::vector<long> *store, + int start_idx, const char *delim, int skip, + int expected_num, bool nonnegative = false) { + int num = ReadFileToLongs(path, store, start_idx, delim, skip, nonnegative); + + if (num == expected_num) + return true; + + int last_idx = std::min(start_idx + expected_num, static_cast<int>(store->size())); + std::fill(store->begin() + start_idx, store->begin() + last_idx, -1); + + return false; +} + bool MmMetricsReporter::reportVendorAtom(const std::shared_ptr<IStats> &stats_client, int atom_id, const std::vector<VendorAtomValue> &values, const std::string &atom_name) { @@ -272,7 +373,7 @@ void MmMetricsReporter::fillAtomValues(const std::vector<MmMetricsInfo> &metrics if (max_idx < entry.atom_key) max_idx = entry.atom_key; } - int size = max_idx - kVendorAtomOffset + 1; + unsigned int size = max_idx - kVendorAtomOffset + 1; if (atom_values->size() < size) atom_values->resize(size, tmp); @@ -301,6 +402,10 @@ void MmMetricsReporter::fillAtomValues(const std::vector<MmMetricsInfo> &metrics (*prev_mm_metrics) = mm_metrics; } +void MmMetricsReporter::aggregatePixelMmMetricsPer5Min() { + aggregatePressureStall(); +} + void MmMetricsReporter::logPixelMmMetricsPerHour(const std::shared_ptr<IStats> &stats_client) { if (!MmMetricsSupported()) return; @@ -312,28 +417,23 @@ void MmMetricsReporter::logPixelMmMetricsPerHour(const std::shared_ptr<IStats> & uint64_t ion_total_pools = getIonTotalPools(); uint64_t gpu_memory = getGpuMemory(); - std::vector<VendorAtomValue> values; - bool is_first_atom = (prev_hour_vmstat_.size() == 0) ? true : false; - fillAtomValues(kMmMetricsPerHourInfo, vmstat, &prev_hour_vmstat_, &values); - - // resize values to add the following fields + // allocate enough values[] entries for the metrics. 
VendorAtomValue tmp; tmp.set<VendorAtomValue::longValue>(0); - int size = PixelMmMetricsPerHour::kGpuMemoryFieldNumber - kVendorAtomOffset + 1; - if (values.size() < size) { - values.resize(size, tmp); - } + int last_value_index = + PixelMmMetricsPerHour::kPsiMemSomeAvg300AvgFieldNumber - kVendorAtomOffset; + std::vector<VendorAtomValue> values(last_value_index + 1, tmp); + + fillAtomValues(kMmMetricsPerHourInfo, vmstat, &prev_hour_vmstat_, &values); tmp.set<VendorAtomValue::longValue>(ion_total_pools); values[PixelMmMetricsPerHour::kIonTotalPoolsFieldNumber - kVendorAtomOffset] = tmp; tmp.set<VendorAtomValue::longValue>(gpu_memory); values[PixelMmMetricsPerHour::kGpuMemoryFieldNumber - kVendorAtomOffset] = tmp; + fillPressureStallAtom(&values); - // Don't report the first atom to avoid big spike in accumulated values. - if (!is_first_atom) { - // Send vendor atom to IStats HAL - reportVendorAtom(stats_client, PixelAtoms::Atom::kPixelMmMetricsPerHour, values, - "PixelMmMetricsPerHour"); - } + // Send vendor atom to IStats HAL + reportVendorAtom(stats_client, PixelAtoms::Atom::kPixelMmMetricsPerHour, values, + "PixelMmMetricsPerHour"); } void MmMetricsReporter::logPixelMmMetricsPerDay(const std::shared_ptr<IStats> &stats_client) { @@ -344,8 +444,21 @@ void MmMetricsReporter::logPixelMmMetricsPerDay(const std::shared_ptr<IStats> &s if (vmstat.size() == 0) return; - std::vector<VendorAtomValue> values; + std::vector<long> direct_reclaim; + readDirectReclaimStat(&direct_reclaim); + + std::vector<long> compaction_duration; + readCompactionDurationStat(&compaction_duration); + bool is_first_atom = (prev_day_vmstat_.size() == 0) ? true : false; + + // allocate enough values[] entries for the metrics. 
+ VendorAtomValue tmp; + tmp.set<VendorAtomValue::longValue>(0); + int last_value_index = + PixelMmMetricsPerDay::kThpDeferredSplitPageFieldNumber - kVendorAtomOffset; + std::vector<VendorAtomValue> values(last_value_index + 1, tmp); + fillAtomValues(kMmMetricsPerDayInfo, vmstat, &prev_day_vmstat_, &values); std::map<std::string, uint64_t> pixel_vmstat = @@ -355,6 +468,8 @@ void MmMetricsReporter::logPixelMmMetricsPerDay(const std::shared_ptr<IStats> &s &prev_kswapd_stime_, &values); fillProcessStime(PixelMmMetricsPerDay::kKcompactdStimeClksFieldNumber, "kcompactd0", &kcompactd_pid_, &prev_kcompactd_stime_, &values); + fillDirectReclaimStatAtom(direct_reclaim, &values); + fillCompactionDurationStatAtom(compaction_duration, &values); // Don't report the first atom to avoid big spike in accumulated values. if (!is_first_atom) { @@ -498,6 +613,544 @@ std::map<std::string, uint64_t> MmMetricsReporter::readCmaStat( } /** + * This function reads compaction duration sysfs node + * (/sys/kernel/pixel_stat/mm/compaction/mm_compaction_duration) + * + * store: vector to save compaction duration info (entries are set to -1 when the read fails) + */ +void MmMetricsReporter::readCompactionDurationStat(std::vector<long> *store) { + static const std::string path(kCompactDuration); + constexpr int num_metrics = 6; + + store->resize(num_metrics); + + int start_idx = 0; + int expected_num = num_metrics; + + if (!ReadFileToLongsCheck(path, store, start_idx, " ", 1, expected_num, true)) { + ALOGI("Unable to read %s for the compaction duration info.", path.c_str()); + } +} + +/** + * This function fills atom values (values) from acquired compaction duration + * information from vector store + * + * store: the already collected (by readCompactionDurationStat()) compaction + * duration information + * values: the atom value vector to be filled.
+ */ +void MmMetricsReporter::fillCompactionDurationStatAtom(const std::vector<long> &store, + std::vector<VendorAtomValue> *values) { + // field number of the first metric; values[] is indexed by (field number - kVendorAtomOffset) + constexpr int start_idx = PixelMmMetricsPerDay::kCompactionTotalTimeFieldNumber; + constexpr int num_metrics = 6; + + if (!MmMetricsSupported()) + return; + + int size = start_idx + num_metrics - kVendorAtomOffset; + if (values->size() < size) + values->resize(size); + + for (int i = 0; i < num_metrics; i++) { + VendorAtomValue tmp; + if (store[i] == -1) { + tmp.set<VendorAtomValue::longValue>(0); + } else { + tmp.set<VendorAtomValue::longValue>(store[i] - prev_compaction_duration_[i]); + prev_compaction_duration_[i] = store[i]; + } + (*values)[start_idx + i - kVendorAtomOffset] = tmp; + } + // prev_compaction_duration_ is updated per element above, so a failed (-1) read keeps the last good sample. +} + +/** + * This function reads direct reclaim sysfs node (4 files: + * /sys/kernel/pixel_stat/mm/vmscan/direct_reclaim/<level>/latency_stat, + * where <level> = native, top, visible, other.), and save total time and + * 4 latency information per file. Total (1+4) x 4 = 20 metrics will be + * saved.
+ * + * store: vector to save direct reclaim info + */ +void MmMetricsReporter::readDirectReclaimStat(std::vector<long> *store) { + static const std::string base_path(kDirectReclaimBasePath); + static const std::vector<std::string> dr_levels{"native", "top", "visible", "other"}; + static const std::string sysfs_name = "latency_stat"; + constexpr int num_metrics_per_file = 5; + int num_file = dr_levels.size(); + int num_metrics = num_metrics_per_file * num_file; + + store->resize(num_metrics); + int pass = -1; + for (const auto &level : dr_levels) { + ++pass; + std::string path = base_path + '/' + level + '/' + sysfs_name; + int start_idx = pass * num_metrics_per_file; + int expected_num = num_metrics_per_file; + if (!ReadFileToLongsCheck(path, store, start_idx, " ", 1, expected_num, true)) { + ALOGI("Unable to read %s for the direct reclaim info.", path.c_str()); + } + } +} + +/** + * This function fills atom values (values) from acquired direct reclaim + * information from vector store + * + * store: the already collected (by readDirectReclaimStat()) direct reclaim + * information + * values: the atom value vector to be filled. + */ +void MmMetricsReporter::fillDirectReclaimStatAtom(const std::vector<long> &store, + std::vector<VendorAtomValue> *values) { + // field number of the first metric; values[] is indexed by (field number - kVendorAtomOffset) + constexpr int start_idx = PixelMmMetricsPerDay::kDirectReclaimNativeLatencyTotalTimeFieldNumber; + constexpr int num_metrics = 20; /* num_metrics_per_file * num_file */ + + if (!MmMetricsSupported()) + return; + + int size = start_idx + num_metrics - kVendorAtomOffset; + if (values->size() < size) + values->resize(size); + + for (int i = 0; i < num_metrics; i++) { + VendorAtomValue tmp; + tmp.set<VendorAtomValue::longValue>(store[i] - prev_direct_reclaim_[i]); + (*values)[start_idx + i - kVendorAtomOffset] = tmp; + } + prev_direct_reclaim_ = store; +} + +/** + * This function reads pressure (PSI) files (loop thru all 3 files: cpu, io, and + * memory) and calls the parser to parse and store the metric values.
+ * Note that each file has two lines (except cpu, which has only one line): one with + * a leading "full", and the other with a leading "some", showing the category + * for that line. + * A category has 4 metrics: avg10, avg60, avg300, and total, + * i.e. the moving average % of PSI over the 10s, 60s, and 300s time windows, plus + * the total stalled time, except that 'cpu' has no 'full' category. + * In total, we have 3 x 2 x 4 - 4 = 24 - 4 = 20 metrics, arranged in + * the order of + * + * cpu_some_avg<xyz> + * cpu_some_total + * io_full_avg<xyz> + * io_full_total + * io_some_avg<xyz> + * io_some_total + * mem_full_avg<xyz> + * mem_full_total + * mem_some_avg<xyz> + * mem_some_total + * + * where <xyz>=10, 60, 300 in the order as they appear. + * + * Note that the avg values (i.e. <abc>_<def>_avg<xyz>) + * are percentages with 2-decimal digit accuracy. We use an + * integer in 2-decimal fixed point format to represent the values, + * i.e. value x 100, or to cope with floating point errors, + * floor(value x 100 + 0.5) + * + * In fact, in newer kernels, "cpu" PSI has no "full" category. Some + * old kernels report it as all zeros, to keep backward compatibility. The + * parse function called by this function is able to detect and ignore + * the "cpu, full" category.
+ * + * sample pressure stall files: + * /proc/pressure # cat cpu + * some avg10=2.93 avg60=3.17 avg300=3.15 total=94628150260 + * /proc/pressure # cat io + * some avg10=1.06 avg60=1.15 avg300=1.18 total=37709873805 + * full avg10=1.06 avg60=1.10 avg300=1.11 total=36592322936 + * /proc/pressure # cat memory + * some avg10=0.00 avg60=0.00 avg300=0.00 total=29705314 + * full avg10=0.00 avg60=0.00 avg300=0.00 total=17234456 + * + * PSI information definitions could be found at + * https://www.kernel.org/doc/html/latest/accounting/psi.html + * + * basePath: the base path to the pressure stall information + * store: pointer to the vector to store the 20 metrics in the mentioned + * order + */ +void MmMetricsReporter::readPressureStall(const char *basePath, std::vector<long> *store) { + constexpr int kTypeIdxCpu = 0; + + // Callers should have already prepared this, but we resize it here for safety + store->resize(kPsiNumAllMetrics); + std::fill(store->begin(), store->end(), -1); + + // To make the process unified, we prepend an imaginary "cpu + full" + // type-category combination. Now, each file (cpu, io, memnry) contains + // two categories, i.e. "full" and "some". + // Each category has <kPsiNumNames> merics and thus need that many entries + // to store them, except that the first category (the imaginary one) do not + // need any storage. So we set the save index for the 1st file ("cpu") to + // -kPsiNumNames. + int file_save_idx = -kPsiNumNames; + + // loop thru all pressure stall files: cpu, io, memory + for (int type_idx = 0; type_idx < kPsiNumFiles; + ++type_idx, file_save_idx += kPsiMetricsPerFile) { + std::string file_contents; + std::string path = std::string("") + basePath + '/' + kPsiTypes[type_idx]; + + if (!ReadFileToString(path, &file_contents)) { + // Don't print this log if the file doesn't exist, since logs will be printed + // repeatedly. 
+ if (errno != ENOENT) + ALOGI("Unable to read %s - %s", path.c_str(), strerror(errno)); + goto err_out; + } + if (!MmMetricsReporter::parsePressureStallFileContent(type_idx == kTypeIdxCpu, + file_contents, store, file_save_idx)) + goto err_out; + } + return; + +err_out: + std::fill(store->begin(), store->end(), -1); +} + +/* + * This function parses a pressure stall file, which contains two + * lines, i.e. the "full", and "some" lines, except that the 'cpu' file + * contains only one line ("some"). Refer to the function comments of + * readPressureStall() for pressure stall file format. + * + * For old kernel, 'cpu' file might contain an extra line for "full", which + * will be ignored. + * + * is_cpu: Is the data from the file 'cpu' + * lines: the file content + * store: the output vector to hold the parsed data. + * file_save_idx: base index to start saving 'store' vector for this file. + * + * Return value: true on success, false otherwise. + */ +bool MmMetricsReporter::parsePressureStallFileContent(bool is_cpu, std::string lines, + std::vector<long> *store, int file_save_idx) { + constexpr int kNumOfWords = 5; // expected number of words separated by spaces. + constexpr int kCategoryFull = 0; + + std::istringstream data(lines); + std::string line; + + while (std::getline(data, line)) { + int category_idx = 0; + + line = android::base::Trim(line); + std::vector<std::string> words = android::base::Tokenize(line, " "); + if (words.size() != kNumOfWords) { + ALOGE("PSI parse fail: num of words = %d != expected %d", + static_cast<int>(words.size()), kNumOfWords); + return false; + } + + // words[0] should be either "full" or "some", the category name. + for (auto &cat : kPsiCategories) { + if (words[0].compare(cat) == 0) + break; + ++category_idx; + } + if (category_idx == kPsiNumCategories) { + ALOGE("PSI parse fail: unknown category %s", words[0].c_str()); + return false; + } + + // skip (cpu, full) combination. 
+ if (is_cpu && category_idx == kCategoryFull) { + ALOGI("kernel: old PSI sysfs node."); + continue; + } + + // Now we have separated words in a vector, e.g. + // ["some", "avg10=2.93", "avg60=3.17", "avg300=3.15", total=94628150260"] + // call parsePressureStallWords to parse them. + int line_save_idx = file_save_idx + category_idx * kPsiNumNames; + if (!parsePressureStallWords(words, store, line_save_idx)) + return false; + } + return true; +} + +// This function parses the already split words, e.g. +// ["some", "avg10=0.00", "avg60=0.00", "avg300=0.00", "total=29705314"], +// from a line (category) in a pressure stall file. +// +// words: the split words in the form of "name=value" +// store: the output vector +// line_save_idx: the base start index to save in vector for this line (category) +// +// Return value: true on success, false otherwise. +bool MmMetricsReporter::parsePressureStallWords(std::vector<std::string> words, + std::vector<long> *store, int line_save_idx) { + // Skip the first word, which is already parsed by the caller. + // All others are value pairs in "name=value" form. + // e.g. ["some", "avg10=0.00", "avg60=0.00", "avg300=0.00", "total=29705314"] + // "some" is skipped. + for (int i = 1; i < words.size(); ++i) { + std::vector<std::string> metric = android::base::Tokenize(words[i], "="); + if (metric.size() != 2) { + ALOGE("%s: parse error (name=value) @ idx %d", __FUNCTION__, i); + return false; + } + if (!MmMetricsReporter::savePressureMetrics(metric[0], metric[1], store, line_save_idx)) + return false; + } + return true; +} + +// This function parses one value pair in "name=value" format, and depending on +// the name, save to its proper location in the store vector. +// name = "avg10" -> save to index base_save_idx. +// name = "avg60" -> save to index base_save_idx + 1. +// name = "avg300" -> save to index base_save_idx + 2. +// name = "total" -> save to index base_save_idx + 3. 
+// +// name: the metrics name +// value: the metrics value +// store: the output vector +// base_save_idx: the base save index +// +// Return value: true on success, false otherwise. +// +bool MmMetricsReporter::savePressureMetrics(std::string name, std::string value, + std::vector<long> *store, int base_save_idx) { + int name_idx = 0; + constexpr int kNameIdxTotal = 3; + + for (auto &mn : kPsiMetricNames) { + if (name.compare(mn) == 0) + break; + ++name_idx; + } + if (name_idx == kPsiNumNames) { + ALOGE("%s: parse error: unknown metric name.", __FUNCTION__); + return false; + } + + long out; + if (name_idx == kNameIdxTotal) { + // 'total' metrics + unsigned long tmp; + if (!android::base::ParseUint(value, &tmp)) + out = -1; + else + out = tmp; + } else { + // 'avg' metrics + double d = -1.0; + if (android::base::ParseDouble(value, &d)) + out = static_cast<long>(d * 100 + 0.5); + else + out = -1; + } + + if (base_save_idx + name_idx >= store->size()) { + // should never reach here + ALOGE("out of bound access to store[] (src line %d) @ index %d", __LINE__, + base_save_idx + name_idx); + return false; + } else { + (*store)[base_save_idx + name_idx] = out; + } + return true; +} + +/** + * This function reads in the current pressure (PSI) information, and aggregates + * it (except for the "total" information, which will overwrite + * the previous value without aggregation. + * + * data are arranged in the following order, and must comply the order defined + * in the proto: + * + * // note: these 5 'total' metrics are not aggregated. 
+ * cpu_some_total + * io_full_total + * io_some_total + * mem_full_total + * mem_some_total + * + * // 9 aggregated metrics as above avg<xyz>_<aggregate> + * // where <xyz> = 10, 60, 300; <aggregate> = min, max, sum + * cpu_some_avg10_min + * cpu_some_avg10_max + * cpu_some_avg10_sum + * cpu_some_avg60_min + * cpu_some_avg60_max + * cpu_some_avg60_sum + * cpu_some_avg300_min + * cpu_some_avg300_max + * cpu_some_avg300_sum + * + * // similar 9 metrics as above avg<xyz>_<aggregate> + * io_full_avg<xyz>_<aggregate> + * + * // similar 9 metrics as above avg<xyz>_<aggregate> + * io_some_avg<xyz>_<aggregate> + * + * // similar 9 metrics as above avg<xyz>_<aggregate> + * mem_full_avg<xyz>_<aggregate> + * + * // similar 9 metrics as above avg<xyz>_<aggregate> + * mem_some_avg<xyz>_<aggregate> + * + * In addition, it increases psi_data_set_count_ by 1 (in order to calculate + * the average from the "_sum" aggregate.) + */ +void MmMetricsReporter::aggregatePressureStall() { + constexpr int kFirstTotalOffset = kPsiNumAvgs; + + if (!MmMetricsSupported()) + return; + + std::vector<long> psi(kPsiNumAllMetrics, -1); + readPressureStall(kPsiBasePath, &psi); + + // Pre-check for possible later out of bound error, if readPressureStall() + // decreases the vector size. + // It's for safety only. The condition should never be true. + if (psi.size() != kPsiNumAllMetrics) { + ALOGE("Wrong psi[] size %d != expected %d after read.", static_cast<int>(psi.size()), + kPsiNumAllMetrics); + return; + } + + // check raw metrics and preventively handle errors: Although we don't expect read sysfs + // node could fail. Discard all current readings on any error. + for (int i = 0; i < kPsiNumAllMetrics; ++i) { + if (psi[i] == -1) { + ALOGE("Bad data @ psi[%d] = -1", i); + goto err_out; + } + } + + // "total" metrics are accumulative: just replace the previous accumulation.
+ for (int i = 0; i < kPsiNumAllTotals; ++i) { + int psi_idx; + + psi_idx = i * kPsiNumNames + kFirstTotalOffset; + if (psi_idx >= psi.size()) { + // should never reach here + ALOGE("out of bound access to psi[] (src line %d) @ index %d", __LINE__, psi_idx); + goto err_out; + } else { + psi_total_[i] = psi[psi_idx]; + } + } + + // "avg" metrics will be aggregated to min, max and sum + // later on, the sum will be divided by psi_data_set_count_ to get the average. + int aggr_idx; + aggr_idx = 0; + for (int psi_idx = 0; psi_idx < kPsiNumAllMetrics; ++psi_idx) { + if (psi_idx % kPsiNumNames == kFirstTotalOffset) + continue; // skip 'total' metrics, already processed. + + if (aggr_idx + 3 > kPsiNumAllUploadAvgMetrics) { + // should never reach here + ALOGE("out of bound access to psi_aggregated_[] (src line %d) @ index %d ~ %d", + __LINE__, aggr_idx, aggr_idx + 2); + return; // give up avgs, but keep totals (so don't go err_out + } + + long value = psi[psi_idx]; + if (psi_data_set_count_ == 0) { + psi_aggregated_[aggr_idx++] = value; + psi_aggregated_[aggr_idx++] = value; + psi_aggregated_[aggr_idx++] = value; + } else { + psi_aggregated_[aggr_idx++] = std::min(value, psi_aggregated_[aggr_idx]); + psi_aggregated_[aggr_idx++] = std::max(value, psi_aggregated_[aggr_idx]); + psi_aggregated_[aggr_idx++] += value; + } + } + ++psi_data_set_count_; + return; + +err_out: + for (int i = 0; i < kPsiNumAllTotals; ++i) psi_total_[i] = -1; +} + +/** + * This function fills atom values (values) from psi_aggregated_[] + * + * values: the atom value vector to be filled. 
+ */ +void MmMetricsReporter::fillPressureStallAtom(std::vector<VendorAtomValue> *values) { + constexpr int avg_of_avg_offset = 2; + constexpr int total_start_idx = + PixelMmMetricsPerHour::kPsiCpuSomeTotalFieldNumber - kVendorAtomOffset; + constexpr int avg_start_idx = total_start_idx + kPsiNumAllTotals; + + if (!MmMetricsSupported()) + return; + + VendorAtomValue tmp; + + // The caller should have setup the correct total size, + // but we check and extend the size when it's too small for safety. + unsigned int min_value_size = total_start_idx + kPsiNumAllUploadMetrics; + if (values->size() < min_value_size) + values->resize(min_value_size); + + // "total" metric + int metric_idx = total_start_idx; + for (int save = 0; save < kPsiNumAllTotals; ++save, ++metric_idx) { + if (psi_data_set_count_ == 0) + psi_total_[save] = -1; // no data: invalidate the current total + + // A good difference needs a good previous value and a good current value. + if (psi_total_[save] != -1 && prev_psi_total_[save] != -1) + tmp.set<VendorAtomValue::longValue>(psi_total_[save] - prev_psi_total_[save]); + else + tmp.set<VendorAtomValue::longValue>(-1); + + prev_psi_total_[save] = psi_total_[save]; + if (metric_idx >= values->size()) { + // should never reach here + ALOGE("out of bound access to value[] for psi-total @ index %d", metric_idx); + goto cleanup; + } else { + (*values)[metric_idx] = tmp; + } + } + + // "avg" metrics -> aggregate to min, max, and avg of the original avg + metric_idx = avg_start_idx; + for (int save = 0; save < kPsiNumAllUploadAvgMetrics; ++save, ++metric_idx) { + if (psi_data_set_count_) { + if (save % kPsiNumOfAggregatedType == avg_of_avg_offset) { + // avg of avg + tmp.set<VendorAtomValue::intValue>(psi_aggregated_[save] / psi_data_set_count_); + } else { + // min or max of avg + tmp.set<VendorAtomValue::intValue>(psi_aggregated_[save]); + } + } else { + tmp.set<VendorAtomValue::intValue>(-1); + } + if (metric_idx >= values->size()) { + // should never reach 
here + ALOGE("out of bound access to value[] for psi-avg @ index %d", metric_idx); + goto cleanup; + } else { + (*values)[metric_idx] = tmp; + } + } + +cleanup: + psi_data_set_count_ = 0; +} + +/** * This function is to collect CMA metrics and upload them. * The CMA metrics are collected by readCmaStat(), copied into atom values * by fillAtomValues(), and then uploaded by reportVendorAtom(). The collected diff --git a/pixelstats/SysfsCollector.cpp b/pixelstats/SysfsCollector.cpp index 30fd676..4a3711b 100644 --- a/pixelstats/SysfsCollector.cpp +++ b/pixelstats/SysfsCollector.cpp @@ -1041,6 +1041,10 @@ void SysfsCollector::logPerDay() { logVendorAudioHardwareStats(stats_client); } +void SysfsCollector::aggregatePer5Min() { + mm_metrics_reporter_.aggregatePixelMmMetricsPer5Min(); +} + void SysfsCollector::logPerHour() { const std::shared_ptr<IStats> stats_client = getStatsService(); if (!stats_client) { @@ -1068,17 +1072,29 @@ void SysfsCollector::collect(void) { // Sleep for 30 seconds on launch to allow codec driver to load. sleep(30); + // sample & aggregate for the first time. + aggregatePer5Min(); + // Collect first set of stats on boot. logPerHour(); logPerDay(); - // Set an one-hour timer. 
struct itimerspec period; - const int kSecondsPerHour = 60 * 60; - int hours = 0; - period.it_interval.tv_sec = kSecondsPerHour; + + // gcd (greatest common divisor) of all the following timings + constexpr int kSecondsPerWake = 5 * 60; + + constexpr int kWakesPer5Min = 5 * 60 / kSecondsPerWake; + constexpr int kWakesPerHour = 60 * 60 / kSecondsPerWake; + constexpr int kWakesPerDay = 24 * 60 * 60 / kSecondsPerWake; + + int wake_5min = 0; + int wake_hours = 0; + int wake_days = 0; + + period.it_interval.tv_sec = kSecondsPerWake; period.it_interval.tv_nsec = 0; - period.it_value.tv_sec = kSecondsPerHour; + period.it_value.tv_sec = kSecondsPerWake; period.it_value.tv_nsec = 0; if (timerfd_settime(timerfd, 0, &period, NULL)) { @@ -1088,22 +1104,41 @@ void SysfsCollector::collect(void) { while (1) { int readval; - do { + union { char buf[8]; + uint64_t count; + } expire; + + do { errno = 0; - readval = read(timerfd, buf, sizeof(buf)); + readval = read(timerfd, expire.buf, sizeof(expire.buf)); } while (readval < 0 && errno == EINTR); if (readval < 0) { ALOGE("Timerfd error - %s\n", strerror(errno)); return; } - hours++; - logPerHour(); - if (hours == 24) { - // Collect stats every 24hrs after. 
+ wake_5min += expire.count; + wake_hours += expire.count; + wake_days += expire.count; + + if (wake_5min >= kWakesPer5Min) { + wake_5min %= kWakesPer5Min; + aggregatePer5Min(); + } + + if (wake_hours >= kWakesPerHour) { + if (wake_hours >= 2 * kWakesPerHour) + ALOGW("Hourly wake: sleep too much: expire.count=%" PRIu64, expire.count); + wake_hours %= kWakesPerHour; + logPerHour(); + } + + if (wake_days >= kWakesPerDay) { + if (wake_days >= 2 * kWakesPerDay) + ALOGW("Daily wake: sleep too much: expire.count=%" PRIu64, expire.count); + wake_days %= kWakesPerDay; logPerDay(); - hours = 0; } } } diff --git a/pixelstats/include/pixelstats/MmMetricsReporter.h b/pixelstats/include/pixelstats/MmMetricsReporter.h index 89f12f5..cfcfac3 100644 --- a/pixelstats/include/pixelstats/MmMetricsReporter.h +++ b/pixelstats/include/pixelstats/MmMetricsReporter.h @@ -37,6 +37,7 @@ using aidl::android::frameworks::stats::VendorAtomValue; class MmMetricsReporter { public: MmMetricsReporter(); + void aggregatePixelMmMetricsPer5Min(); void logPixelMmMetricsPerHour(const std::shared_ptr<IStats> &stats_client); void logPixelMmMetricsPerDay(const std::shared_ptr<IStats> &stats_client); void logCmaStatus(const std::shared_ptr<IStats> &stats_client); @@ -62,6 +63,43 @@ class MmMetricsReporter { static const std::vector<MmMetricsInfo> kCmaStatusInfo; static const std::vector<MmMetricsInfo> kCmaStatusExtInfo; + // raw PSI + static constexpr const char *kPsiBasePath = "/proc/pressure"; + static constexpr const char *kPsiTypes[3] = {"cpu", "io", "memory"}; + static constexpr const char *kPsiCategories[2] = {"full", "some"}; + static constexpr const char *kPsiMetricNames[4] = {"avg10", "avg60", "avg300", "total"}; + static constexpr int kPsiNumFiles = sizeof(kPsiTypes) / sizeof(kPsiTypes[0]); + static constexpr int kPsiNumCategories = sizeof(kPsiCategories) / sizeof(kPsiCategories[0]); + // number of statistics metric names (one total and several timed averages, per category) + static constexpr
int kPsiNumNames = sizeof(kPsiMetricNames) / sizeof(kPsiMetricNames[0]); + + // Though cpu has no 'full' category, here we assume it has one. + // So, every file is assumed to contain 2 lines x 4 metrics per line = 8 metrics total. + static constexpr int kPsiMetricsPerFile = kPsiNumCategories * kPsiNumNames; + + // we have 1 'total' and all others 'averages' per category + // "total" metrics are already cumulative and thus no aggregation is needed. + // raw values are used. + static constexpr int kPsiNumTotals = 1; + static constexpr int kPsiNumAvgs = kPsiNumNames - kPsiNumTotals; + + // -1 since "cpu" type has no "full" category + static constexpr int kPsiNumAllCategories = kPsiNumFiles * kPsiNumCategories - 1; + + // number of raw metrics: total and avgs, and the combined all: added together. + static constexpr int kPsiNumAllTotals = kPsiNumAllCategories * kPsiNumTotals; + static constexpr int kPsiNumAllAvgs = kPsiNumAllCategories * kPsiNumAvgs; + static constexpr int kPsiNumAllMetrics = kPsiNumAllTotals + kPsiNumAllAvgs; + + // aggregated into (1) min, (2) max, (3) average (internally the sum is kept rather than the average) + static constexpr int kPsiNumOfAggregatedType = 3; + + // # of upload metrics will have an aggregation factor on all 'average' type raw metrics. 
+ static constexpr int kPsiNumAllUploadAvgMetrics = kPsiNumAllAvgs * kPsiNumOfAggregatedType; + static constexpr int kPsiNumAllUploadTotalMetrics = kPsiNumAllTotals; + static constexpr int kPsiNumAllUploadMetrics = + kPsiNumAllUploadTotalMetrics + kPsiNumAllUploadAvgMetrics; + bool checkKernelMMMetricSupport(); bool MmMetricsSupported() { @@ -80,6 +118,21 @@ class MmMetricsReporter { bool ReadFileToUint(const char *const path, uint64_t *val); bool reportVendorAtom(const std::shared_ptr<IStats> &stats_client, int atom_id, const std::vector<VendorAtomValue> &values, const std::string &atom_name); + void readCompactionDurationStat(std::vector<long> *store); + void fillCompactionDurationStatAtom(const std::vector<long> &store, + std::vector<VendorAtomValue> *values); + void readDirectReclaimStat(std::vector<long> *store); + void fillDirectReclaimStatAtom(const std::vector<long> &store, + std::vector<VendorAtomValue> *values); + void readPressureStall(const char *basePath, std::vector<long> *store); + bool parsePressureStallFileContent(bool is_cpu, std::string lines, std::vector<long> *store, + int file_save_idx); + bool parsePressureStallWords(std::vector<std::string> words, std::vector<long> *store, + int line_save_idx); + bool savePressureMetrics(std::string name, std::string value, std::vector<long> *store, + int base_save_idx); + void fillPressureStallAtom(std::vector<VendorAtomValue> *values); + void aggregatePressureStall(); std::map<std::string, uint64_t> readVmStat(const char *path); uint64_t getIonTotalPools(); uint64_t getGpuMemory(); @@ -103,12 +156,22 @@ class MmMetricsReporter { const char *const kIonTotalPoolsPath; const char *const kIonTotalPoolsPathForLegacy; const char *const kGpuTotalPages; + const char *const kCompactDuration; + const char *const kDirectReclaimBasePath; const char *const kPixelStatMm; // Proto messages are 1-indexed and VendorAtom field numbers start at 2, so // store everything in the values array at the index of the field number // 
-2. - const int kVendorAtomOffset = 2; - + static constexpr int kVendorAtomOffset = 2; + static constexpr int kNumCompactionDurationPrevMetrics = 6; + static constexpr int kNumDirectReclaimPrevMetrics = 20; + + std::vector<long> prev_compaction_duration_; + std::vector<long> prev_direct_reclaim_; + long prev_psi_total_[kPsiNumAllTotals]; + long psi_total_[kPsiNumAllTotals]; + long psi_aggregated_[kPsiNumAllUploadAvgMetrics]; // min, max and avg of original avgXXX + int psi_data_set_count_ = 0; std::map<std::string, uint64_t> prev_hour_vmstat_; std::map<std::string, uint64_t> prev_day_vmstat_; std::map<std::string, uint64_t> prev_day_pixel_vmstat_; diff --git a/pixelstats/include/pixelstats/SysfsCollector.h b/pixelstats/include/pixelstats/SysfsCollector.h index f846d49..c8bfb43 100644 --- a/pixelstats/include/pixelstats/SysfsCollector.h +++ b/pixelstats/include/pixelstats/SysfsCollector.h @@ -70,6 +70,7 @@ class SysfsCollector { private: bool ReadFileToInt(const std::string &path, int *val); bool ReadFileToInt(const char *path, int *val); + void aggregatePer5Min(); void logPerDay(); void logPerHour(); diff --git a/pixelstats/pixelatoms.proto b/pixelstats/pixelatoms.proto index 3c14fd3..3d957bf 100644 --- a/pixelstats/pixelatoms.proto +++ b/pixelstats/pixelatoms.proto @@ -526,12 +526,63 @@ message PixelMmMetricsPerHour { optional int64 unevictable = 7; optional int64 ion_total_pools = 8; optional int64 gpu_memory = 9; + optional int64 slab_unreclaimable = 10; + optional int64 psi_cpu_some_total = 11; + optional int64 psi_io_full_total = 12; + optional int64 psi_io_some_total = 13; + optional int64 psi_mem_full_total = 14; + optional int64 psi_mem_some_total = 15; + optional int32 psi_cpu_some_avg10_min = 16; + optional int32 psi_cpu_some_avg10_max = 17; + optional int32 psi_cpu_some_avg10_avg = 18; + optional int32 psi_cpu_some_avg60_min = 19; + optional int32 psi_cpu_some_avg60_max = 20; + optional int32 psi_cpu_some_avg60_avg = 21; + optional int32 
psi_cpu_some_avg300_min = 22; + optional int32 psi_cpu_some_avg300_max = 23; + optional int32 psi_cpu_some_avg300_avg = 24; + optional int32 psi_io_full_avg10_min = 25; + optional int32 psi_io_full_avg10_max = 26; + optional int32 psi_io_full_avg10_avg = 27; + optional int32 psi_io_full_avg60_min = 28; + optional int32 psi_io_full_avg60_max = 29; + optional int32 psi_io_full_avg60_avg = 30; + optional int32 psi_io_full_avg300_min = 31; + optional int32 psi_io_full_avg300_max = 32; + optional int32 psi_io_full_avg300_avg = 33; + optional int32 psi_io_some_avg10_min = 34; + optional int32 psi_io_some_avg10_max = 35; + optional int32 psi_io_some_avg10_avg = 36; + optional int32 psi_io_some_avg60_min = 37; + optional int32 psi_io_some_avg60_max = 38; + optional int32 psi_io_some_avg60_avg = 39; + optional int32 psi_io_some_avg300_min = 40; + optional int32 psi_io_some_avg300_max = 41; + optional int32 psi_io_some_avg300_avg = 42; + optional int32 psi_mem_full_avg10_min = 43; + optional int32 psi_mem_full_avg10_max = 44; + optional int32 psi_mem_full_avg10_avg = 45; + optional int32 psi_mem_full_avg60_min = 46; + optional int32 psi_mem_full_avg60_max = 47; + optional int32 psi_mem_full_avg60_avg = 48; + optional int32 psi_mem_full_avg300_min = 49; + optional int32 psi_mem_full_avg300_max = 50; + optional int32 psi_mem_full_avg300_avg = 51; + optional int32 psi_mem_some_avg10_min = 52; + optional int32 psi_mem_some_avg10_max = 53; + optional int32 psi_mem_some_avg10_avg = 54; + optional int32 psi_mem_some_avg60_min = 55; + optional int32 psi_mem_some_avg60_max = 56; + optional int32 psi_mem_some_avg60_avg = 57; + optional int32 psi_mem_some_avg300_min = 58; + optional int32 psi_mem_some_avg300_max = 59; + optional int32 psi_mem_some_avg300_avg = 60; } /* A message containing Pixel memory metrics collected daily. 
*/ message PixelMmMetricsPerDay { optional string reverse_domain_name = 1; - optional int64 workingset_refault = 2; + optional int64 workingset_refault = 2; /* refault_file */ optional int64 pswpin = 3; optional int64 pswpout = 4; optional int64 allocstall_dma = 5; @@ -552,6 +603,43 @@ message PixelMmMetricsPerDay { optional int64 pgcache_miss = 20; optional int64 kswapd_stime_clks = 21; optional int64 kcompactd_stime_clks = 22; + optional int64 direct_reclaim_native_latency_total_time = 23; + optional int64 direct_reclaim_native_latency0 = 24; + optional int64 direct_reclaim_native_latency1 = 25; + optional int64 direct_reclaim_native_latency2 = 26; + optional int64 direct_reclaim_native_latency3 = 27; + optional int64 direct_reclaim_visible_latency_total_time = 28; + optional int64 direct_reclaim_visible_latency0 = 29; + optional int64 direct_reclaim_visible_latency1 = 30; + optional int64 direct_reclaim_visible_latency2 = 31; + optional int64 direct_reclaim_visible_latency3 = 32; + optional int64 direct_reclaim_top_latency_total_time = 33; + optional int64 direct_reclaim_top_latency0 = 34; + optional int64 direct_reclaim_top_latency1 = 35; + optional int64 direct_reclaim_top_latency2 = 36; + optional int64 direct_reclaim_top_latency3 = 37; + optional int64 direct_reclaim_other_latency_total_time = 38; + optional int64 direct_reclaim_other_latency0 = 39; + optional int64 direct_reclaim_other_latency1 = 40; + optional int64 direct_reclaim_other_latency2 = 41; + optional int64 direct_reclaim_other_latency3 = 42; + optional int64 compaction_total_time = 43; + optional int64 compaction_ev_count0 = 44; + optional int64 compaction_ev_count1 = 45; + optional int64 compaction_ev_count2 = 46; + optional int64 compaction_ev_count3 = 47; + optional int64 compaction_ev_count4 = 48; + optional int64 workingset_refault_anon = 49; + optional int64 workingset_refault_file = 50; + optional int64 compact_success = 51; + optional int64 compact_fail = 52; + optional int64 
kswapd_low_wmark_hq = 53; + optional int64 kswapd_high_wmark_hq = 54; + optional int64 thp_file_alloc = 55; + optional int64 thp_zero_page_alloc = 56; + optional int64 thp_split_page = 57; + optional int64 thp_migration_split = 58; + optional int64 thp_deferred_split_page = 59; } /* A message containing CMA metrics collected from dogfooding only. */ |