diff options
Diffstat (limited to 'cmds/statsd/src/guardrail/StatsdStats.h')
-rw-r--r-- | cmds/statsd/src/guardrail/StatsdStats.h | 678 |
1 files changed, 0 insertions, 678 deletions
diff --git a/cmds/statsd/src/guardrail/StatsdStats.h b/cmds/statsd/src/guardrail/StatsdStats.h deleted file mode 100644 index 005048446fc3..000000000000 --- a/cmds/statsd/src/guardrail/StatsdStats.h +++ /dev/null @@ -1,678 +0,0 @@ -/* - * Copyright 2017, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "config/ConfigKey.h" - -#include <gtest/gtest_prod.h> -#include <log/log_time.h> -#include <list> -#include <mutex> -#include <string> -#include <vector> -#include <unordered_map> - -namespace android { -namespace os { -namespace statsd { - -struct ConfigStats { - int32_t uid; - int64_t id; - int32_t creation_time_sec; - int32_t deletion_time_sec = 0; - int32_t reset_time_sec = 0; - int32_t metric_count; - int32_t condition_count; - int32_t matcher_count; - int32_t alert_count; - bool is_valid; - - std::list<int32_t> broadcast_sent_time_sec; - - // Times at which this config is activated. - std::list<int32_t> activation_time_sec; - - // Times at which this config is deactivated. - std::list<int32_t> deactivation_time_sec; - - std::list<int32_t> data_drop_time_sec; - // Number of bytes dropped at corresponding time. - std::list<int64_t> data_drop_bytes; - std::list<std::pair<int32_t, int64_t>> dump_report_stats; - - // Stores how many times a matcher have been matched. The map size is capped by kMaxConfigCount. - std::map<const int64_t, int> matcher_stats; - - // Stores the number of output tuple of condition trackers when it's bigger than - // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1, - // it means some data has been dropped. The map size is capped by kMaxConfigCount. - std::map<const int64_t, int> condition_stats; - - // Stores the number of output tuple of metric producers when it's bigger than - // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1, - // it means some data has been dropped. The map size is capped by kMaxConfigCount. - std::map<const int64_t, int> metric_stats; - - // Stores the max number of output tuple of dimensions in condition across dimensions in what - // when it's bigger than kDimensionKeySizeSoftLimit. When you see the number is - // kDimensionKeySizeHardLimit +1, it means some data has been dropped. The map size is capped by - // kMaxConfigCount. - std::map<const int64_t, int> metric_dimension_in_condition_stats; - - // Stores the number of times an anomaly detection alert has been declared. - // The map size is capped by kMaxConfigCount. - std::map<const int64_t, int> alert_stats; - - // Stores the config ID for each sub-config used. - std::list<std::pair<const int64_t, const int32_t>> annotations; -}; - -struct UidMapStats { - int32_t changes; - int32_t bytes_used; - int32_t dropped_changes; - int32_t deleted_apps = 0; -}; - -// Keeps track of stats of statsd. -// Single instance shared across the process. All public methods are thread safe. -class StatsdStats { -public: - static StatsdStats& getInstance(); - ~StatsdStats(){}; - - const static int kDimensionKeySizeSoftLimit = 500; - const static int kDimensionKeySizeHardLimit = 800; - - // Per atom dimension key size limit - static const std::map<int, std::pair<size_t, size_t>> kAtomDimensionKeySizeLimitMap; - - const static int kMaxConfigCountPerUid = 20; - const static int kMaxAlertCountPerConfig = 100; - const static int kMaxConditionCountPerConfig = 300; - const static int kMaxMetricCountPerConfig = 1000; - const static int kMaxMatcherCountPerConfig = 800; - - // The max number of old config stats we keep. - const static int kMaxIceBoxSize = 20; - - const static int kMaxLoggerErrors = 20; - - const static int kMaxSystemServerRestarts = 20; - - const static int kMaxTimestampCount = 20; - - const static int kMaxLogSourceCount = 50; - - const static int kMaxPullAtomPackages = 100; - - // Max memory allowed for storing metrics per configuration. If this limit is exceeded, statsd - // drops the metrics data in memory. - static const size_t kMaxMetricsBytesPerConfig = 2 * 1024 * 1024; - - // Soft memory limit per configuration. Once this limit is exceeded, we begin notifying the - // data subscriber that it's time to call getData. - static const size_t kBytesPerConfigTriggerGetData = 192 * 1024; - - // Cap the UID map's memory usage to this. This should be fairly high since the UID information - // is critical for understanding the metrics. - const static size_t kMaxBytesUsedUidMap = 50 * 1024; - - // The number of deleted apps that are stored in the uid map. - const static int kMaxDeletedAppsInUidMap = 100; - - /* Minimum period between two broadcasts in nanoseconds. */ - static const int64_t kMinBroadcastPeriodNs = 60 * NS_PER_SEC; - - /* Min period between two checks of byte size per config key in nanoseconds. */ - static const int64_t kMinByteSizeCheckPeriodNs = 60 * NS_PER_SEC; - - /* Minimum period between two activation broadcasts in nanoseconds. */ - static const int64_t kMinActivationBroadcastPeriodNs = 10 * NS_PER_SEC; - - // Maximum age (30 days) that files on disk can exist in seconds. - static const int kMaxAgeSecond = 60 * 60 * 24 * 30; - - // Maximum age (2 days) that local history files on disk can exist in seconds. - static const int kMaxLocalHistoryAgeSecond = 60 * 60 * 24 * 2; - - // Maximum number of files (1000) that can be in stats directory on disk. - static const int kMaxFileNumber = 1000; - - // Maximum size of all files that can be written to stats directory on disk. - static const int kMaxFileSize = 50 * 1024 * 1024; - - // How long to try to clear puller cache from last time - static const long kPullerCacheClearIntervalSec = 1; - - // Max time to do a pull. - static const int64_t kPullMaxDelayNs = 30 * NS_PER_SEC; - - // Maximum number of pushed atoms statsd stats will track above kMaxPushedAtomId. - static const int kMaxNonPlatformPushedAtoms = 100; - - // Maximum atom id value that we consider a platform pushed atom. - // This should be updated once highest pushed atom id in atoms.proto approaches this value. - static const int kMaxPushedAtomId = 500; - - // Atom id that is the start of the pulled atoms. - static const int kPullAtomStartTag = 10000; - - // Atom id that is the start of vendor atoms. - static const int kVendorAtomStartTag = 100000; - - // Vendor pulled atom start id. - static const int32_t kVendorPulledAtomStartTag = 150000; - - // Beginning of range for timestamp truncation. - static const int32_t kTimestampTruncationStartTag = 300000; - - // End of range for timestamp truncation. - static const int32_t kTimestampTruncationEndTag = 304999; - - // Max accepted atom id. - static const int32_t kMaxAtomTag = 200000; - - static const int64_t kInt64Max = 0x7fffffffffffffffLL; - - static const int32_t kMaxLoggedBucketDropEvents = 10; - - /** - * Report a new config has been received and report the static stats about the config. - * - * The static stats include: the count of metrics, conditions, matchers, and alerts. - * If the config is not valid, this config stats will be put into icebox immediately. - */ - void noteConfigReceived(const ConfigKey& key, int metricsCount, int conditionsCount, - int matchersCount, int alertCount, - const std::list<std::pair<const int64_t, const int32_t>>& annotations, - bool isValid); - /** - * Report a config has been removed. - */ - void noteConfigRemoved(const ConfigKey& key); - /** - * Report a config has been reset when ttl expires. - */ - void noteConfigReset(const ConfigKey& key); - - /** - * Report a broadcast has been sent to a config owner to collect the data. - */ - void noteBroadcastSent(const ConfigKey& key); - - /** - * Report that a config has become activated or deactivated. - * This can be different from whether or not a broadcast is sent if the - * guardrail prevented the broadcast from being sent. - */ - void noteActiveStatusChanged(const ConfigKey& key, bool activate); - - /** - * Report a config's metrics data has been dropped. - */ - void noteDataDropped(const ConfigKey& key, const size_t totalBytes); - - /** - * Report metrics data report has been sent. - * - * The report may be requested via StatsManager API, or through adb cmd. - */ - void noteMetricsReportSent(const ConfigKey& key, const size_t num_bytes); - - /** - * Report the size of output tuple of a condition. - * - * Note: only report when the condition has an output dimension, and the tuple - * count > kDimensionKeySizeSoftLimit. - * - * [key]: The config key that this condition belongs to. - * [id]: The id of the condition. - * [size]: The output tuple size. - */ - void noteConditionDimensionSize(const ConfigKey& key, const int64_t& id, int size); - - /** - * Report the size of output tuple of a metric. - * - * Note: only report when the metric has an output dimension, and the tuple - * count > kDimensionKeySizeSoftLimit. - * - * [key]: The config key that this metric belongs to. - * [id]: The id of the metric. - * [size]: The output tuple size. - */ - void noteMetricDimensionSize(const ConfigKey& key, const int64_t& id, int size); - - /** - * Report the max size of output tuple of dimension in condition across dimensions in what. - * - * Note: only report when the metric has an output dimension in condition, and the max tuple - * count > kDimensionKeySizeSoftLimit. - * - * [key]: The config key that this metric belongs to. - * [id]: The id of the metric. - * [size]: The output tuple size. - */ - void noteMetricDimensionInConditionSize(const ConfigKey& key, const int64_t& id, int size); - - /** - * Report a matcher has been matched. - * - * [key]: The config key that this matcher belongs to. - * [id]: The id of the matcher. - */ - void noteMatcherMatched(const ConfigKey& key, const int64_t& id); - - /** - * Report that an anomaly detection alert has been declared. - * - * [key]: The config key that this alert belongs to. - * [id]: The id of the alert. - */ - void noteAnomalyDeclared(const ConfigKey& key, const int64_t& id); - - /** - * Report an atom event has been logged. - */ - void noteAtomLogged(int atomId, int32_t timeSec); - - /** - * Report that statsd modified the anomaly alarm registered with StatsCompanionService. - */ - void noteRegisteredAnomalyAlarmChanged(); - - /** - * Report that statsd modified the periodic alarm registered with StatsCompanionService. - */ - void noteRegisteredPeriodicAlarmChanged(); - - /** - * Records the number of delta entries that are being dropped from the uid map. - */ - void noteUidMapDropped(int deltas); - - /** - * Records that an app was deleted (from statsd's map). - */ - void noteUidMapAppDeletionDropped(); - - /** - * Updates the number of changes currently stored in the uid map. - */ - void setUidMapChanges(int changes); - void setCurrentUidMapMemory(int bytes); - - /* - * Updates minimum interval between pulls for an pulled atom. - */ - void updateMinPullIntervalSec(int pullAtomId, long intervalSec); - - /* - * Notes an atom is pulled. - */ - void notePull(int pullAtomId); - - /* - * Notes an atom is served from puller cache. - */ - void notePullFromCache(int pullAtomId); - - /* - * Notify data error for pulled atom. - */ - void notePullDataError(int pullAtomId); - - /* - * Records time for actual pulling, not including those served from cache and not including - * statsd processing delays. - */ - void notePullTime(int pullAtomId, int64_t pullTimeNs); - - /* - * Records pull delay for a pulled atom, including those served from cache and including statsd - * processing delays. - */ - void notePullDelay(int pullAtomId, int64_t pullDelayNs); - - /* - * Records pull exceeds timeout for the puller. - */ - void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis); - - /* - * Records pull exceeds max delay for a metric. - */ - void notePullExceedMaxDelay(int pullAtomId); - - /* - * Records when system server restarts. - */ - void noteSystemServerRestart(int32_t timeSec); - - /** - * Records statsd skipped an event. - */ - void noteLogLost(int32_t wallClockTimeSec, int32_t count, int32_t lastError, - int32_t lastAtomTag, int32_t uid, int32_t pid); - - /** - * Records that the pull of an atom has failed. Eg, if the client indicated the pull failed, if - * the pull timed out, or if the outgoing binder call failed. - * This count will only increment if the puller was actually invoked. - * - * It does not include a pull not occurring due to not finding the appropriate - * puller. These cases are covered in other counts. - */ - void notePullFailed(int atomId); - - /** - * Records that the pull of an atom has failed due to not having a uid provider. - */ - void notePullUidProviderNotFound(int atomId); - - /** - * Records that the pull of an atom has failed due not finding a puller registered by a - * trusted uid. - */ - void notePullerNotFound(int atomId); - - /** - * Records that the pull has failed due to the outgoing binder call failing. - */ - void notePullBinderCallFailed(int atomId); - - /** - * A pull with no data occurred - */ - void noteEmptyData(int atomId); - - /** - * Records that a puller callback for the given atomId was registered or unregistered. - * - * @param registered True if the callback was registered, false if was unregistered. - */ - void notePullerCallbackRegistrationChanged(int atomId, bool registered); - - /** - * Hard limit was reached in the cardinality of an atom - */ - void noteHardDimensionLimitReached(int64_t metricId); - - /** - * A log event was too late, arrived in the wrong bucket and was skipped - */ - void noteLateLogEventSkipped(int64_t metricId); - - /** - * Buckets were skipped as time elapsed without any data for them - */ - void noteSkippedForwardBuckets(int64_t metricId); - - /** - * An unsupported value type was received - */ - void noteBadValueType(int64_t metricId); - - /** - * Buckets were dropped due to reclaim memory. - */ - void noteBucketDropped(int64_t metricId); - - /** - * A condition change was too late, arrived in the wrong bucket and was skipped - */ - void noteConditionChangeInNextBucket(int64_t metricId); - - /** - * A bucket has been tagged as invalid. - */ - void noteInvalidatedBucket(int64_t metricId); - - /** - * Tracks the total number of buckets (include skipped/invalid buckets). - */ - void noteBucketCount(int64_t metricId); - - /** - * For pulls at bucket boundaries, it represents the misalignment between the real timestamp and - * the end of the bucket. - */ - void noteBucketBoundaryDelayNs(int64_t metricId, int64_t timeDelayNs); - - /** - * Number of buckets with unknown condition. - */ - void noteBucketUnknownCondition(int64_t metricId); - - /* Reports one event has been dropped due to queue overflow, and the oldest event timestamp in - * the queue */ - void noteEventQueueOverflow(int64_t oldestEventTimestampNs); - - /** - * Reports that the activation broadcast guardrail was hit for this uid. Namely, the broadcast - * should have been sent, but instead was skipped due to hitting the guardrail. - */ - void noteActivationBroadcastGuardrailHit(const int uid); - - /** - * Reports that an atom is erroneous or cannot be parsed successfully by - * statsd. An atom tag of 0 indicates that the client did not supply the - * atom id within the encoding. - * - * For pushed atoms only, this call should be preceded by a call to - * noteAtomLogged. - */ - void noteAtomError(int atomTag, bool pull=false); - - /** - * Reset the historical stats. Including all stats in icebox, and the tracked stats about - * metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue - * to collect stats after reset() has been called. - */ - void reset(); - - /** - * Output the stats in protobuf binary format to [buffer]. - * - * [reset]: whether to clear the historical stats after the call. - */ - void dumpStats(std::vector<uint8_t>* buffer, bool reset); - - /** - * Output statsd stats in human readable format to [out] file descriptor. - */ - void dumpStats(int outFd) const; - - typedef struct PullTimeoutMetadata { - int64_t pullTimeoutUptimeMillis; - int64_t pullTimeoutElapsedMillis; - PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis) : - pullTimeoutUptimeMillis(uptimeMillis), - pullTimeoutElapsedMillis(elapsedMillis) {/* do nothing */} - } PullTimeoutMetadata; - - typedef struct { - long totalPull = 0; - long totalPullFromCache = 0; - long minPullIntervalSec = LONG_MAX; - int64_t avgPullTimeNs = 0; - int64_t maxPullTimeNs = 0; - long numPullTime = 0; - int64_t avgPullDelayNs = 0; - int64_t maxPullDelayNs = 0; - long numPullDelay = 0; - long dataError = 0; - long pullTimeout = 0; - long pullExceedMaxDelay = 0; - long pullFailed = 0; - long pullUidProviderNotFound = 0; - long pullerNotFound = 0; - long emptyData = 0; - long registeredCount = 0; - long unregisteredCount = 0; - int32_t atomErrorCount = 0; - long binderCallFailCount = 0; - std::list<PullTimeoutMetadata> pullTimeoutMetadata; - } PulledAtomStats; - - typedef struct { - long hardDimensionLimitReached = 0; - long lateLogEventSkipped = 0; - long skippedForwardBuckets = 0; - long badValueType = 0; - long conditionChangeInNextBucket = 0; - long invalidatedBucket = 0; - long bucketDropped = 0; - int64_t minBucketBoundaryDelayNs = 0; - int64_t maxBucketBoundaryDelayNs = 0; - long bucketUnknownCondition = 0; - long bucketCount = 0; - } AtomMetricStats; - -private: - StatsdStats(); - - mutable std::mutex mLock; - - int32_t mStartTimeSec; - - // Track the number of dropped entries used by the uid map. - UidMapStats mUidMapStats; - - // The stats about the configs that are still in use. - // The map size is capped by kMaxConfigCount. - std::map<const ConfigKey, std::shared_ptr<ConfigStats>> mConfigStats; - - // Stores the stats for the configs that are no longer in use. - // The size of the vector is capped by kMaxIceBoxSize. - std::list<const std::shared_ptr<ConfigStats>> mIceBox; - - // Stores the number of times a pushed atom is logged. - // The size of the vector is the largest pushed atom id in atoms.proto + 1. Atoms - // out of that range will be put in mNonPlatformPushedAtomStats. - // This is a vector, not a map because it will be accessed A LOT -- for each stats log. - std::vector<int> mPushedAtomStats; - - // Stores the number of times a pushed atom is logged for atom ids above kMaxPushedAtomId. - // The max size of the map is kMaxNonPlatformPushedAtoms. - std::unordered_map<int, int> mNonPlatformPushedAtomStats; - - // Maps PullAtomId to its stats. The size is capped by the puller atom counts. - std::map<int, PulledAtomStats> mPulledAtomStats; - - // Stores the number of times a pushed atom was logged erroneously. The - // corresponding counts for pulled atoms are stored in PulledAtomStats. - // The max size of this map is kMaxAtomErrorsStatsSize. - std::map<int, int> mPushedAtomErrorStats; - int kMaxPushedAtomErrorStatsSize = 100; - - // Maps metric ID to its stats. The size is capped by the number of metrics. - std::map<int64_t, AtomMetricStats> mAtomMetricStats; - - // Maps uids to times when the activation changed broadcast not sent due to hitting the - // guardrail. The size is capped by the number of configs, and up to 20 times per uid. - std::map<int, std::list<int32_t>> mActivationBroadcastGuardrailStats; - - struct LogLossStats { - LogLossStats(int32_t sec, int32_t count, int32_t error, int32_t tag, int32_t uid, - int32_t pid) - : mWallClockSec(sec), - mCount(count), - mLastError(error), - mLastTag(tag), - mUid(uid), - mPid(pid) { - } - int32_t mWallClockSec; - int32_t mCount; - // error code defined in linux/errno.h - int32_t mLastError; - int32_t mLastTag; - int32_t mUid; - int32_t mPid; - }; - - // Max of {(now - oldestEventTimestamp) when overflow happens}. - // This number is helpful to understand how SLOW statsd can be. - int64_t mMaxQueueHistoryNs = 0; - - // Min of {(now - oldestEventTimestamp) when overflow happens}. - // This number is helpful to understand how FAST the events floods to statsd. - int64_t mMinQueueHistoryNs = kInt64Max; - - // Total number of events that are lost due to queue overflow. - int32_t mOverflowCount = 0; - - // Timestamps when we detect log loss, and the number of logs lost. - std::list<LogLossStats> mLogLossStats; - - std::list<int32_t> mSystemServerRestartSec; - - // Stores the number of times statsd modified the anomaly alarm registered with - // StatsCompanionService. - int mAnomalyAlarmRegisteredStats = 0; - - // Stores the number of times statsd registers the periodic alarm changes - int mPeriodicAlarmRegisteredStats = 0; - - void noteConfigResetInternalLocked(const ConfigKey& key); - - void noteConfigRemovedInternalLocked(const ConfigKey& key); - - void resetInternalLocked(); - - void noteDataDropped(const ConfigKey& key, const size_t totalBytes, int32_t timeSec); - - void noteMetricsReportSent(const ConfigKey& key, const size_t num_bytes, int32_t timeSec); - - void noteBroadcastSent(const ConfigKey& key, int32_t timeSec); - - void noteActiveStatusChanged(const ConfigKey& key, bool activate, int32_t timeSec); - - void noteActivationBroadcastGuardrailHit(const int uid, int32_t timeSec); - - void addToIceBoxLocked(std::shared_ptr<ConfigStats>& stats); - - int getPushedAtomErrors(int atomId) const; - - /** - * Get a reference to AtomMetricStats for a metric. If none exists, create it. The reference - * will live as long as `this`. - */ - StatsdStats::AtomMetricStats& getAtomMetricStats(int64_t metricId); - - FRIEND_TEST(StatsdStatsTest, TestValidConfigAdd); - FRIEND_TEST(StatsdStatsTest, TestInvalidConfigAdd); - FRIEND_TEST(StatsdStatsTest, TestConfigRemove); - FRIEND_TEST(StatsdStatsTest, TestSubStats); - FRIEND_TEST(StatsdStatsTest, TestAtomLog); - FRIEND_TEST(StatsdStatsTest, TestNonPlatformAtomLog); - FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold); - FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor); - FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash); - FRIEND_TEST(StatsdStatsTest, TestPullAtomStats); - FRIEND_TEST(StatsdStatsTest, TestAtomMetricsStats); - FRIEND_TEST(StatsdStatsTest, TestActivationBroadcastGuardrailHit); - FRIEND_TEST(StatsdStatsTest, TestAtomErrorStats); - - FRIEND_TEST(StatsLogProcessorTest, InvalidConfigRemoved); -}; - -} // namespace statsd -} // namespace os -} // namespace android |