diff options
author | Remi NGUYEN VAN <reminv@google.com> | 2020-07-06 03:01:27 +0000 |
---|---|---|
committer | Android (Google) Code Review <android-gerrit@google.com> | 2020-07-06 03:01:27 +0000 |
commit | bf45240a2ac9dc17240bfff6596aee64e1d70a28 (patch) | |
tree | 1fb9ae79902d27934a5b94334331628d580e02ee /src | |
parent | e1b31f105824614c6a506a99faed954d0d6214d3 (diff) | |
parent | 7b6435fd3b3aac33c6b239040bdeac1573a444a5 (diff) |
Merge "Refine validation metrics" into rvc-dev
Diffstat (limited to 'src')
3 files changed, 149 insertions, 51 deletions
diff --git a/src/android/net/util/NetworkStackUtils.java b/src/android/net/util/NetworkStackUtils.java index 19ca4b5..94de7c3 100755 --- a/src/android/net/util/NetworkStackUtils.java +++ b/src/android/net/util/NetworkStackUtils.java @@ -226,6 +226,15 @@ public class NetworkStackUtils { public static final String DNS_PROBE_PRIVATE_IP_NO_INTERNET_VERSION = "dns_probe_private_ip_no_internet"; + /** + * Experiment flag to enable validation metrics sent by NetworkMonitor. + * + * Metrics are sent by default. They can be disabled by setting the flag to a number greater + * than the APK version (for example 999999999). + * @see #isFeatureEnabled(Context, String, String, boolean) + */ + public static final String VALIDATION_METRICS_VERSION = "validation_metrics_version"; + static { System.loadLibrary("networkstackutilsjni"); } @@ -348,6 +357,9 @@ public class NetworkStackUtils { * {@link DeviceConfig} is enabled by comparing NetworkStack module version {@link NetworkStack} * with current version of property. If this property version is valid, the corresponding * experimental feature would be enabled, otherwise disabled. + * + * This is useful to ensure that if a module install is rolled back, flags are not left fully + * rolled out on a version where they have not been well tested. * @param context The global context information about an app environment. * @param namespace The namespace containing the property to look up. * @param name The name of the property to look up. @@ -363,6 +375,9 @@ public class NetworkStackUtils { * {@link DeviceConfig} is enabled by comparing NetworkStack module version {@link NetworkStack} * with current version of property. If this property version is valid, the corresponding * experimental feature would be enabled, otherwise disabled. + * + * This is useful to ensure that if a module install is rolled back, flags are not left fully + * rolled out on a version where they have not been well tested. * @param context The global context information about an app environment. * @param namespace The namespace containing the property to look up. * @param name The name of the property to look up. diff --git a/src/com/android/networkstack/metrics/NetworkValidationMetrics.java b/src/com/android/networkstack/metrics/NetworkValidationMetrics.java index 3789c5c..f27a939 100644 --- a/src/com/android/networkstack/metrics/NetworkValidationMetrics.java +++ b/src/com/android/networkstack/metrics/NetworkValidationMetrics.java @@ -60,9 +60,11 @@ public class NetworkValidationMetrics { public static final int MAX_PROBE_EVENTS_COUNT = 20; /** - * Reset this NetworkValidationMetrics. + * Reset this NetworkValidationMetrics and start collecting timing and metrics. + * + * <p>This must be called when validation starts. */ - public void reset(@Nullable NetworkCapabilities nc) { + public void startCollection(@Nullable NetworkCapabilities nc) { mStatsBuilder.clear(); mProbeEventsBuilder.clear(); mCapportApiDataBuilder.clear(); @@ -72,16 +74,23 @@ public class NetworkValidationMetrics { } /** - * Returns the enum TransportType + * Returns the enum TransportType. + * + * <p>This method only supports a limited set of common transport type combinations that can be + * measured through metrics, and will return {@link TransportType#TT_UNKNOWN} for others. This + * ensures that, for example, metrics for a TRANSPORT_NEW_UNKNOWN | TRANSPORT_ETHERNET network + * cannot get aggregated with / compared with a "normal" TRANSPORT_ETHERNET network without + * noticing. * - * @param NetworkCapabilities + * @param nc Capabilities to extract transport type from. * @return the TransportType which is defined in * core/proto/android/stats/connectivity/network_stack.proto */ @VisibleForTesting - public static TransportType getTransportTypeFromNC( - @Nullable NetworkCapabilities nc) { + public static TransportType getTransportTypeFromNC(@Nullable NetworkCapabilities nc) { if (nc == null) return TransportType.TT_UNKNOWN; + + final int trCount = nc.getTransportTypes().length; boolean hasCellular = nc.hasTransport(TRANSPORT_CELLULAR); boolean hasWifi = nc.hasTransport(TRANSPORT_WIFI); boolean hasBT = nc.hasTransport(TRANSPORT_BLUETOOTH); @@ -90,13 +99,29 @@ public class NetworkValidationMetrics { boolean hasWifiAware = nc.hasTransport(TRANSPORT_WIFI_AWARE); boolean hasLopan = nc.hasTransport(TRANSPORT_LOWPAN); - if (hasCellular && hasWifi && hasVpn) return TransportType.TT_WIFI_CELLULAR_VPN; - if (hasWifi) return hasVpn ? TransportType.TT_WIFI_VPN : TransportType.TT_WIFI; - if (hasCellular) return hasVpn ? TransportType.TT_CELLULAR_VPN : TransportType.TT_CELLULAR; - if (hasBT) return hasVpn ? TransportType.TT_BLUETOOTH_VPN : TransportType.TT_BLUETOOTH; - if (hasEthernet) return hasVpn ? TransportType.TT_ETHERNET_VPN : TransportType.TT_ETHERNET; - if (hasWifiAware) return TransportType.TT_WIFI_AWARE; - if (hasLopan) return TransportType.TT_LOWPAN; + // VPN networks are not subject to validation and should not see validation stats, but + // metrics could be added to measure private DNS probes only. + if (trCount == 3 && hasCellular && hasWifi && hasVpn) { + return TransportType.TT_WIFI_CELLULAR_VPN; + } + + if (trCount == 2 && hasVpn) { + if (hasWifi) return TransportType.TT_WIFI_VPN; + if (hasCellular) return TransportType.TT_CELLULAR_VPN; + if (hasBT) return TransportType.TT_BLUETOOTH_VPN; + if (hasEthernet) return TransportType.TT_ETHERNET_VPN; + } + + if (trCount == 1) { + if (hasWifi) return TransportType.TT_WIFI; + if (hasCellular) return TransportType.TT_CELLULAR; + if (hasBT) return TransportType.TT_BLUETOOTH; + if (hasEthernet) return TransportType.TT_ETHERNET; + if (hasWifiAware) return TransportType.TT_WIFI_AWARE; + if (hasLopan) return TransportType.TT_LOWPAN; + // TODO: consider having a TT_VPN for VPN-only transport + } + return TransportType.TT_UNKNOWN; } @@ -146,6 +171,8 @@ public class NetworkValidationMetrics { */ @VisibleForTesting public static ValidationResult validationResultToEnum(int result, String redirectUrl) { + // TODO: consider adding a VR_PARTIAL_SUCCESS field to track cases where users accepted + // partial connectivity if ((result & INetworkMonitor.NETWORK_VALIDATION_RESULT_VALID) != 0) { return ValidationResult.VR_SUCCESS; } else if (redirectUrl != null) { @@ -158,12 +185,14 @@ public class NetworkValidationMetrics { } /** - * Write each network probe event to mProbeEventsBuilder. + * Add a network probe event to the metrics builder. */ - public void setProbeEvent(final ProbeType type, final long durationUs, final ProbeResult result, + public void addProbeEvent(final ProbeType type, final long durationUs, final ProbeResult result, @Nullable final CaptivePortalDataShim capportData) { // When the number of ProbeEvents of mProbeEventsBuilder exceeds // MAX_PROBE_EVENTS_COUNT, stop adding ProbeEvent. + // TODO: consider recording the total number of probes in a separate field to know how + // many probes are skipped. if (mProbeEventsBuilder.getProbeEventCount() >= MAX_PROBE_EVENTS_COUNT) return; int latencyUs = NetworkStackUtils.saturatedCast(durationUs); @@ -178,7 +207,9 @@ public class NetworkValidationMetrics { (capportData.getExpiryTimeMillis() - currentTimeMillis()) / 1000; mCapportApiDataBuilder .setRemainingTtlSecs(NetworkStackUtils.saturatedCast(secondsRemaining)) - .setRemainingBytes(NetworkStackUtils.saturatedCast(capportData.getByteLimit())) + // TODO: rename this field to setRemainingKBytes, or use a long + .setRemainingBytes( + NetworkStackUtils.saturatedCast(capportData.getByteLimit() / 1000)) .setHasPortalUrl((capportData.getUserPortalUrl() != null)) .setHasVenueInfo((capportData.getVenueInfoUrl() != null)); probeEventBuilder.setCapportApiData(mCapportApiDataBuilder); @@ -196,25 +227,28 @@ public class NetworkValidationMetrics { /** * Write the NetworkValidationReported proto to statsd. + * + * <p>This is a no-op if {@link #startCollection(NetworkCapabilities)} was not called since the + * last call to this method. */ - public NetworkValidationReported sendValidationStats() { + public NetworkValidationReported maybeStopCollectionAndSend() { if (!mWatch.isStarted()) return null; mStatsBuilder.setProbeEvents(mProbeEventsBuilder); mStatsBuilder.setLatencyMicros(NetworkStackUtils.saturatedCast(mWatch.stop())); mStatsBuilder.setValidationIndex(mValidationIndex); // write a random value(0 ~ 999) for sampling. mStatsBuilder.setRandomNumber((int) (Math.random() * 1000)); - final NetworkValidationReported mStats = mStatsBuilder.build(); - final byte[] probeEvents = mStats.getProbeEvents().toByteArray(); + final NetworkValidationReported stats = mStatsBuilder.build(); + final byte[] probeEvents = stats.getProbeEvents().toByteArray(); NetworkStackStatsLog.write(NetworkStackStatsLog.NETWORK_VALIDATION_REPORTED, - mStats.getTransportType().getNumber(), + stats.getTransportType().getNumber(), probeEvents, - mStats.getValidationResult().getNumber(), - mStats.getLatencyMicros(), - mStats.getValidationIndex(), - mStats.getRandomNumber()); + stats.getValidationResult().getNumber(), + stats.getLatencyMicros(), + stats.getValidationIndex(), + stats.getRandomNumber()); mWatch.reset(); - return mStats; + return stats; } } diff --git a/src/com/android/server/connectivity/NetworkMonitor.java b/src/com/android/server/connectivity/NetworkMonitor.java index 47c910a..40de26e 100755 --- a/src/com/android/server/connectivity/NetworkMonitor.java +++ b/src/com/android/server/connectivity/NetworkMonitor.java @@ -450,7 +450,14 @@ public class NetworkMonitor extends StateMachine { protected boolean mIsCaptivePortalCheckEnabled; private boolean mUseHttps; - // The total number of captive portal detection attempts for this NetworkMonitor instance. + /** + * The total number of completed validation attempts (network validated or a captive portal was + * detected) for this NetworkMonitor instance. + * This does not include attempts that were interrupted, retried or finished with a result that + * is not success or portal. See {@code mValidationIndex} in {@link NetworkValidationMetrics} + * for a count of all attempts. + * TODO: remove when removing legacy metrics. + */ private int mValidations = 0; // Set if the user explicitly selected "Do not use this network" in captive portal sign-in app. @@ -504,6 +511,14 @@ public class NetworkMonitor extends StateMachine { private final boolean mPrivateIpNoInternetEnabled; + private final boolean mMetricsEnabled; + + // The validation metrics are accessed by individual probe threads, and by the StateMachine + // thread. All accesses must be synchronized to make sure the StateMachine thread can see + // reports from all probes. + // TODO: as that most usage is in the StateMachine thread and probes only add their probe + // events, consider having probes return their stats to the StateMachine, and only access this + // member on the StateMachine thread without synchronization. @GuardedBy("mNetworkValidationMetrics") private final NetworkValidationMetrics mNetworkValidationMetrics = new NetworkValidationMetrics(); @@ -571,6 +586,8 @@ public class NetworkMonitor extends StateMachine { mIsCaptivePortalCheckEnabled = getIsCaptivePortalCheckEnabled(); mPrivateIpNoInternetEnabled = getIsPrivateIpNoInternetEnabled(); + mMetricsEnabled = deps.isFeatureEnabled(context, NAMESPACE_CONNECTIVITY, + NetworkStackUtils.VALIDATION_METRICS_VERSION, true /* defaultEnabled */); mUseHttps = getUseHttpsValidation(); mCaptivePortalUserAgent = getCaptivePortalUserAgent(); mCaptivePortalHttpsUrls = makeCaptivePortalHttpsUrls(); @@ -783,28 +800,48 @@ public class NetworkMonitor extends StateMachine { } } - private void recordMetricsReset(@Nullable NetworkCapabilities nc) { - synchronized (mNetworkValidationMetrics) { - mNetworkValidationMetrics.reset(nc); + private void startMetricsCollection() { + if (!mMetricsEnabled) return; + try { + synchronized (mNetworkValidationMetrics) { + mNetworkValidationMetrics.startCollection(mNetworkCapabilities); + } + } catch (Exception e) { + Log.wtf(TAG, "Error resetting validation metrics", e); } } - private void recordMetricsProbeEvent(ProbeType type, long latencyMicros, ProbeResult result, + private void recordProbeEventMetrics(ProbeType type, long latencyMicros, ProbeResult result, CaptivePortalDataShim capportData) { - synchronized (mNetworkValidationMetrics) { - mNetworkValidationMetrics.setProbeEvent(type, latencyMicros, result, capportData); + if (!mMetricsEnabled) return; + try { + synchronized (mNetworkValidationMetrics) { + mNetworkValidationMetrics.addProbeEvent(type, latencyMicros, result, capportData); + } + } catch (Exception e) { + Log.wtf(TAG, "Error recording probe event", e); } } - private void recordMetricsValidationResult(int result, String redirectUrl) { - synchronized (mNetworkValidationMetrics) { - mNetworkValidationMetrics.setValidationResult(result, redirectUrl); + private void recordValidationResult(int result, String redirectUrl) { + if (!mMetricsEnabled) return; + try { + synchronized (mNetworkValidationMetrics) { + mNetworkValidationMetrics.setValidationResult(result, redirectUrl); + } + } catch (Exception e) { + Log.wtf(TAG, "Error recording validation result", e); } } - private void recordMetricsValidationStats() { - synchronized (mNetworkValidationMetrics) { - mNetworkValidationMetrics.sendValidationStats(); + private void maybeStopCollectionAndSendMetrics() { + if (!mMetricsEnabled) return; + try { + synchronized (mNetworkValidationMetrics) { + mNetworkValidationMetrics.maybeStopCollectionAndSend(); + } + } catch (Exception e) { + Log.wtf(TAG, "Error sending validation stats", e); } } @@ -820,7 +857,7 @@ public class NetworkMonitor extends StateMachine { transitionTo(mEvaluatingState); return HANDLED; case CMD_NETWORK_DISCONNECTED: - recordMetricsValidationStats(); + maybeStopCollectionAndSendMetrics(); logNetworkEvent(NetworkEvent.NETWORK_DISCONNECTED); quit(); return HANDLED; @@ -972,7 +1009,7 @@ public class NetworkMonitor extends StateMachine { initSocketTrackingIfRequired(); // start periodical polling. sendTcpPollingEvent(); - recordMetricsValidationStats(); + maybeStopCollectionAndSendMetrics(); } private void initSocketTrackingIfRequired() { @@ -1323,7 +1360,7 @@ public class NetworkMonitor extends StateMachine { sendMessageDelayed(CMD_CAPTIVE_PORTAL_RECHECK, 0 /* no UID */, CAPTIVE_PORTAL_REEVALUATE_DELAY_MS); mValidations++; - recordMetricsValidationStats(); + maybeStopCollectionAndSendMetrics(); } @Override @@ -1357,7 +1394,9 @@ public class NetworkMonitor extends StateMachine { handlePrivateDnsEvaluationFailure(); // The private DNS probe fails-fast if the server hostname cannot // be resolved. Record it as a failure with zero latency. - recordMetricsProbeEvent(ProbeType.PT_PRIVDNS, 0 /* latency */, + // TODO: refactor this together with the probe recorded in + // sendPrivateDnsProbe, so logging is symmetric / easier to follow. + recordProbeEventMetrics(ProbeType.PT_PRIVDNS, 0 /* latency */, ProbeResult.PR_FAILURE, null /* capportData */); break; } @@ -1468,7 +1507,7 @@ public class NetworkMonitor extends StateMachine { validationLog(PROBE_PRIVDNS, host, String.format("%dus - Error: %s", time, uhe.getMessage())); } - recordMetricsProbeEvent(ProbeType.PT_PRIVDNS, time, success ? ProbeResult.PR_SUCCESS : + recordProbeEventMetrics(ProbeType.PT_PRIVDNS, time, success ? ProbeResult.PR_SUCCESS : ProbeResult.PR_FAILURE, null /* capportData */); logValidationProbe(time, PROBE_PRIVDNS, success ? DNS_SUCCESS : DNS_FAILURE); return success; @@ -1480,8 +1519,14 @@ public class NetworkMonitor extends StateMachine { @Override public void enter() { - recordMetricsValidationStats(); - recordMetricsReset(mNetworkCapabilities); + // When starting a full probe cycle here, record any pending stats (for example if + // CMD_FORCE_REEVALUATE was called before evaluation finished, as can happen in + // EvaluatingPrivateDnsState). + maybeStopCollectionAndSendMetrics(); + // Restart the metrics collection timers. Metrics will be stopped and sent when the + // validation attempt finishes (as success, failure or portal), or if it is interrupted + // (by being restarted or if NetworkMonitor stops). + startMetricsCollection(); if (mEvaluateAttempts >= BLAME_FOR_EVALUATION_ATTEMPTS) { //Don't continue to blame UID forever. TrafficStats.clearThreadStatsUid(); @@ -1563,7 +1608,9 @@ public class NetworkMonitor extends StateMachine { private class WaitingForNextProbeState extends State { @Override public void enter() { - recordMetricsValidationStats(); + // Send metrics for this evaluation attempt. Metrics collection (and its timers) will be + // restarted when the next probe starts. + maybeStopCollectionAndSendMetrics(); scheduleNextProbe(); } @@ -2313,7 +2360,7 @@ public class NetworkMonitor extends StateMachine { // network validation (the HTTPS probe, which would likely fail anyway) or the PAC probe. if (mPrivateIpNoInternetEnabled && probeType == ValidationProbeEvent.PROBE_HTTP && (proxy == null) && hasPrivateIpAddress(resolvedAddr)) { - recordMetricsProbeEvent(NetworkValidationMetrics.probeTypeToEnum(probeType), + recordProbeEventMetrics(NetworkValidationMetrics.probeTypeToEnum(probeType), 0 /* latency */, ProbeResult.PR_PRIVATE_IP_DNS, null /* capportData */); return CaptivePortalProbeResult.PRIVATE_IP; } @@ -2346,7 +2393,7 @@ public class NetworkMonitor extends StateMachine { result = ValidationProbeEvent.DNS_FAILURE; } final long latency = watch.stop(); - recordMetricsProbeEvent(ProbeType.PT_DNS, latency, + recordProbeEventMetrics(ProbeType.PT_DNS, latency, (result == ValidationProbeEvent.DNS_SUCCESS) ? ProbeResult.PR_SUCCESS : ProbeResult.PR_FAILURE, null /* capportData */); logValidationProbe(latency, ValidationProbeEvent.PROBE_DNS, result); @@ -2473,7 +2520,7 @@ public class NetworkMonitor extends StateMachine { } else { probeResult = probeSpec.getResult(httpResponseCode, redirectUrl); } - recordMetricsProbeEvent(NetworkValidationMetrics.probeTypeToEnum(probeType), + recordProbeEventMetrics(NetworkValidationMetrics.probeTypeToEnum(probeType), probeTimer.stop(), NetworkValidationMetrics.httpProbeResultToEnum(probeResult), null /* capportData */); return probeResult; @@ -2629,6 +2676,8 @@ public class NetworkMonitor extends StateMachine { } private CaptivePortalDataShim sendCapportApiProbe() { + // TODO: consider adding metrics counters for each case returning null in this method + // (cases where the API is not implemented properly). validationLog("Fetching captive portal data from " + mCaptivePortalApiUrl); final String apiContent; @@ -2689,7 +2738,7 @@ public class NetworkMonitor extends StateMachine { if (mCaptivePortalApiUrl == null) return null; final Stopwatch capportApiWatch = new Stopwatch().start(); final CaptivePortalDataShim capportData = sendCapportApiProbe(); - recordMetricsProbeEvent(ProbeType.PT_CAPPORT_API, capportApiWatch.stop(), + recordProbeEventMetrics(ProbeType.PT_CAPPORT_API, capportApiWatch.stop(), capportData == null ? ProbeResult.PR_FAILURE : ProbeResult.PR_SUCCESS, capportData); return capportData; @@ -3412,7 +3461,7 @@ public class NetworkMonitor extends StateMachine { p.redirectUrl = redirectUrl; p.timestampMillis = SystemClock.elapsedRealtime(); notifyNetworkTested(p); - recordMetricsValidationResult(result, redirectUrl); + recordValidationResult(result, redirectUrl); } @VisibleForTesting |