diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/android/net/util/DataStallUtils.java | 53 | ||||
-rw-r--r-- | src/com/android/networkstack/netlink/TcpSocketTracker.java | 498 | ||||
-rw-r--r-- | src/com/android/server/connectivity/NetworkMonitor.java | 80 |
3 files changed, 617 insertions, 14 deletions
diff --git a/src/android/net/util/DataStallUtils.java b/src/android/net/util/DataStallUtils.java index b6dbeb1..454faf6 100644 --- a/src/android/net/util/DataStallUtils.java +++ b/src/android/net/util/DataStallUtils.java @@ -20,10 +20,11 @@ package android.net.util; * Collection of utilities for data stall. */ public class DataStallUtils { - /** - * Detect data stall via using dns timeout counts. - */ - public static final int DATA_STALL_EVALUATION_TYPE_DNS = 1; + /** Detect data stall using dns timeout counts. */ + public static final int DATA_STALL_EVALUATION_TYPE_DNS = 1 << 0; + /** Detect data stall using tcp connection fail rate. */ + public static final int DATA_STALL_EVALUATION_TYPE_TCP = 1 << 1; + // Default configuration values for data stall detection. public static final int DEFAULT_CONSECUTIVE_DNS_TIMEOUT_THRESHOLD = 5; public static final int DEFAULT_DATA_STALL_MIN_EVALUATE_TIME_MS = 60 * 1000; @@ -60,13 +61,55 @@ public class DataStallUtils { * Type: int * Valid values: * {@link #DATA_STALL_EVALUATION_TYPE_DNS} : Use dns as a signal. + * {@link #DATA_STALL_EVALUATION_TYPE_TCP} : Use tcp info as a signal. */ public static final String CONFIG_DATA_STALL_EVALUATION_TYPE = "data_stall_evaluation_type"; - public static final int DEFAULT_DATA_STALL_EVALUATION_TYPES = DATA_STALL_EVALUATION_TYPE_DNS; + public static final int DEFAULT_DATA_STALL_EVALUATION_TYPES = + DATA_STALL_EVALUATION_TYPE_DNS | DATA_STALL_EVALUATION_TYPE_TCP; // The default number of DNS events kept of the log kept for dns signal evaluation. Each event // is represented by a {@link com.android.server.connectivity.NetworkMonitor#DnsResult} objects. // It's also the size of array of {@link com.android.server.connectivity.nano.DnsEvent} kept in // metrics. Note that increasing the size may cause statsd log buffer bust. Need to check the // design in statsd when you try to increase the size. public static final int DEFAULT_DNS_LOG_SIZE = 20; + + /** + * The time interval for polling tcp info to observe the tcp health. + */ + public static String CONFIG_DATA_STALL_TCP_POLLING_INTERVAL = "data_stall_tcp_polling_interval"; + + /** + * Default polling interval to observe the tcp health. + */ + public static int DEFAULT_TCP_POLLING_INTERVAL_MS = 10_000; + + /** + * Default tcp packets fail rate to suspect as a data stall. + * + * Calculated by ((# of packets lost)+(# of packets retrans))/(# of packets sent)*100. Ideally, + * the percentage should be 100%. However, the ongoing packets may not be considered as neither + * lost or retrans yet. It will cause the percentage lower. + */ + public static final int DEFAULT_TCP_PACKETS_FAIL_PERCENTAGE = 80; + + /** + * The percentage of tcp packets fail rate to be suspected as a data stall. + * + * Type: int + * Valid values: 0 to 100. + */ + public static final String CONFIG_TCP_PACKETS_FAIL_RATE = "tcp_packets_fail_rate"; + + /** Corresponds to enum from bionic/libc/include/netinet/tcp.h. */ + public static final int TCP_ESTABLISHED = 1; + public static final int TCP_SYN_SENT = 2; + public static final int TCP_SYN_RECV = 3; + public static final int TCP_MONITOR_STATE_FILTER = + (1 << TCP_ESTABLISHED) | (1 << TCP_SYN_SENT) | (1 << TCP_SYN_RECV); + + /** + * Threshold for the minimal tcp packets count to evaluate data stall via tcp info. + */ + public static final int DEFAULT_DATA_STALL_MIN_PACKETS_THRESHOLD = 10; + public static final String CONFIG_MIN_PACKETS_THRESHOLD = "tcp_min_packets_threshold"; } diff --git a/src/com/android/networkstack/netlink/TcpSocketTracker.java b/src/com/android/networkstack/netlink/TcpSocketTracker.java new file mode 100644 index 0000000..8eb81b2 --- /dev/null +++ b/src/com/android/networkstack/netlink/TcpSocketTracker.java @@ -0,0 +1,498 @@ +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.android.networkstack.netlink; + +import static android.net.netlink.InetDiagMessage.InetDiagReqV2; +import static android.net.netlink.NetlinkConstants.NLMSG_DONE; +import static android.net.netlink.StructNlMsgHdr.NLM_F_DUMP; +import static android.net.netlink.StructNlMsgHdr.NLM_F_REQUEST; +import static android.net.util.DataStallUtils.CONFIG_MIN_PACKETS_THRESHOLD; +import static android.net.util.DataStallUtils.CONFIG_TCP_PACKETS_FAIL_RATE; +import static android.net.util.DataStallUtils.DEFAULT_DATA_STALL_MIN_PACKETS_THRESHOLD; +import static android.net.util.DataStallUtils.DEFAULT_TCP_PACKETS_FAIL_PERCENTAGE; +import static android.net.util.DataStallUtils.TCP_MONITOR_STATE_FILTER; +import static android.provider.DeviceConfig.NAMESPACE_CONNECTIVITY; +import static android.system.OsConstants.AF_INET; +import static android.system.OsConstants.AF_INET6; +import static android.system.OsConstants.AF_NETLINK; +import static android.system.OsConstants.IPPROTO_TCP; +import static android.system.OsConstants.NETLINK_INET_DIAG; +import static android.system.OsConstants.SOCK_CLOEXEC; +import static android.system.OsConstants.SOCK_DGRAM; +import static android.system.OsConstants.SOL_SOCKET; +import static android.system.OsConstants.SO_SNDTIMEO; + +import android.net.netlink.NetlinkSocket; +import android.net.netlink.StructInetDiagMsg; +import android.net.netlink.StructNlMsgHdr; +import android.net.util.NetworkStackUtils; +import android.net.util.SocketUtils; +import android.os.Build; +import android.os.SystemClock; +import android.system.ErrnoException; +import android.system.Os; +import android.system.StructTimeval; +import android.util.Log; +import android.util.LongSparseArray; +import android.util.SparseArray; + +import androidx.annotation.NonNull; +import androidx.annotation.Nullable; + +import com.android.internal.annotations.VisibleForTesting; +import com.android.networkstack.apishim.ShimUtils; + +import java.io.FileDescriptor; +import java.io.InterruptedIOException; +import java.net.SocketException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +/** + * Class for NetworkStack to send a SockDiag request and parse the returned tcp info. + * + * This should be only access from the NetworkMonitor statemahcine thread. + */ +public class TcpSocketTracker { + private static final String TAG = "TcpSocketTracker"; + private static final boolean DBG = false; + private static final int[] ADDRESS_FAMILIES = new int[] {AF_INET6, AF_INET}; + // Enough for parsing v1 tcp_info for more than 200 sockets per time. + private static final int DEFAULT_RECV_BUFSIZE = 60_000; + // Default I/O timeout time in ms of the socket request. + private static final long IO_TIMEOUT = 3_000L; + // Map to definition in bionic/libc/kernel/uapi/linux/netlink.h. + private static final int NLMSG_ALIGNTO = 4; + /** + * Flag for dumping struct tcp_info. + * Corresponding to enum definition in external/strace/linux/inet_diag.h. + */ + private static final int INET_DIAG_MEMINFO = 1; + @VisibleForTesting + public static final int SOCKDIAG_MSG_HEADER_SIZE = + StructNlMsgHdr.STRUCT_SIZE + StructInetDiagMsg.STRUCT_SIZE; + /** Cookie offset of an InetMagMessage header. */ + private static final int IDIAG_COOKIE_OFFSET = 44; + /** + * Gather the socket info. + * + * Key: The idiag_cookie value of the socket. See struct inet_diag_sockid in + * <linux_src>/include/uapi/linux/inet_diag.h + * Value: See {@Code SocketInfo} + */ + private final LongSparseArray<SocketInfo> mSocketInfos = new LongSparseArray<>(); + // Number of packets sent since the last received packet + private int mSentSinceLastRecv; + // The latest fail rate calculated by the latest tcp info. + private int mLatestPacketFailRate; + /** + * Request to send to kernel to request tcp info. + * + * Key: Ip family type. + * Value: Bytes array represent the {@Code InetDiagReqV2}. + */ + private final SparseArray<byte[]> mSockDiagMsg = new SparseArray<>(); + @VisibleForTesting + public final Dependencies mDependencies; + + public TcpSocketTracker(Dependencies dps) { + // Request tcp info from NetworkStack directly needs extra SELinux permission added after Q + // release. + mDependencies = dps; + if (!mDependencies.isTcpInfoParsingSupported()) return; + + // Build SocketDiag messages. + for (final int family : ADDRESS_FAMILIES) { + mSockDiagMsg.put( + family, + InetDiagReqV2(IPPROTO_TCP, + null /* local addr */, + null /* remote addr */, + family, + (short) (NLM_F_REQUEST | NLM_F_DUMP) /* flag */, + 0 /* pad */, + 1 << INET_DIAG_MEMINFO /* idiagExt */, + TCP_MONITOR_STATE_FILTER)); + } + } + + /** + * Request to send a SockDiag Netlink request. Receive and parse the returned message. This + * function should only be called from statemachine thread of NetworkMonitor. + * + * @Return if this polling request executes successfully or not. + * + * TODO: Need to filter socket info based on the target network. + */ + public boolean pollSocketsInfo() { + if (!mDependencies.isTcpInfoParsingSupported()) return false; + FileDescriptor fd = null; + try { + final long time = SystemClock.elapsedRealtime(); + fd = mDependencies.connectToKernel(); + + final TcpStat stat = new TcpStat(); + for (final int family : ADDRESS_FAMILIES) { + mDependencies.sendPollingRequest(fd, mSockDiagMsg.get(family)); + // Messages are composed with the following format. Stop parsing when receiving + // message with nlmsg_type NLMSG_DONE. + // +------------------+---------------+--------------+--------+ + // | Netlink Header | Family Header | Attributes | rtattr | + // | struct nlmsghdr | struct rtmsg | struct rtattr| data | + // +------------------+---------------+--------------+--------+ + // : : : + // +------------------+---------------+--------------+--------+ + // | Netlink Header | Family Header | Attributes | rtattr | + // | struct nlmsghdr | struct rtmsg | struct rtattr| data | + // +------------------+---------------+--------------+--------+ + final ByteBuffer bytes = mDependencies.recvMesssage(fd); + + while (enoughBytesRemainForValidNlMsg(bytes)) { + final StructNlMsgHdr nlmsghdr = StructNlMsgHdr.parse(bytes); + final int nlmsgLen = nlmsghdr.nlmsg_len; + log("pollSocketsInfo: nlmsghdr=" + nlmsghdr); + if (nlmsghdr.nlmsg_type == NLMSG_DONE) break; + + if (isValidInetDiagMsgSize(nlmsgLen)) { + // Get the socket cookie value. Composed by two Integers value. + // Corresponds to inet_diag_sockid in + // <linux_src>/include/uapi/linux/inet_diag.h + bytes.position(bytes.position() + IDIAG_COOKIE_OFFSET); + // It's stored in native with 2 int. Parse it as long for convenience. + final long cookie = bytes.getLong(); + // Skip the rest part of StructInetDiagMsg. + bytes.position(bytes.position() + 5 * Integer.BYTES); + final SocketInfo info = + parseSockInfo(bytes, family, nlmsgLen, time); + // Update TcpStats based on previous and current socket info. + stat.accumulate(calculateLatestPacketsStat(info, mSocketInfos.get(cookie))); + mSocketInfos.put(cookie, info); + } + } + } + // Calculate mSentSinceLastRecv and mLatestPacketFailRate. + mSentSinceLastRecv = (stat.receivedCount == 0) + ? (mSentSinceLastRecv + stat.sentCount) : 0; + mLatestPacketFailRate = ((stat.sentCount != 0) + ? ((stat.retransmitCount + stat.lostCount) * 100 / stat.sentCount) : 0); + + // Remove out-of-date socket info. + cleanupSocketInfo(time); + return true; + } catch (ErrnoException | SocketException | InterruptedIOException e) { + Log.e(TAG, "Fail to get TCP info via netlink.", e); + } finally { + NetworkStackUtils.closeSocketQuietly(fd); + } + + return false; + } + + private void cleanupSocketInfo(final long time) { + final int size = mSocketInfos.size(); + final List<Long> toRemove = new ArrayList<Long>(); + for (int i = 0; i < size; i++) { + final long key = mSocketInfos.keyAt(i); + if (mSocketInfos.get(key).updateTime < time) { + toRemove.add(key); + } + } + for (final Long key : toRemove) { + mSocketInfos.remove(key); + } + } + + /** Parse a {@code SocketInfo} from the given position of the given byte buffer. */ + @VisibleForTesting + @NonNull + SocketInfo parseSockInfo(@NonNull final ByteBuffer bytes, final int family, + final int nlmsgLen, final long time) { + final int remainingDataSize = bytes.position() + nlmsgLen - SOCKDIAG_MSG_HEADER_SIZE; + TcpInfo tcpInfo = null; + int mark = SocketInfo.INIT_MARK_VALUE; + // Get a tcp_info. + while (bytes.position() < remainingDataSize) { + final RoutingAttribute rtattr = + new RoutingAttribute(bytes.getShort(), bytes.getShort()); + final int dataLen = rtattr.getDataLength(); + if (rtattr.rtaType == RoutingAttribute.INET_DIAG_INFO) { + tcpInfo = TcpInfo.parse(bytes, dataLen); + } else if (rtattr.rtaType == RoutingAttribute.INET_DIAG_MARK) { + mark = bytes.getInt(); + } else { + // Data provided by kernel will include both valid data and padding data. The data + // len provided from kernel indicates the valid data size. Readers must deduce the + // alignment by themselves. + skipRemainingAttributesBytesAligned(bytes, dataLen); + } + } + final SocketInfo info = new SocketInfo(tcpInfo, family, mark, time); + log("pollSocketsInfo, " + info); + return info; + } + + /** + * Return if data stall is suspected or not by checking the latest tcp connection fail rate. + * Expect to check after polling the latest status. This function should only be called from + * statemachine thread of NetworkMonitor. + */ + public boolean isDataStallSuspected() { + if (!mDependencies.isTcpInfoParsingSupported()) return false; + return (getLatestPacketFailRate() >= getTcpPacketsFailRateThreshold()); + } + + /** Calculate the change between the {@param current} and {@param previous}. */ + private TcpStat calculateLatestPacketsStat(@NonNull final SocketInfo current, + @Nullable final SocketInfo previous) { + final TcpStat stat = new TcpStat(); + + if (current.tcpInfo != null) { + stat.sentCount = current.tcpInfo.getValue(TcpInfo.Field.SEGS_OUT).intValue(); + stat.receivedCount = current.tcpInfo.getValue(TcpInfo.Field.SEGS_IN).intValue(); + stat.lostCount = current.tcpInfo.getValue(TcpInfo.Field.LOST).intValue(); + stat.retransmitCount = current.tcpInfo.getValue(TcpInfo.Field.RETRANSMITS).intValue(); + } + if (previous != null && previous.tcpInfo != null) { + stat.sentCount -= previous.tcpInfo.getValue(TcpInfo.Field.SEGS_OUT).intValue(); + stat.receivedCount -= previous.tcpInfo.getValue(TcpInfo.Field.SEGS_IN).intValue(); + stat.lostCount -= previous.tcpInfo.getValue(TcpInfo.Field.LOST).intValue(); + stat.retransmitCount -= previous.tcpInfo.getValue(TcpInfo.Field.RETRANSMITS).intValue(); + } + + return stat; + } + + /** + * Get tcp connection fail rate based on packet lost and retransmission count. + */ + public int getLatestPacketFailRate() { + if (!mDependencies.isTcpInfoParsingSupported()) return 0; + // Only return fail rate if device sent enough packets. + if (getSentSinceLastRecv() < getMinPacketsThreshold()) return 0; + return mLatestPacketFailRate; + } + + /** + * Return the number of packets sent since last received. Note that this number is calculated + * between each polling period, not an accurate number. + */ + public int getSentSinceLastRecv() { + if (!mDependencies.isTcpInfoParsingSupported()) return 0; + return mSentSinceLastRecv; + } + + private int getMinPacketsThreshold() { + return mDependencies.getDeviceConfigPropertyInt(NAMESPACE_CONNECTIVITY, + CONFIG_MIN_PACKETS_THRESHOLD, DEFAULT_DATA_STALL_MIN_PACKETS_THRESHOLD); + } + + private int getTcpPacketsFailRateThreshold() { + return mDependencies.getDeviceConfigPropertyInt(NAMESPACE_CONNECTIVITY, + CONFIG_TCP_PACKETS_FAIL_RATE, DEFAULT_TCP_PACKETS_FAIL_PERCENTAGE); + } + + /** Check if the length and position of the given ByteBuffer is valid for a nlmsghdr message. */ + @VisibleForTesting + static boolean enoughBytesRemainForValidNlMsg(@NonNull final ByteBuffer bytes) { + return bytes.remaining() >= StructNlMsgHdr.STRUCT_SIZE; + } + + private static boolean isValidInetDiagMsgSize(final int nlMsgLen) { + return nlMsgLen >= SOCKDIAG_MSG_HEADER_SIZE; + } + + /** + * Method to skip the remaining attributes bytes. + * Corresponds to NLMSG_NEXT in bionic/libc/kernel/uapi/linux/netlink.h. + * + * @param buffer the target ByteBuffer + * @param len the remaining length to skip. + */ + private void skipRemainingAttributesBytesAligned(@NonNull final ByteBuffer buffer, + final int len) { + // Data in {@Code RoutingAttribute} is followed after header with size {@Code NLMSG_ALIGNTO} + // bytes long for each block. Next attribute will start after the padding bytes if any. + // If all remaining bytes after header are valid in a data block, next attr will just start + // after valid bytes. + // + // E.g. With NLMSG_ALIGNTO(4), an attr struct with length 5 means 1 byte valid data remains + // after header and 3(4-1) padding bytes. Next attr with length 8 will start after the + // padding bytes and contain 4(8-4) valid bytes of data. The next attr start after the + // valid bytes, like: + // + // [HEADER(L=5)][ 4-Bytes DATA ][ HEADER(L=8) ][4 bytes DATA][Next attr] + // [ 5 valid bytes ][3 padding bytes ][ 8 valid bytes ] ... + final int cur = buffer.position(); + buffer.position(cur + ((len + NLMSG_ALIGNTO - 1) & ~(NLMSG_ALIGNTO - 1))); + } + + private void log(final String str) { + if (DBG) Log.d(TAG, str); + } + + /** + * Corresponds to {@code struct rtattr} from bionic/libc/kernel/uapi/linux/rtnetlink.h + * + * struct rtattr { + * unsigned short rta_len; // Length of option + * unsigned short rta_type; // Type of option + * // Data follows + * }; + */ + class RoutingAttribute { + public static final int HEADER_LENGTH = 4; + // Corresponds to enum definition in bionic/libc/kernel/uapi/linux/inet_diag.h + public static final int INET_DIAG_INFO = 2; + public static final int INET_DIAG_MARK = 15; + + public final short rtaLen; // The whole valid size of the struct. + public final short rtaType; + + RoutingAttribute(final short len, final short type) { + rtaLen = len; + rtaType = type; + } + public int getDataLength() { + return rtaLen - HEADER_LENGTH; + } + } + + /** + * Data class for keeping the socket info. + */ + @VisibleForTesting + class SocketInfo { + // Initial mark value corresponds to the initValue in system/netd/include/Fwmark.h. + public static final int INIT_MARK_VALUE = 0; + @Nullable + public final TcpInfo tcpInfo; + // One of {@code AF_INET6, AF_INET}. + public final int ipFamily; + // "fwmark" value of the socket queried from native. + // TODO: Used to do bit-wise '&' operation to get netId information. + public final int fwmark; + // Socket information updated elapsed real time. + public final long updateTime; + + SocketInfo(@Nullable final TcpInfo info, final int family, final int mark, + final long time) { + tcpInfo = info; + ipFamily = family; + updateTime = time; + fwmark = mark; + } + + @Override + public String toString() { + return "SocketInfo {Type:" + ipTypeToString(ipFamily) + ", " + + tcpInfo + ", mark:" + fwmark + " updated at " + updateTime + "}"; + } + + private String ipTypeToString(final int type) { + if (type == AF_INET) { + return "IP"; + } else if (type == AF_INET6) { + return "IPV6"; + } else { + return "UNKNOWN"; + } + } + } + + /** + * private data class only for storing the Tcp statistic for calculating the fail rate and sent + * count + * */ + private class TcpStat { + public int sentCount; + public int lostCount; + public int retransmitCount; + public int receivedCount; + + void accumulate(final TcpStat stat) { + sentCount += stat.sentCount; + lostCount += stat.lostCount; + receivedCount += stat.receivedCount; + retransmitCount += stat.retransmitCount; + } + } + + + /** + * Dependencies class for testing. + */ + @VisibleForTesting + public static class Dependencies { + /** + * Connect to kernel via netlink socket. + * + * @return fd the fileDescriptor of the socket. + * Throw ErrnoException, SocketException if the exception is thrown. + */ + public FileDescriptor connectToKernel() throws ErrnoException, SocketException { + final FileDescriptor fd = + Os.socket(AF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_INET_DIAG); + Os.connect( + fd, SocketUtils.makeNetlinkSocketAddress(0 /* portId */, 0 /* groupMask */)); + + return fd; + } + /** + * Send composed message request to kernel. + * @param fd see {@Code FileDescriptor} + * @param msg the byte array represent the request message to write to kernel. + * + * Throw ErrnoException or InterruptedIOException if the exception is thrown. + */ + public void sendPollingRequest(@NonNull final FileDescriptor fd, @NonNull final byte[] msg) + throws ErrnoException, InterruptedIOException { + Os.setsockoptTimeval(fd, SOL_SOCKET, SO_SNDTIMEO, + StructTimeval.fromMillis(IO_TIMEOUT)); + Os.write(fd, msg, 0 /* byteOffset */, msg.length); + } + + /** + * Look up the value of a property in DeviceConfig. + * @param namespace The namespace containing the property to look up. + * @param name The name of the property to look up. + * @param defaultValue The value to return if the property does not exist or has no non-null + * value. + * @return the corresponding value, or defaultValue if none exists. + */ + public int getDeviceConfigPropertyInt(@NonNull final String namespace, + @NonNull final String name, final int defaultValue) { + return NetworkStackUtils.getDeviceConfigPropertyInt(namespace, name, defaultValue); + } + + /** + * Return if request tcp info via netlink socket is supported or not. + */ + public boolean isTcpInfoParsingSupported() { + // Request tcp info from NetworkStack directly needs extra SELinux permission added + // after Q release. + return ShimUtils.isReleaseOrDevelopmentApiAbove(Build.VERSION_CODES.Q); + } + + /** + * Receive the request message from kernel via given fd. + */ + public ByteBuffer recvMesssage(@NonNull final FileDescriptor fd) + throws ErrnoException, InterruptedIOException { + return NetlinkSocket.recvMessage(fd, DEFAULT_RECV_BUFSIZE, IO_TIMEOUT); + } + } +} diff --git a/src/com/android/server/connectivity/NetworkMonitor.java b/src/com/android/server/connectivity/NetworkMonitor.java index bda0c9a..63c294c 100644 --- a/src/com/android/server/connectivity/NetworkMonitor.java +++ b/src/com/android/server/connectivity/NetworkMonitor.java @@ -45,13 +45,16 @@ import static android.net.metrics.ValidationProbeEvent.PROBE_PRIVDNS; import static android.net.util.DataStallUtils.CONFIG_DATA_STALL_CONSECUTIVE_DNS_TIMEOUT_THRESHOLD; import static android.net.util.DataStallUtils.CONFIG_DATA_STALL_EVALUATION_TYPE; import static android.net.util.DataStallUtils.CONFIG_DATA_STALL_MIN_EVALUATE_INTERVAL; +import static android.net.util.DataStallUtils.CONFIG_DATA_STALL_TCP_POLLING_INTERVAL; import static android.net.util.DataStallUtils.CONFIG_DATA_STALL_VALID_DNS_TIME_THRESHOLD; import static android.net.util.DataStallUtils.DATA_STALL_EVALUATION_TYPE_DNS; +import static android.net.util.DataStallUtils.DATA_STALL_EVALUATION_TYPE_TCP; import static android.net.util.DataStallUtils.DEFAULT_CONSECUTIVE_DNS_TIMEOUT_THRESHOLD; import static android.net.util.DataStallUtils.DEFAULT_DATA_STALL_EVALUATION_TYPES; import static android.net.util.DataStallUtils.DEFAULT_DATA_STALL_MIN_EVALUATE_TIME_MS; import static android.net.util.DataStallUtils.DEFAULT_DATA_STALL_VALID_DNS_TIME_THRESHOLD_MS; import static android.net.util.DataStallUtils.DEFAULT_DNS_LOG_SIZE; +import static android.net.util.DataStallUtils.DEFAULT_TCP_POLLING_INTERVAL_MS; import static android.net.util.NetworkStackUtils.CAPTIVE_PORTAL_FALLBACK_PROBE_SPECS; import static android.net.util.NetworkStackUtils.CAPTIVE_PORTAL_FALLBACK_URL; import static android.net.util.NetworkStackUtils.CAPTIVE_PORTAL_HTTPS_URL; @@ -125,6 +128,7 @@ import com.android.internal.util.TrafficStatsConstants; import com.android.networkstack.R; import com.android.networkstack.metrics.DataStallDetectionStats; import com.android.networkstack.metrics.DataStallStatsUtils; +import com.android.networkstack.netlink.TcpSocketTracker; import com.android.networkstack.util.DnsUtils; import java.io.IOException; @@ -273,6 +277,10 @@ public class NetworkMonitor extends StateMachine { */ private static final int EVENT_NETWORK_CAPABILITIES_CHANGED = 20; + /** + * Message to self to poll current tcp status from kernel. + */ + private static final int EVENT_POLL_TCPINFO = 21; // Start mReevaluateDelayMs at this value and double. private static final int INITIAL_REEVALUATE_DELAY_MS = 1000; private static final int MAX_REEVALUATE_DELAY_MS = 10 * 60 * 1000; @@ -300,7 +308,7 @@ public class NetworkMonitor extends StateMachine { private final IpConnectivityLog mMetricsLog; private final Dependencies mDependencies; private final DataStallStatsUtils mDetectionStatsUtils; - + private final TcpSocketTracker mTcpTracker; // Configuration values for captive portal detection probes. private final String mCaptivePortalUserAgent; private final URL mCaptivePortalHttpsUrl; @@ -434,6 +442,7 @@ public class NetworkMonitor extends StateMachine { mDataStallMinEvaluateTime = getDataStallMinEvaluateTime(); mDataStallValidDnsTimeThreshold = getDataStallValidDnsTimeThreshold(); mDataStallEvaluationType = getDataStallEvaluationType(); + mTcpTracker = new TcpSocketTracker(new TcpSocketTracker.Dependencies()); // Provide empty LinkProperties and NetworkCapabilities to make sure they are never null, // even before notifyNetworkConnected. @@ -726,6 +735,7 @@ public class NetworkMonitor extends StateMachine { } mEvaluationState.reportEvaluationResult(result, null /* redirectUrl */); mValidations++; + sendTcpPollingEvent(); } @Override @@ -740,10 +750,16 @@ public class NetworkMonitor extends StateMachine { break; case EVENT_DNS_NOTIFICATION: mDnsStallDetector.accumulateConsecutiveDnsTimeoutCount(message.arg1); - if (isDataStall()) { - mCollectDataStallMetrics = true; - validationLog("Suspecting data stall, reevaluate"); + if (evaluateDataStall()) { + transitionTo(mEvaluatingState); + } + break; + case EVENT_POLL_TCPINFO: + // Transit if retrieve socket info is succeeded and suspected as a stall. + if (getTcpSocketTracker().pollSocketsInfo() && evaluateDataStall()) { transitionTo(mEvaluatingState); + } else { + sendTcpPollingEvent(); } break; default: @@ -751,6 +767,29 @@ public class NetworkMonitor extends StateMachine { } return HANDLED; } + + boolean evaluateDataStall() { + if (isDataStall()) { + // TODO: Add tcp info into metrics. + mCollectDataStallMetrics = true; + validationLog("Suspecting data stall, reevaluate"); + return true; + } + return false; + } + + @Override + public void exit() { + // Not useful for non-ValidatedState. + removeMessages(EVENT_POLL_TCPINFO); + } + } + + @VisibleForTesting + void sendTcpPollingEvent() { + if (isValidationRequired()) { + sendMessageDelayed(EVENT_POLL_TCPINFO, getTcpPollingInterval()); + } } private void writeDataStallStats(@NonNull final CaptivePortalProbeResult result) { @@ -1341,6 +1380,12 @@ public class NetworkMonitor extends StateMachine { DEFAULT_DATA_STALL_EVALUATION_TYPES); } + private int getTcpPollingInterval() { + return mDependencies.getDeviceConfigPropertyInt(NAMESPACE_CONNECTIVITY, + CONFIG_DATA_STALL_TCP_POLLING_INTERVAL, + DEFAULT_TCP_POLLING_INTERVAL_MS); + } + private URL[] makeCaptivePortalFallbackUrls() { try { final String firstUrl = mDependencies.getSetting(mContext, CAPTIVE_PORTAL_FALLBACK_URL, @@ -2060,12 +2105,16 @@ public class NetworkMonitor extends StateMachine { } } - @VisibleForTesting protected DnsStallDetector getDnsStallDetector() { return mDnsStallDetector; } + @VisibleForTesting + protected TcpSocketTracker getTcpSocketTracker() { + return mTcpTracker; + } + private boolean dataStallEvaluateTypeEnabled(int type) { return (mDataStallEvaluationType & type) != 0; } @@ -2077,7 +2126,7 @@ public class NetworkMonitor extends StateMachine { @VisibleForTesting protected boolean isDataStall() { - boolean result = false; + Boolean result = null; // Reevaluation will generate traffic. Thus, set a minimal reevaluation timer to limit the // possible traffic cost in metered network. if (!mNetworkCapabilities.hasCapability(NET_CAPABILITY_NOT_METERED) @@ -2085,11 +2134,22 @@ public class NetworkMonitor extends StateMachine { < mDataStallMinEvaluateTime)) { return false; } + // Check TCP signal. Suspect it may be a data stall if : + // 1. TCP connection fail rate(lost+retrans) is higher than threshold. + // 2. Accumulate enough packets count. + // TODO: Need to filter per target network. + if (dataStallEvaluateTypeEnabled(DATA_STALL_EVALUATION_TYPE_TCP)) { + if (getTcpSocketTracker().getSentSinceLastRecv() > 0) { + result = false; + } else if (getTcpSocketTracker().isDataStallSuspected()) { + result = true; + } + } // Check dns signal. Suspect it may be a data stall if both : // 1. The number of consecutive DNS query timeouts >= mConsecutiveDnsTimeoutThreshold. // 2. Those consecutive DNS queries happened in the last mValidDataStallDnsTimeThreshold ms. - if (dataStallEvaluateTypeEnabled(DATA_STALL_EVALUATION_TYPE_DNS)) { + if ((result == null) && dataStallEvaluateTypeEnabled(DATA_STALL_EVALUATION_TYPE_DNS)) { if (mDnsStallDetector.isDataStallSuspected(mConsecutiveDnsTimeoutThreshold, mDataStallValidDnsTimeThreshold)) { result = true; @@ -2099,10 +2159,12 @@ public class NetworkMonitor extends StateMachine { if (VDBG_STALL) { log("isDataStall: result=" + result + ", consecutive dns timeout count=" - + mDnsStallDetector.getConsecutiveTimeoutCount()); + + mDnsStallDetector.getConsecutiveTimeoutCount() + + ", tcp packets received=" + getTcpSocketTracker().getSentSinceLastRecv() + + ", tcp fail rate=" + getTcpSocketTracker().getLatestPacketFailRate()); } - return result; + return (result == null) ? false : result; } // Class to keep state of evaluation results and probe results. |