diff options
-rw-r--r-- | core/sysprop/Android.bp | 8 | ||||
-rw-r--r-- | core/sysprop/WatchdogProperties.sysprop | 45 | ||||
-rw-r--r-- | core/sysprop/api/com.android.sysprop.watchdog-current.txt | 20 | ||||
-rw-r--r-- | core/sysprop/api/com.android.sysprop.watchdog-latest.txt | 20 | ||||
-rw-r--r-- | services/core/Android.bp | 1 | ||||
-rw-r--r-- | services/core/java/com/android/server/Watchdog.java | 121 |
6 files changed, 215 insertions, 0 deletions
diff --git a/core/sysprop/Android.bp b/core/sysprop/Android.bp index 7f20a0ba6642..237ede2006ea 100644 --- a/core/sysprop/Android.bp +++ b/core/sysprop/Android.bp @@ -19,3 +19,11 @@ sysprop_library { api_packages: ["android.sysprop"], vendor_available: false, } + +sysprop_library { + name: "com.android.sysprop.watchdog", + srcs: ["WatchdogProperties.sysprop"], + property_owner: "Platform", + api_packages: ["android.sysprop"], + vendor_available: false, +} diff --git a/core/sysprop/WatchdogProperties.sysprop b/core/sysprop/WatchdogProperties.sysprop new file mode 100644 index 000000000000..1bcc773a9a5d --- /dev/null +++ b/core/sysprop/WatchdogProperties.sysprop @@ -0,0 +1,45 @@ +# Copyright (C) 2020 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +module: "android.sysprop.WatchdogProperties" +owner: Platform + +# To escape the watchdog timeout loop, fatal reboot the system when +# watchdog timed out 'fatal_count' times in 'fatal_window_second' +# seconds, if both values are not 0. Default value of both is 0. +prop { + api_name: "fatal_count" + type: Integer + prop_name: "framework_watchdog.fatal_count" + scope: Internal + access: Readonly +} + +prop { + api_name: "fatal_window_second" + type: Integer + prop_name: "framework_watchdog.fatal_window.second" + scope: Internal + access: Readonly +} + +# The fatal counting can be disabled by setting property +# 'is_fatal_ignore' to true. 
+prop { + api_name: "is_fatal_ignore" + type: Boolean + prop_name: "persist.debug.framework_watchdog.fatal_ignore" + scope: Internal + access: Readonly +} diff --git a/core/sysprop/api/com.android.sysprop.watchdog-current.txt b/core/sysprop/api/com.android.sysprop.watchdog-current.txt new file mode 100644 index 000000000000..d901aef945c9 --- /dev/null +++ b/core/sysprop/api/com.android.sysprop.watchdog-current.txt @@ -0,0 +1,20 @@ +props { + module: "android.sysprop.WatchdogProperties" + prop { + api_name: "fatal_count" + type: Integer + scope: Internal + prop_name: "framework_watchdog.fatal_count" + } + prop { + api_name: "fatal_window_second" + type: Integer + scope: Internal + prop_name: "framework_watchdog.fatal_window.second" + } + prop { + api_name: "is_fatal_ignore" + scope: Internal + prop_name: "persist.debug.framework_watchdog.fatal_ignore" + } +} diff --git a/core/sysprop/api/com.android.sysprop.watchdog-latest.txt b/core/sysprop/api/com.android.sysprop.watchdog-latest.txt new file mode 100644 index 000000000000..d901aef945c9 --- /dev/null +++ b/core/sysprop/api/com.android.sysprop.watchdog-latest.txt @@ -0,0 +1,20 @@ +props { + module: "android.sysprop.WatchdogProperties" + prop { + api_name: "fatal_count" + type: Integer + scope: Internal + prop_name: "framework_watchdog.fatal_count" + } + prop { + api_name: "fatal_window_second" + type: Integer + scope: Internal + prop_name: "framework_watchdog.fatal_window.second" + } + prop { + api_name: "is_fatal_ignore" + scope: Internal + prop_name: "persist.debug.framework_watchdog.fatal_ignore" + } +} diff --git a/services/core/Android.bp b/services/core/Android.bp index 776c8f5912ff..431555bb57d5 100644 --- a/services/core/Android.bp +++ b/services/core/Android.bp @@ -132,6 +132,7 @@ java_library_static { "netd_aidl_interfaces-platform-java", "overlayable_policy_aidl-java", "SurfaceFlingerProperties", + "com.android.sysprop.watchdog", ], } diff --git a/services/core/java/com/android/server/Watchdog.java 
b/services/core/java/com/android/server/Watchdog.java index 17c0970c5ca7..418deb801085 100644 --- a/services/core/java/com/android/server/Watchdog.java +++ b/services/core/java/com/android/server/Watchdog.java @@ -23,7 +23,9 @@ import android.content.Intent; import android.content.IntentFilter; import android.hidl.manager.V1_0.IServiceManager; import android.os.Binder; +import android.os.Build; import android.os.Debug; +import android.os.FileUtils; import android.os.Handler; import android.os.IPowerManager; import android.os.Looper; @@ -31,10 +33,12 @@ import android.os.Process; import android.os.RemoteException; import android.os.ServiceManager; import android.os.SystemClock; +import android.os.SystemProperties; import android.util.EventLog; import android.util.Log; import android.util.Slog; import android.util.SparseArray; +import android.sysprop.WatchdogProperties; import com.android.internal.os.ProcessCpuTracker; import com.android.internal.os.ZygoteConnectionConstants; @@ -42,12 +46,16 @@ import com.android.internal.util.FrameworkStatsLog; import com.android.server.am.ActivityManagerService; import com.android.server.wm.SurfaceAnimationThread; +import java.io.BufferedReader; import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.StringWriter; import java.util.ArrayList; import java.util.Arrays; +import java.util.concurrent.TimeUnit; import java.util.HashSet; import java.util.List; @@ -75,6 +83,12 @@ public class Watchdog extends Thread { private static final int WAITED_HALF = 2; private static final int OVERDUE = 3; + // Track watchdog timeout history and break the crash loop if there is. 
+    private static final String TIMEOUT_HISTORY_FILE = "/data/system/watchdog-timeout-history.txt";
+    private static final String PROP_FATAL_LOOP_COUNT = "framework_watchdog.fatal_count";
+    private static final String PROP_FATAL_LOOP_WINDOWS_SECS =
+            "framework_watchdog.fatal_window.second";
+
     // Which native processes to dump into dropbox's stack traces
     public static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
         "/system/bin/audioserver",
@@ -688,6 +702,10 @@ public class Watchdog extends Thread {
                 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
                 WatchdogDiagnostics.diagnoseCheckers(blockedCheckers);
                 Slog.w(TAG, "*** GOODBYE!");
+                if (!Build.IS_USER && isCrashLoopFound()
+                        && !WatchdogProperties.is_fatal_ignore().orElse(false)) {
+                    breakCrashLoop();
+                }
                 Process.killProcess(Process.myPid());
                 System.exit(10);
             }
@@ -705,4 +723,143 @@ public class Watchdog extends Thread {
             Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
         }
     }
+
+    /** Overwrites the timeout history file with an empty history. */
+    private void resetTimeoutHistory() {
+        writeTimeoutHistory(new ArrayList<String>());
+    }
+
+    /**
+     * Persists the timeout history to {@link #TIMEOUT_HISTORY_FILE} as a single record of the
+     * form {@code BOOTTIME:t1,t2,...}. I/O failures are logged and otherwise ignored.
+     *
+     * @param crashHistory timeout timestamps as strings, oldest first
+     */
+    private void writeTimeoutHistory(Iterable<String> crashHistory) {
+        String data = String.join(",", crashHistory);
+
+        try (FileWriter writer = new FileWriter(TIMEOUT_HISTORY_FILE)) {
+            // The timestamps are relative to boot, so tag the record with ro.boottime.zygote;
+            // readTimeoutHistory() drops records whose tag does not match the current value.
+            writer.write(SystemProperties.get("ro.boottime.zygote"));
+            writer.write(":");
+            writer.write(data);
+        } catch (IOException e) {
+            Slog.e(TAG, "Failed to write file " + TIMEOUT_HISTORY_FILE, e);
+        }
+    }
+
+    /**
+     * Reads the timeout history stored by {@link #writeTimeoutHistory}.
+     *
+     * @return the stored timestamps; an empty array if the file is missing, unreadable or
+     *         empty, or if it was written under a different {@code ro.boottime.zygote} value
+     */
+    private String[] readTimeoutHistory() {
+        final String[] emptyStringArray = {};
+
+        try (BufferedReader reader = new BufferedReader(new FileReader(TIMEOUT_HISTORY_FILE))) {
+            String line = reader.readLine();
+            if (line == null) {
+                return emptyStringArray;
+            }
+
+            String[] data = line.trim().split(":");
+            String boottime = data.length >= 1 ? data[0] : "";
+            String history = data.length >= 2 ? data[1] : "";
+            if (SystemProperties.get("ro.boottime.zygote").equals(boottime) && !history.isEmpty()) {
+                return history.split(",");
+            } else {
+                return emptyStringArray;
+            }
+        } catch (FileNotFoundException e) {
+            return emptyStringArray;
+        } catch (IOException e) {
+            Slog.e(TAG, "Failed to read file " + TIMEOUT_HISTORY_FILE, e);
+            return emptyStringArray;
+        }
+    }
+
+    /**
+     * Returns whether {@code /sys/class/android_usb/android0/state} reads {@code CONFIGURED}.
+     * A read failure is logged and treated as "no active connection".
+     */
+    private boolean hasActiveUsbConnection() {
+        try {
+            final String state = FileUtils.readTextFile(
+                    new File("/sys/class/android_usb/android0/state"),
+                    128 /*max*/, null /*ellipsis*/).trim();
+            if ("CONFIGURED".equals(state)) {
+                return true;
+            }
+        } catch (IOException e) {
+            Slog.w(TAG, "Failed to determine if device was on USB", e);
+        }
+        return false;
+    }
+
+    /**
+     * Records the current watchdog timeout in the history file and reports whether the last
+     * {@code fatal_count} timeouts all happened within {@code fatal_window_second} seconds.
+     *
+     * <p>Nothing is recorded and {@code false} is returned unless both sysprops are positive.
+     * While the device has an active USB connection the timeout is still recorded, but
+     * {@code false} is returned.
+     *
+     * @return {@code true} if a watchdog timeout loop was detected
+     */
+    private boolean isCrashLoopFound() {
+        int fatalCount = WatchdogProperties.fatal_count().orElse(0);
+        long fatalWindowMs = TimeUnit.SECONDS.toMillis(
+                WatchdogProperties.fatal_window_second().orElse(0));
+        // Non-positive values disable the check; a negative count would otherwise produce an
+        // invalid range for Arrays.copyOfRange() below.
+        if (fatalCount <= 0 || fatalWindowMs <= 0) {
+            if (fatalCount != 0 || fatalWindowMs != 0) {
+                Slog.w(TAG, String.format("sysprops '%s' and '%s' should be set or unset together",
+                            PROP_FATAL_LOOP_COUNT, PROP_FATAL_LOOP_WINDOWS_SECS));
+            }
+            return false;
+        }
+
+        // new-history = [last (fatalCount - 1) items in old-history] + [nowMs], so that it holds
+        // at most fatalCount entries and get(0) is the oldest timeout that still counts.
+        long nowMs = SystemClock.elapsedRealtime(); // Time since boot including deep sleep.
+        String[] rawCrashHistory = readTimeoutHistory();
+        ArrayList<String> crashHistory = new ArrayList<String>(Arrays.asList(Arrays.copyOfRange(
+                        rawCrashHistory,
+                        Math.max(0, rawCrashHistory.length - (fatalCount - 1)),
+                        rawCrashHistory.length)));
+        crashHistory.add(String.valueOf(nowMs));
+        writeTimeoutHistory(crashHistory);
+
+        // Returns false if the device has an active USB connection.
+        if (hasActiveUsbConnection()) {
+            return false;
+        }
+
+        long firstCrashMs;
+        try {
+            firstCrashMs = Long.parseLong(crashHistory.get(0));
+        } catch (NumberFormatException t) {
+            // The stored history is corrupt; discard it and start over.
+            Slog.w(TAG, "Failed to parseLong " + crashHistory.get(0), t);
+            resetTimeoutHistory();
+            return false;
+        }
+        return crashHistory.size() >= fatalCount && nowMs - firstCrashMs < fatalWindowMs;
+    }
+
+    /**
+     * Appends a note to {@code /dev/kmsg_debug} (failures are logged and ignored) and then calls
+     * {@code doSysRq('c')}. NOTE(review): presumably a kernel crash via SysRq 'c' -- confirm.
+     */
+    private void breakCrashLoop() {
+        try (FileWriter kmsg = new FileWriter("/dev/kmsg_debug", /* append= */ true)) {
+            kmsg.append("Fatal reset to escape the system_server crashing loop\n");
+        } catch (IOException e) {
+            Slog.w(TAG, "Failed to append to kmsg", e);
+        }
+        doSysRq('c');
+    }
 }