author    Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2021-09-04 23:00:21 +0000
committer Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2021-09-04 23:00:21 +0000
commit    9f1e89dee9f1d4ad146e9c0577afecec13493acf (patch)
tree      45eaf450516b47abfbcff12ed8b93ebd7e3dded3
parent    d83b951fa3f6da1174faa10c85b6cf8626d0c05a (diff)
parent    8992f92aab381f0dc85cfd4003d935263f8451c8 (diff)
Snap for 7705048 from 8992f92aab381f0dc85cfd4003d935263f8451c8 to sc-qpr1-release
Change-Id: I610491001866cec4160fbbd7af1d10e5759d74d2
-rw-r--r--  openjdkjvm/OpenjdkJvm.cc                                              1
-rw-r--r--  openjdkjvmti/ti_thread.cc                                             1
-rw-r--r--  runtime/gc/collector/concurrent_copying.cc                           50
-rw-r--r--  runtime/jit/profile_saver.cc                                          2
-rw-r--r--  runtime/native/dalvik_system_VMStack.cc                               1
-rw-r--r--  runtime/native/java_lang_Thread.cc                                    1
-rw-r--r--  runtime/runtime.cc                                                    4
-rw-r--r--  runtime/runtime.h                                                    14
-rw-r--r--  runtime/thread.cc                                                     6
-rw-r--r--  runtime/thread.h                                                      5
-rw-r--r--  runtime/thread_list.cc                                               30
-rw-r--r--  runtime/thread_list.h                                                 5
-rw-r--r--  test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc  2
13 files changed, 78 insertions, 44 deletions
diff --git a/openjdkjvm/OpenjdkJvm.cc b/openjdkjvm/OpenjdkJvm.cc
index d64086d43c..9b514af452 100644
--- a/openjdkjvm/OpenjdkJvm.cc
+++ b/openjdkjvm/OpenjdkJvm.cc
@@ -423,7 +423,6 @@ JNIEXPORT void JVM_SetNativeThreadName(JNIEnv* env, jobject jthread, jstring jav
art::Thread* thread;
{
thread = thread_list->SuspendThreadByPeer(jthread,
- true,
art::SuspendReason::kInternal,
&timed_out);
}
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index bb8fa3b694..a9a6ee8895 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -898,7 +898,6 @@ jvmtiError ThreadUtil::SuspendOther(art::Thread* self,
bool timeout = true;
art::Thread* ret_target = art::Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
target_jthread,
- /* request_suspension= */ true,
art::SuspendReason::kForUserCode,
&timeout);
if (ret_target == nullptr && !timeout) {
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 538d7bef7c..936b9199b5 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1630,11 +1630,16 @@ void ConcurrentCopying::CopyingPhase() {
// for the last time before transitioning to the shared mark stack mode, which would process new
// refs that may have been concurrently pushed onto the mark stack during the ProcessMarkStack()
// call above. At the same time, disable weak ref accesses using a per-thread flag. It's
- // important to do these together in a single checkpoint so that we can ensure that mutators
- // won't newly gray objects and push new refs onto the mark stack due to weak ref accesses and
+ // important to do these together so that we can ensure that mutators won't
+ // newly gray objects and push new refs onto the mark stack due to weak ref accesses and
// mutators safely transition to the shared mark stack mode (without leaving unprocessed refs on
// the thread-local mark stacks), without a race. This is why we use a thread-local weak ref
// access flag Thread::tls32_.weak_ref_access_enabled_ instead of the global ones.
+ // We must use a stop-the-world pause to disable weak ref access. A checkpoint may lead to a
+ // deadlock if one mutator acquires a low-level mutex and then gets blocked while accessing
+ // a weak-ref (after participating in the checkpoint), and another mutator indefinitely waits
+ // for the mutex before it participates in the checkpoint. Consequently, the gc-thread blocks
+ // forever as the checkpoint never finishes (See runtime/mutator_gc_coord.md).
SwitchToSharedMarkStackMode();
CHECK(!self->GetWeakRefAccessEnabled());
// Now that weak refs accesses are disabled, once we exhaust the shared mark stack again here
@@ -2044,21 +2049,36 @@ class ConcurrentCopying::AssertToSpaceInvariantFieldVisitor {
void ConcurrentCopying::RevokeThreadLocalMarkStacks(bool disable_weak_ref_access,
Closure* checkpoint_callback) {
Thread* self = Thread::Current();
- RevokeThreadLocalMarkStackCheckpoint check_point(this, disable_weak_ref_access);
+ Locks::mutator_lock_->AssertSharedHeld(self);
ThreadList* thread_list = Runtime::Current()->GetThreadList();
- gc_barrier_->Init(self, 0);
- size_t barrier_count = thread_list->RunCheckpoint(&check_point, checkpoint_callback);
- // If there are no threads to wait which implys that all the checkpoint functions are finished,
- // then no need to release the mutator lock.
- if (barrier_count == 0) {
- return;
- }
- Locks::mutator_lock_->SharedUnlock(self);
- {
- ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
- gc_barrier_->Increment(self, barrier_count);
+ RevokeThreadLocalMarkStackCheckpoint check_point(this, disable_weak_ref_access);
+ if (disable_weak_ref_access) {
+ // We're the only thread that could possibly ask for exclusive access here.
+ Locks::mutator_lock_->SharedUnlock(self);
+ {
+ ScopedPause pause(this);
+ MutexLock mu(self, *Locks::thread_list_lock_);
+ checkpoint_callback->Run(self);
+ for (Thread* thread : thread_list->GetList()) {
+ check_point.Run(thread);
+ }
+ }
+ Locks::mutator_lock_->SharedLock(self);
+ } else {
+ gc_barrier_->Init(self, 0);
+ size_t barrier_count = thread_list->RunCheckpoint(&check_point, checkpoint_callback);
+ // If there are no threads to wait for, which implies that all the checkpoint
+ // functions have finished, then there is no need to release the mutator lock.
+ if (barrier_count == 0) {
+ return;
+ }
+ Locks::mutator_lock_->SharedUnlock(self);
+ {
+ ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+ gc_barrier_->Increment(self, barrier_count);
+ }
+ Locks::mutator_lock_->SharedLock(self);
}
- Locks::mutator_lock_->SharedLock(self);
}
void ConcurrentCopying::RevokeThreadLocalMarkStack(Thread* thread) {
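The rewritten RevokeThreadLocalMarkStacks above runs the revocation under an exclusive mutator-lock pause whenever weak ref access is being disabled, avoiding the checkpoint deadlock described in the CopyingPhase comment. A minimal standalone sketch of that control flow, with std::shared_mutex standing in for ART's mutator lock (all names hypothetical, not ART code):

#include <functional>
#include <iostream>
#include <mutex>
#include <shared_mutex>
#include <vector>

std::shared_mutex mutator_lock;            // stands in for Locks::mutator_lock_
std::mutex thread_list_lock;               // stands in for Locks::thread_list_lock_
std::vector<int> thread_list = {1, 2, 3};  // stands in for the runtime thread list

// Pause-based revocation: with every mutator stopped, the GC thread walks the
// list and runs each closure itself, so no mutator can stall the operation by
// blocking on a low-level mutex part-way through, as it could in a checkpoint.
void RevokeWithPause(const std::function<void(int)>& per_thread_closure) {
  mutator_lock.unlock_shared();  // drop shared access before asking for exclusive
  {
    std::unique_lock<std::shared_mutex> pause(mutator_lock);  // exclusive = stop-the-world
    std::lock_guard<std::mutex> list_lock(thread_list_lock);
    for (int thread : thread_list) {
      per_thread_closure(thread);  // the GC runs the closure on behalf of each thread
    }
  }
  mutator_lock.lock_shared();  // resume shared access
}

int main() {
  mutator_lock.lock_shared();  // the GC normally holds the mutator lock shared
  RevokeWithPause([](int t) { std::cout << "revoked mark stack of thread " << t << "\n"; });
  mutator_lock.unlock_shared();
}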
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 425eadca41..b86badcba5 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -189,7 +189,7 @@ void ProfileSaver::Run() {
// We might have been woken up by a huge number of notifications to guarantee saving.
// If we didn't meet the minimum saving period go back to sleep (only if missed by
// a reasonable margin).
- uint64_t min_save_period_ns = options_.GetMinSavePeriodMs();
+ uint64_t min_save_period_ns = MsToNs(options_.GetMinSavePeriodMs());
while (min_save_period_ns * 0.9 > sleep_time) {
{
MutexLock mu(self, wait_lock_);
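The one-line fix above addresses a unit mismatch: GetMinSavePeriodMs() returns milliseconds, but sleep_time is in nanoseconds, so the unconverted value made the back-to-sleep loop exit almost immediately. A standalone sketch of the arithmetic (MsToNs mirrors ART's helper; the sample values are made up):

#include <cstdint>
#include <iostream>

constexpr uint64_t MsToNs(uint64_t ms) { return ms * 1000000ull; }

int main() {
  const uint64_t min_save_period_ms = 40000;     // 40 s minimum save period (sample value)
  const uint64_t sleep_time_ns = 5000000000ull;  // slept 5 s so far

  // Buggy: milliseconds compared against nanoseconds; 36000 > 5e9 is false,
  // so the saver stopped sleeping after 5 s instead of 40 s.
  std::cout << (min_save_period_ms * 0.9 > sleep_time_ns) << "\n";          // prints 0
  // Fixed: both sides in nanoseconds; 3.6e10 > 5e9 is true, so keep sleeping.
  std::cout << (MsToNs(min_save_period_ms) * 0.9 > sleep_time_ns) << "\n";  // prints 1
}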
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index 9d2dfac069..e88516e248 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -59,7 +59,6 @@ static ResultT GetThreadStack(const ScopedFastNativeObjectAccess& soa,
ThreadList* thread_list = Runtime::Current()->GetThreadList();
bool timed_out;
Thread* thread = thread_list->SuspendThreadByPeer(peer,
- /* request_suspension= */ true,
SuspendReason::kInternal,
&timed_out);
if (thread != nullptr) {
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 37b3fe642e..c3b4fe09de 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -148,7 +148,6 @@ static void Thread_setNativeName(JNIEnv* env, jobject peer, jstring java_name) {
bool timed_out;
// Take suspend thread lock to avoid races with threads trying to suspend this one.
Thread* thread = thread_list->SuspendThreadByPeer(peer,
- /* request_suspension= */ true,
SuspendReason::kInternal,
&timed_out);
if (thread != nullptr) {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 433f564b2d..6c99c1fa8d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -410,7 +410,7 @@ Runtime::~Runtime() {
while (threads_being_born_ > 0) {
shutdown_cond_->Wait(self);
}
- shutting_down_ = true;
+ SetShuttingDown();
}
// Shutdown and wait for the daemons.
CHECK(self != nullptr);
@@ -641,7 +641,7 @@ void Runtime::Abort(const char* msg) {
// May be coming from an unattached thread.
if (Thread::Current() == nullptr) {
Runtime* current = Runtime::Current();
- if (current != nullptr && current->IsStarted() && !current->IsShuttingDown(nullptr)) {
+ if (current != nullptr && current->IsStarted() && !current->IsShuttingDownUnsafe()) {
// We do not flag this to the unexpected-signal handler so that that may dump the stack.
abort();
UNREACHABLE();
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 68456cd37b..b2093a303c 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -222,7 +222,13 @@ class Runtime {
bool IsShuttingDown(Thread* self);
bool IsShuttingDownLocked() const REQUIRES(Locks::runtime_shutdown_lock_) {
- return shutting_down_;
+ return shutting_down_.load(std::memory_order_relaxed);
+ }
+ bool IsShuttingDownUnsafe() const {
+ return shutting_down_.load(std::memory_order_relaxed);
+ }
+ void SetShuttingDown() REQUIRES(Locks::runtime_shutdown_lock_) {
+ shutting_down_.store(true, std::memory_order_relaxed);
}
size_t NumberOfThreadsBeingBorn() const REQUIRES(Locks::runtime_shutdown_lock_) {
@@ -1190,8 +1196,10 @@ class Runtime {
// Waited upon until no threads are being born.
std::unique_ptr<ConditionVariable> shutdown_cond_ GUARDED_BY(Locks::runtime_shutdown_lock_);
- // Set when runtime shutdown is past the point that new threads may attach.
- bool shutting_down_ GUARDED_BY(Locks::runtime_shutdown_lock_);
+ // Set when runtime shutdown is past the point that new threads may attach. Usually
+ // GUARDED_BY(Locks::runtime_shutdown_lock_). But we need to check it in Abort without the
+ // lock, because we may already own it.
+ std::atomic<bool> shutting_down_;
// The runtime is starting to shutdown but is blocked waiting on shutdown_cond_.
bool shutting_down_started_ GUARDED_BY(Locks::runtime_shutdown_lock_);
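The pattern adopted for shutting_down_ keeps writes and ordinary reads behind the shutdown lock, while the abort path does a lock-free relaxed load because it may already own the lock. A minimal standalone model of that pattern (hypothetical names, not ART code):

#include <atomic>
#include <mutex>

class RuntimeModel {
 public:
  std::mutex shutdown_lock_;  // stands in for Locks::runtime_shutdown_lock_

  void SetShuttingDown() {  // caller must hold shutdown_lock_
    shutting_down_.store(true, std::memory_order_relaxed);
  }
  bool IsShuttingDownLocked() const {  // caller must hold shutdown_lock_
    return shutting_down_.load(std::memory_order_relaxed);
  }
  bool IsShuttingDownUnsafe() const {  // no lock: possibly stale, but never torn
    return shutting_down_.load(std::memory_order_relaxed);
  }

 private:
  std::atomic<bool> shutting_down_{false};
};

int main() {
  RuntimeModel runtime;
  {
    std::lock_guard<std::mutex> lock(runtime.shutdown_lock_);
    runtime.SetShuttingDown();
  }
  // An abort path can read the flag without touching the lock it may already own.
  return runtime.IsShuttingDownUnsafe() ? 0 : 1;
}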
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 46aa2b59ad..16a5f93be4 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -4468,6 +4468,12 @@ bool Thread::IsSystemDaemon() const {
WellKnownClasses::java_lang_Thread_systemDaemon)->GetBoolean(GetPeer());
}
+std::string Thread::StateAndFlagsAsHexString() const {
+ std::stringstream result_stream;
+ result_stream << std::hex << tls32_.state_and_flags.as_atomic_int.load();
+ return result_stream.str();
+}
+
ScopedExceptionStorage::ScopedExceptionStorage(art::Thread* self)
: self_(self), hs_(self_), excp_(hs_.NewHandle<art::mirror::Throwable>(self_->GetException())) {
self_->ClearException();
diff --git a/runtime/thread.h b/runtime/thread.h
index 7a408021c1..676bfd81de 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -254,7 +254,7 @@ class Thread {
bool IsSuspended() const {
union StateAndFlags state_and_flags;
- state_and_flags.as_int = tls32_.state_and_flags.as_int;
+ state_and_flags.as_int = tls32_.state_and_flags.as_atomic_int.load(std::memory_order_relaxed);
return state_and_flags.as_struct.state != kRunnable &&
(state_and_flags.as_struct.flags & kSuspendRequest) != 0;
}
@@ -1517,6 +1517,9 @@ class Thread {
};
static_assert(sizeof(StateAndFlags) == sizeof(int32_t), "Weird state_and_flags size");
+ // Format state and flags as a hex string. For diagnostic output.
+ std::string StateAndFlagsAsHexString() const;
+
static void ThreadExitCallback(void* arg);
// Maximum number of suspend barriers.
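The new StateAndFlagsAsHexString() helper snapshots the same packed state-and-flags word that IsSuspended() now reads with a relaxed atomic load, and formats it for the timeout diagnostics added in thread_list.cc. A standalone model (the packing constant is invented):

#include <atomic>
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

// One packed 32-bit word holding thread state and flags, as in tls32_.
std::atomic<int32_t> state_and_flags{0x00010002};  // hypothetical packing: state=1, flags=2

std::string StateAndFlagsAsHexString() {
  std::stringstream result_stream;
  // A single atomic load avoids a torn read; relaxed ordering suffices for a
  // diagnostic snapshot, as in IsSuspended() above.
  result_stream << std::hex << state_and_flags.load(std::memory_order_relaxed);
  return result_stream.str();
}

int main() {
  std::cout << "Suspended thread state_and_flags: 0x" << StateAndFlagsAsHexString() << "\n";
}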
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index f8e99e8f62..84b7384c46 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -874,10 +874,11 @@ static void ThreadSuspendByPeerWarning(Thread* self,
}
Thread* ThreadList::SuspendThreadByPeer(jobject peer,
- bool request_suspension,
SuspendReason reason,
bool* timed_out) {
+ bool request_suspension = true;
const uint64_t start_time = NanoTime();
+ int self_suspend_count = 0;
useconds_t sleep_us = kThreadSuspendInitialSleepUs;
*timed_out = false;
Thread* const self = Thread::Current();
@@ -926,6 +927,7 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer,
// We hold the suspend count lock but another thread is trying to suspend us. Its not
// safe to try to suspend another thread in case we get a cycle. Start the loop again
// which will allow this thread to be suspended.
+ ++self_suspend_count;
continue;
}
CHECK(suspended_thread == nullptr);
@@ -957,20 +959,22 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer,
}
const uint64_t total_delay = NanoTime() - start_time;
if (total_delay >= thread_suspend_timeout_ns_) {
- ThreadSuspendByPeerWarning(self,
- ::android::base::FATAL,
- "Thread suspension timed out",
- peer);
- if (suspended_thread != nullptr) {
+ if (suspended_thread == nullptr) {
+ ThreadSuspendByPeerWarning(self,
+ ::android::base::FATAL,
+ "Failed to issue suspend request",
+ peer);
+ } else {
CHECK_EQ(suspended_thread, thread);
- bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
- -1,
- nullptr,
- reason);
- DCHECK(updated);
+ LOG(WARNING) << "Suspended thread state_and_flags: "
+ << suspended_thread->StateAndFlagsAsHexString()
+ << ", self_suspend_count = " << self_suspend_count;
+ ThreadSuspendByPeerWarning(self,
+ ::android::base::FATAL,
+ "Thread suspension timed out",
+ peer);
}
- *timed_out = true;
- return nullptr;
+ UNREACHABLE();
} else if (sleep_us == 0 &&
total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
// We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
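The timeout path above is now fatal: instead of decrementing the suspend count and returning null with *timed_out set, it logs the target's state_and_flags and self_suspend_count and aborts. A simplified standalone sketch of the spin-then-sleep wait with that policy (hypothetical names; ART spins for kThreadSuspendMaxYieldUs before its first sleep, which this condenses to a single yield):

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <thread>

static int polls = 0;
static bool TargetSuspended() { return ++polls > 3; }  // pretend the target acks on the 4th poll

static void WaitForSuspension(uint64_t timeout_ns) {
  const auto start = std::chrono::steady_clock::now();
  uint64_t sleep_us = 0;  // 0 means we are still in the spin/yield phase
  while (!TargetSuspended()) {
    const uint64_t delay_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now() - start).count();
    if (delay_ns >= timeout_ns) {
      // Timeout is now fatal: log diagnostics and abort rather than undoing
      // the suspend request and reporting the failure to the caller.
      std::fprintf(stderr, "Thread suspension timed out\n");
      std::abort();
    }
    if (sleep_us == 0) {
      std::this_thread::yield();
      sleep_us = 1;  // switch to sleeps once the spin budget is spent
    } else {
      std::this_thread::sleep_for(std::chrono::microseconds(sleep_us));
      sleep_us = std::min<uint64_t>(sleep_us * 2, 1000);  // capped exponential backoff
    }
  }
}

int main() {
  WaitForSuspension(1000000000ull);  // 1 s budget; succeeds on the 4th poll here
}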
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 87a4c8dc61..f5b58a0c54 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -81,11 +81,8 @@ class ThreadList {
// Suspend a thread using a peer, typically used by the debugger. Returns the thread on success,
// else null. The peer is used to identify the thread to avoid races with the thread terminating.
- // If the thread should be suspended then value of request_suspension should be true otherwise
- // the routine will wait for a previous suspend request. If the suspension times out then *timeout
- // is set to true.
+ // If the suspension times out then *timeout is set to true.
Thread* SuspendThreadByPeer(jobject peer,
- bool request_suspension,
SuspendReason reason,
bool* timed_out)
REQUIRES(!Locks::mutator_lock_,
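A hedged call-site sketch of the narrowed signature, mirroring the call sites updated elsewhere in this patch (Resume() is ART's pre-existing counterpart; `peer` is assumed to be a valid jobject for a live java.lang.Thread):

// Suspension is always requested now, so callers pass only the peer, the
// reason, and the timeout flag.
bool timed_out = false;
Thread* thread = Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
    peer, SuspendReason::kInternal, &timed_out);
if (thread != nullptr) {
  // ... inspect the suspended thread ...
  bool resumed = Runtime::Current()->GetThreadList()->Resume(thread, SuspendReason::kInternal);
  DCHECK(resumed);
}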
diff --git a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
index a185446ca5..a10fe2e905 100644
--- a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
+++ b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
@@ -81,7 +81,7 @@ extern "C" JNIEXPORT void JNICALL Java_Main_waitAndDeopt(JNIEnv*, jobject, jobje
}
bool timed_out = false;
Thread* other = Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
- target, true, SuspendReason::kInternal, &timed_out);
+ target, SuspendReason::kInternal, &timed_out);
CHECK(!timed_out);
CHECK(other != nullptr);
ScopedSuspendAll ssa(__FUNCTION__);