diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2021-09-04 23:00:21 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2021-09-04 23:00:21 +0000 |
commit | 9f1e89dee9f1d4ad146e9c0577afecec13493acf (patch) | |
tree | 45eaf450516b47abfbcff12ed8b93ebd7e3dded3 | |
parent | d83b951fa3f6da1174faa10c85b6cf8626d0c05a (diff) | |
parent | 8992f92aab381f0dc85cfd4003d935263f8451c8 (diff) |
Snap for 7705048 from 8992f92aab381f0dc85cfd4003d935263f8451c8 to sc-qpr1-release
Change-Id: I610491001866cec4160fbbd7af1d10e5759d74d2
-rw-r--r-- | openjdkjvm/OpenjdkJvm.cc | 1 | ||||
-rw-r--r-- | openjdkjvmti/ti_thread.cc | 1 | ||||
-rw-r--r-- | runtime/gc/collector/concurrent_copying.cc | 50 | ||||
-rw-r--r-- | runtime/jit/profile_saver.cc | 2 | ||||
-rw-r--r-- | runtime/native/dalvik_system_VMStack.cc | 1 | ||||
-rw-r--r-- | runtime/native/java_lang_Thread.cc | 1 | ||||
-rw-r--r-- | runtime/runtime.cc | 4 | ||||
-rw-r--r-- | runtime/runtime.h | 14 | ||||
-rw-r--r-- | runtime/thread.cc | 6 | ||||
-rw-r--r-- | runtime/thread.h | 5 | ||||
-rw-r--r-- | runtime/thread_list.cc | 30 | ||||
-rw-r--r-- | runtime/thread_list.h | 5 | ||||
-rw-r--r-- | test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc | 2 |
13 files changed, 78 insertions, 44 deletions
diff --git a/openjdkjvm/OpenjdkJvm.cc b/openjdkjvm/OpenjdkJvm.cc index d64086d43c..9b514af452 100644 --- a/openjdkjvm/OpenjdkJvm.cc +++ b/openjdkjvm/OpenjdkJvm.cc @@ -423,7 +423,6 @@ JNIEXPORT void JVM_SetNativeThreadName(JNIEnv* env, jobject jthread, jstring jav art::Thread* thread; { thread = thread_list->SuspendThreadByPeer(jthread, - true, art::SuspendReason::kInternal, &timed_out); } diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc index bb8fa3b694..a9a6ee8895 100644 --- a/openjdkjvmti/ti_thread.cc +++ b/openjdkjvmti/ti_thread.cc @@ -898,7 +898,6 @@ jvmtiError ThreadUtil::SuspendOther(art::Thread* self, bool timeout = true; art::Thread* ret_target = art::Runtime::Current()->GetThreadList()->SuspendThreadByPeer( target_jthread, - /* request_suspension= */ true, art::SuspendReason::kForUserCode, &timeout); if (ret_target == nullptr && !timeout) { diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 538d7bef7c..936b9199b5 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -1630,11 +1630,16 @@ void ConcurrentCopying::CopyingPhase() { // for the last time before transitioning to the shared mark stack mode, which would process new // refs that may have been concurrently pushed onto the mark stack during the ProcessMarkStack() // call above. At the same time, disable weak ref accesses using a per-thread flag. It's - // important to do these together in a single checkpoint so that we can ensure that mutators - // won't newly gray objects and push new refs onto the mark stack due to weak ref accesses and + // important to do these together so that we can ensure that mutators won't + // newly gray objects and push new refs onto the mark stack due to weak ref accesses and // mutators safely transition to the shared mark stack mode (without leaving unprocessed refs on // the thread-local mark stacks), without a race. This is why we use a thread-local weak ref // access flag Thread::tls32_.weak_ref_access_enabled_ instead of the global ones. + // We must use a stop-the-world pause to disable weak ref access. A checkpoint may lead to a + // deadlock if one mutator acquires a low-level mutex and then gets blocked while accessing + // a weak-ref (after participating in the checkpoint), and another mutator indefinitely waits + // for the mutex before it participates in the checkpoint. Consequently, the gc-thread blocks + // forever as the checkpoint never finishes (See runtime/mutator_gc_coord.md). SwitchToSharedMarkStackMode(); CHECK(!self->GetWeakRefAccessEnabled()); // Now that weak refs accesses are disabled, once we exhaust the shared mark stack again here @@ -2044,21 +2049,36 @@ class ConcurrentCopying::AssertToSpaceInvariantFieldVisitor { void ConcurrentCopying::RevokeThreadLocalMarkStacks(bool disable_weak_ref_access, Closure* checkpoint_callback) { Thread* self = Thread::Current(); - RevokeThreadLocalMarkStackCheckpoint check_point(this, disable_weak_ref_access); + Locks::mutator_lock_->AssertSharedHeld(self); ThreadList* thread_list = Runtime::Current()->GetThreadList(); - gc_barrier_->Init(self, 0); - size_t barrier_count = thread_list->RunCheckpoint(&check_point, checkpoint_callback); - // If there are no threads to wait which implys that all the checkpoint functions are finished, - // then no need to release the mutator lock. - if (barrier_count == 0) { - return; - } - Locks::mutator_lock_->SharedUnlock(self); - { - ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); - gc_barrier_->Increment(self, barrier_count); + RevokeThreadLocalMarkStackCheckpoint check_point(this, disable_weak_ref_access); + if (disable_weak_ref_access) { + // We're the only thread that could possibly ask for exclusive access here. + Locks::mutator_lock_->SharedUnlock(self); + { + ScopedPause pause(this); + MutexLock mu(self, *Locks::thread_list_lock_); + checkpoint_callback->Run(self); + for (Thread* thread : thread_list->GetList()) { + check_point.Run(thread); + } + } + Locks::mutator_lock_->SharedLock(self); + } else { + gc_barrier_->Init(self, 0); + size_t barrier_count = thread_list->RunCheckpoint(&check_point, checkpoint_callback); + // If there are no threads to wait which implys that all the checkpoint functions are finished, + // then no need to release the mutator lock. + if (barrier_count == 0) { + return; + } + Locks::mutator_lock_->SharedUnlock(self); + { + ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); + gc_barrier_->Increment(self, barrier_count); + } + Locks::mutator_lock_->SharedLock(self); } - Locks::mutator_lock_->SharedLock(self); } void ConcurrentCopying::RevokeThreadLocalMarkStack(Thread* thread) { diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc index 425eadca41..b86badcba5 100644 --- a/runtime/jit/profile_saver.cc +++ b/runtime/jit/profile_saver.cc @@ -189,7 +189,7 @@ void ProfileSaver::Run() { // We might have been woken up by a huge number of notifications to guarantee saving. // If we didn't meet the minimum saving period go back to sleep (only if missed by // a reasonable margin). - uint64_t min_save_period_ns = options_.GetMinSavePeriodMs(); + uint64_t min_save_period_ns = MsToNs(options_.GetMinSavePeriodMs()); while (min_save_period_ns * 0.9 > sleep_time) { { MutexLock mu(self, wait_lock_); diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc index 9d2dfac069..e88516e248 100644 --- a/runtime/native/dalvik_system_VMStack.cc +++ b/runtime/native/dalvik_system_VMStack.cc @@ -59,7 +59,6 @@ static ResultT GetThreadStack(const ScopedFastNativeObjectAccess& soa, ThreadList* thread_list = Runtime::Current()->GetThreadList(); bool timed_out; Thread* thread = thread_list->SuspendThreadByPeer(peer, - /* request_suspension= */ true, SuspendReason::kInternal, &timed_out); if (thread != nullptr) { diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc index 37b3fe642e..c3b4fe09de 100644 --- a/runtime/native/java_lang_Thread.cc +++ b/runtime/native/java_lang_Thread.cc @@ -148,7 +148,6 @@ static void Thread_setNativeName(JNIEnv* env, jobject peer, jstring java_name) { bool timed_out; // Take suspend thread lock to avoid races with threads trying to suspend this one. Thread* thread = thread_list->SuspendThreadByPeer(peer, - /* request_suspension= */ true, SuspendReason::kInternal, &timed_out); if (thread != nullptr) { diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 433f564b2d..6c99c1fa8d 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -410,7 +410,7 @@ Runtime::~Runtime() { while (threads_being_born_ > 0) { shutdown_cond_->Wait(self); } - shutting_down_ = true; + SetShuttingDown(); } // Shutdown and wait for the daemons. CHECK(self != nullptr); @@ -641,7 +641,7 @@ void Runtime::Abort(const char* msg) { // May be coming from an unattached thread. if (Thread::Current() == nullptr) { Runtime* current = Runtime::Current(); - if (current != nullptr && current->IsStarted() && !current->IsShuttingDown(nullptr)) { + if (current != nullptr && current->IsStarted() && !current->IsShuttingDownUnsafe()) { // We do not flag this to the unexpected-signal handler so that that may dump the stack. abort(); UNREACHABLE(); diff --git a/runtime/runtime.h b/runtime/runtime.h index 68456cd37b..b2093a303c 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -222,7 +222,13 @@ class Runtime { bool IsShuttingDown(Thread* self); bool IsShuttingDownLocked() const REQUIRES(Locks::runtime_shutdown_lock_) { - return shutting_down_; + return shutting_down_.load(std::memory_order_relaxed); + } + bool IsShuttingDownUnsafe() const { + return shutting_down_.load(std::memory_order_relaxed); + } + void SetShuttingDown() REQUIRES(Locks::runtime_shutdown_lock_) { + shutting_down_.store(true, std::memory_order_relaxed); } size_t NumberOfThreadsBeingBorn() const REQUIRES(Locks::runtime_shutdown_lock_) { @@ -1190,8 +1196,10 @@ class Runtime { // Waited upon until no threads are being born. std::unique_ptr<ConditionVariable> shutdown_cond_ GUARDED_BY(Locks::runtime_shutdown_lock_); - // Set when runtime shutdown is past the point that new threads may attach. - bool shutting_down_ GUARDED_BY(Locks::runtime_shutdown_lock_); + // Set when runtime shutdown is past the point that new threads may attach. Usually + // GUARDED_BY(Locks::runtime_shutdown_lock_). But we need to check it in Abort without the + // lock, because we may already own it. + std::atomic<bool> shutting_down_; // The runtime is starting to shutdown but is blocked waiting on shutdown_cond_. bool shutting_down_started_ GUARDED_BY(Locks::runtime_shutdown_lock_); diff --git a/runtime/thread.cc b/runtime/thread.cc index 46aa2b59ad..16a5f93be4 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -4468,6 +4468,12 @@ bool Thread::IsSystemDaemon() const { WellKnownClasses::java_lang_Thread_systemDaemon)->GetBoolean(GetPeer()); } +std::string Thread::StateAndFlagsAsHexString() const { + std::stringstream result_stream; + result_stream << std::hex << tls32_.state_and_flags.as_atomic_int.load(); + return result_stream.str(); +} + ScopedExceptionStorage::ScopedExceptionStorage(art::Thread* self) : self_(self), hs_(self_), excp_(hs_.NewHandle<art::mirror::Throwable>(self_->GetException())) { self_->ClearException(); diff --git a/runtime/thread.h b/runtime/thread.h index 7a408021c1..676bfd81de 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -254,7 +254,7 @@ class Thread { bool IsSuspended() const { union StateAndFlags state_and_flags; - state_and_flags.as_int = tls32_.state_and_flags.as_int; + state_and_flags.as_int = tls32_.state_and_flags.as_atomic_int.load(std::memory_order_relaxed); return state_and_flags.as_struct.state != kRunnable && (state_and_flags.as_struct.flags & kSuspendRequest) != 0; } @@ -1517,6 +1517,9 @@ class Thread { }; static_assert(sizeof(StateAndFlags) == sizeof(int32_t), "Weird state_and_flags size"); + // Format state and flags as a hex string. For diagnostic output. + std::string StateAndFlagsAsHexString() const; + static void ThreadExitCallback(void* arg); // Maximum number of suspend barriers. diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index f8e99e8f62..84b7384c46 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -874,10 +874,11 @@ static void ThreadSuspendByPeerWarning(Thread* self, } Thread* ThreadList::SuspendThreadByPeer(jobject peer, - bool request_suspension, SuspendReason reason, bool* timed_out) { + bool request_suspension = true; const uint64_t start_time = NanoTime(); + int self_suspend_count = 0; useconds_t sleep_us = kThreadSuspendInitialSleepUs; *timed_out = false; Thread* const self = Thread::Current(); @@ -926,6 +927,7 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer, // We hold the suspend count lock but another thread is trying to suspend us. Its not // safe to try to suspend another thread in case we get a cycle. Start the loop again // which will allow this thread to be suspended. + ++self_suspend_count; continue; } CHECK(suspended_thread == nullptr); @@ -957,20 +959,22 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer, } const uint64_t total_delay = NanoTime() - start_time; if (total_delay >= thread_suspend_timeout_ns_) { - ThreadSuspendByPeerWarning(self, - ::android::base::FATAL, - "Thread suspension timed out", - peer); - if (suspended_thread != nullptr) { + if (suspended_thread == nullptr) { + ThreadSuspendByPeerWarning(self, + ::android::base::FATAL, + "Failed to issue suspend request", + peer); + } else { CHECK_EQ(suspended_thread, thread); - bool updated = suspended_thread->ModifySuspendCount(soa.Self(), - -1, - nullptr, - reason); - DCHECK(updated); + LOG(WARNING) << "Suspended thread state_and_flags: " + << suspended_thread->StateAndFlagsAsHexString() + << ", self_suspend_count = " << self_suspend_count; + ThreadSuspendByPeerWarning(self, + ::android::base::FATAL, + "Thread suspension timed out", + peer); } - *timed_out = true; - return nullptr; + UNREACHABLE(); } else if (sleep_us == 0 && total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) { // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent diff --git a/runtime/thread_list.h b/runtime/thread_list.h index 87a4c8dc61..f5b58a0c54 100644 --- a/runtime/thread_list.h +++ b/runtime/thread_list.h @@ -81,11 +81,8 @@ class ThreadList { // Suspend a thread using a peer, typically used by the debugger. Returns the thread on success, // else null. The peer is used to identify the thread to avoid races with the thread terminating. - // If the thread should be suspended then value of request_suspension should be true otherwise - // the routine will wait for a previous suspend request. If the suspension times out then *timeout - // is set to true. + // If the suspension times out then *timeout is set to true. Thread* SuspendThreadByPeer(jobject peer, - bool request_suspension, SuspendReason reason, bool* timed_out) REQUIRES(!Locks::mutator_lock_, diff --git a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc index a185446ca5..a10fe2e905 100644 --- a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc +++ b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc @@ -81,7 +81,7 @@ extern "C" JNIEXPORT void JNICALL Java_Main_waitAndDeopt(JNIEnv*, jobject, jobje } bool timed_out = false; Thread* other = Runtime::Current()->GetThreadList()->SuspendThreadByPeer( - target, true, SuspendReason::kInternal, &timed_out); + target, SuspendReason::kInternal, &timed_out); CHECK(!timed_out); CHECK(other != nullptr); ScopedSuspendAll ssa(__FUNCTION__); |