author     Vladimir Marko <vmarko@google.com>    2021-06-02 15:54:17 +0100
committer  Vladimir Marko <vmarko@google.com>    2021-06-03 12:41:24 +0000
commit     8350b7caf464c5c6f20b850199831d9dd0b50f92 (patch)
tree       5d10dea54dc660b521fd249aec5cf739e2e25698 /compiler
parent     61437483485304896f12fe35c587d3d699e6f86e (diff)
Reduce Partial LSE memory usage.
Instantiate ExecutionSubgraph only for partial singleton
candidates (currently NewInstance, possibly NewArray in the
future). This reduces "LSA" allocations.
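
As a rough illustration of this first change (a standalone sketch with
placeholder types, not the ART sources): the analysis object is held through
an owning pointer and constructed only for references that can actually be
partial singletons, so non-candidates pay nothing.

  // Sketch only: "Subgraph" and "RefInfo" stand in for ExecutionSubgraph and
  // ReferenceInfo; the real classes live in compiler/optimizing/.
  #include <memory>
  #include <vector>

  struct Subgraph {
    explicit Subgraph(size_t num_blocks) : allowed_successors(num_blocks, true) {}
    std::vector<bool> allowed_successors;  // per-block bookkeeping costs memory
  };

  struct RefInfo {
    RefInfo(bool can_be_partial, size_t num_blocks) {
      if (can_be_partial) {
        // Only partial-singleton candidates allocate the subgraph at all.
        subgraph = std::make_unique<Subgraph>(num_blocks);
      }
    }
    bool IsPartialSingleton() const { return subgraph != nullptr; }
    std::unique_ptr<Subgraph> subgraph;  // previously an unconditionally-built member
  };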
Reserve memory for PartialLoadStoreEliminationHelper members
based on the number of partial singletons instead of the
number of reference infos. This reduces "LSE" allocations.
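
Likewise for the second change, a standalone sketch (placeholder names, not
the ART sources) of sizing the helper's containers from a count of
partial-singleton candidates rather than from every reference info:

  #include <algorithm>
  #include <cstddef>
  #include <vector>

  struct CandidateInfo {
    bool is_partial_singleton = false;
  };

  struct PartialLseHelper {
    std::vector<const CandidateInfo*> heap_refs;
    std::vector<int> new_ref_phis;  // stand-in for per-block phi slots

    PartialLseHelper(const std::vector<CandidateInfo>& refs, size_t num_blocks) {
      size_t num_partial_singletons = static_cast<size_t>(std::count_if(
          refs.begin(), refs.end(),
          [](const CandidateInfo& ri) { return ri.is_partial_singleton; }));
      // Reserving by candidate count instead of refs.size() keeps the reserved
      // memory proportional to the entries that will actually be used.
      heap_refs.reserve(num_partial_singletons);
      new_ref_phis.reserve(num_partial_singletons * num_blocks);
    }
  };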
The peak scoped arena allocation for one compiled method
is reduced from
MEM: used: 97424004, allocated: 99006568, lost: 1115968
LSA 46015104
LSE 51408900
down to
MEM: used: 17000744, allocated: 26713880, lost: 3332496
GVN 17000744
where the LSA+LSE memory use is lower than GVN use.
(cherry picked from commit 5c824937bb82adbde857bc99cb03c769c9f68f7b)
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 33650849
Merged-In: I323b9f144b258f0fab034794770971547ce94b59
Change-Id: If3fc9787fc0dc4a1a33dd5ca0f2dc972998c4da7
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/optimizing/execution_subgraph.cc       |  10
-rw-r--r--  compiler/optimizing/execution_subgraph.h        |  13
-rw-r--r--  compiler/optimizing/execution_subgraph_test.cc  |  32
-rw-r--r--  compiler/optimizing/load_store_analysis.cc      |  11
-rw-r--r--  compiler/optimizing/load_store_analysis.h       |  29
-rw-r--r--  compiler/optimizing/load_store_analysis_test.cc |  59
-rw-r--r--  compiler/optimizing/load_store_elimination.cc   |  39
7 files changed, 85 insertions, 108 deletions
diff --git a/compiler/optimizing/execution_subgraph.cc b/compiler/optimizing/execution_subgraph.cc
index 5045e8db0b..6d105668c0 100644
--- a/compiler/optimizing/execution_subgraph.cc
+++ b/compiler/optimizing/execution_subgraph.cc
@@ -28,17 +28,15 @@
 
 namespace art {
 
-ExecutionSubgraph::ExecutionSubgraph(HGraph* graph,
-                                     bool analysis_possible,
-                                     ScopedArenaAllocator* allocator)
+ExecutionSubgraph::ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator)
     : graph_(graph),
       allocator_(allocator),
-      allowed_successors_(analysis_possible ? graph_->GetBlocks().size() : 0,
+      allowed_successors_(graph_->GetBlocks().size(),
                           ~(std::bitset<kMaxFilterableSuccessors> {}),
                           allocator_->Adapter(kArenaAllocLSA)),
       unreachable_blocks_(
-          allocator_, analysis_possible ? graph_->GetBlocks().size() : 0, false, kArenaAllocLSA),
-      valid_(analysis_possible),
+          allocator_, graph_->GetBlocks().size(), /*expandable=*/ false, kArenaAllocLSA),
+      valid_(true),
       needs_prune_(false),
       finalized_(false) {
   if (valid_) {
diff --git a/compiler/optimizing/execution_subgraph.h b/compiler/optimizing/execution_subgraph.h
index 7fabbaead1..05855c30d4 100644
--- a/compiler/optimizing/execution_subgraph.h
+++ b/compiler/optimizing/execution_subgraph.h
@@ -113,7 +113,7 @@ class BlockIdFilterThunk {
 // allocated in the entry block. This is a massively simplifying assumption but
 // means we can't partially remove objects that are repeatedly allocated in a
 // loop.
-class ExecutionSubgraph : public ArenaObject<kArenaAllocLSA> {
+class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> {
  public:
   using BitVecBlockRange =
       IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>;
@@ -222,12 +222,11 @@ class ExecutionSubgraph : public ArenaObject<kArenaAllocLSA> {
   // to have a constant branching factor.
   static constexpr uint32_t kMaxFilterableSuccessors = 8;
 
-  // Instantiate a subgraph. analysis_possible controls whether or not to even
-  // attempt partial-escape analysis. It should be false if partial-escape
-  // analysis is not desired (eg when being used for instruction scheduling) or
-  // when the branching factor in the graph is too high. This is calculated once
-  // and passed down for performance reasons.
-  ExecutionSubgraph(HGraph* graph, bool analysis_possible, ScopedArenaAllocator* allocator);
+  // Instantiate a subgraph. The subgraph can be instantiated only if partial-escape
+  // analysis is desired (eg not when being used for instruction scheduling) and
+  // when the branching factor in the graph is not too high. These conditions
+  // are determined once and passed down for performance reasons.
+  ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator);
 
   void Invalidate() {
     valid_ = false;
diff --git a/compiler/optimizing/execution_subgraph_test.cc b/compiler/optimizing/execution_subgraph_test.cc
index 98e642f1a7..74c243b5b4 100644
--- a/compiler/optimizing/execution_subgraph_test.cc
+++ b/compiler/optimizing/execution_subgraph_test.cc
@@ -142,7 +142,7 @@ TEST_F(ExecutionSubgraphTest, Basic) {
       "exit",
       { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } }));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("left"));
   esg.Finalize();
   ASSERT_TRUE(esg.IsValid());
@@ -229,7 +229,7 @@ TEST_F(ExecutionSubgraphTest, Propagation) {
                       { "entry", "right" },
                       { "right", "exit" } }));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("l2"));
   esg.Finalize();
   ASSERT_TRUE(esg.IsValid());
@@ -292,7 +292,7 @@ TEST_F(ExecutionSubgraphTest, PropagationLoop) {
                       { "entry", "right" },
                       { "right", "exit" } }));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("l2"));
   esg.Finalize();
   ASSERT_TRUE(esg.IsValid());
@@ -348,7 +348,7 @@ TEST_F(ExecutionSubgraphTest, PropagationLoop2) {
                       { "entry", "right" },
                       { "right", "exit" } }));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("l1"));
   esg.Finalize();
   ASSERT_TRUE(esg.IsValid());
@@ -403,7 +403,7 @@ TEST_F(ExecutionSubgraphTest, PropagationLoop3) {
                       { "entry", "right" },
                       { "right", "exit" } }));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("l1loop"));
   esg.Finalize();
   ASSERT_TRUE(esg.IsValid());
@@ -478,7 +478,7 @@ TEST_F(ExecutionSubgraphTest, PropagationLoop4) {
                      {"entry", "right"},
                      {"right", "exit"}}));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("l1loop_left"));
   esg.Finalize();
   ASSERT_TRUE(esg.IsValid());
@@ -545,7 +545,7 @@ TEST_F(ExecutionSubgraphTest, PropagationLoop5) {
                      {"entry", "right"},
                      {"right", "exit"}}));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("l1loop_left"));
   esg.Finalize();
   ASSERT_TRUE(esg.IsValid());
@@ -575,7 +575,7 @@ TEST_F(ExecutionSubgraphTest, Invalid) {
       "exit",
       { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } }));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("left"));
   esg.RemoveBlock(blks.Get("right"));
   esg.Finalize();
@@ -598,7 +598,7 @@ TEST_F(ExecutionSubgraphTest, Exclusions) {
                       { "b", "exit" },
                       { "c", "exit" } }));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("a"));
   esg.RemoveBlock(blks.Get("c"));
   esg.Finalize();
@@ -703,7 +703,7 @@ TEST_F(ExecutionSubgraphTest, ExclusionExtended) {
                       { "c_end_1", "exit" },
                       { "c_end_2", "exit" } }));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("a"));
   esg.RemoveBlock(blks.Get("c_mid"));
   esg.Finalize();
@@ -787,7 +787,7 @@ TEST_F(ExecutionSubgraphTest, InAndOutEscape) {
                       { "esc_bottom", "exit" } }));
 
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("esc_top"));
   esg.RemoveBlock(blks.Get("esc_bottom"));
   esg.Finalize();
@@ -817,7 +817,7 @@ TEST_F(ExecutionSubgraphTest, BigNodes) {
   }
   AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.Finalize();
   ASSERT_TRUE(esg.IsValid());
   ASSERT_TRUE(IsValidSubgraph(esg));
@@ -846,7 +846,7 @@ TEST_F(ExecutionSubgraphTest, BigNodesMissing) {
   }
   AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.RemoveBlock(blks.Get("blk2"));
   esg.RemoveBlock(blks.Get("blk4"));
   esg.Finalize();
@@ -877,7 +877,7 @@ TEST_F(ExecutionSubgraphTest, BigNodesNoPath) {
   }
   AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   for (const auto& mid : mid_blocks) {
     esg.RemoveBlock(blks.Get(mid));
   }
@@ -907,7 +907,7 @@ TEST_F(ExecutionSubgraphTest, CanAnalyseBig) {
 
   AdjacencyListGraph blks(SetupFromAdjacencyList(mid_blocks.front(), mid_blocks.back(), edges));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   esg.Finalize();
   ASSERT_TRUE(esg.IsValid());
   ASSERT_TRUE(IsValidSubgraph(esg));
@@ -937,7 +937,7 @@ TEST_F(ExecutionSubgraphTest, CanAnalyseBig2) {
   edges.emplace_back(mid_blocks.front(), mid_blocks.back());
   AdjacencyListGraph blks(SetupFromAdjacencyList(mid_blocks.front(), mid_blocks.back(), edges));
   ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-  ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator());
+  ExecutionSubgraph esg(graph_, GetScopedAllocator());
   constexpr size_t kToRemoveIdx = kNumBlocks / 2;
   HBasicBlock* remove_implicit = blks.Get(mid_blocks[kToRemoveIdx]);
   for (HBasicBlock* pred : remove_implicit->GetPredecessors()) {
diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc
index 38ed98adaf..3fe42aff2e 100644
--- a/compiler/optimizing/load_store_analysis.cc
+++ b/compiler/optimizing/load_store_analysis.cc
@@ -94,7 +94,8 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1,
 // Make sure we mark any writes/potential writes to heap-locations within partially
 // escaped values as escaping.
 void ReferenceInfo::PrunePartialEscapeWrites() {
-  if (!subgraph_.IsValid()) {
+  DCHECK(subgraph_ != nullptr);
+  if (!subgraph_->IsValid()) {
     // All paths escape.
     return;
   }
@@ -104,12 +105,12 @@ void ReferenceInfo::PrunePartialEscapeWrites() {
   for (const HUseListNode<HInstruction*>& use : reference_->GetUses()) {
     const HInstruction* user = use.GetUser();
     if (!additional_exclusions.IsBitSet(user->GetBlock()->GetBlockId()) &&
-        subgraph_.ContainsBlock(user->GetBlock()) &&
+        subgraph_->ContainsBlock(user->GetBlock()) &&
         (user->IsUnresolvedInstanceFieldSet() || user->IsUnresolvedStaticFieldSet() ||
          user->IsInstanceFieldSet() || user->IsStaticFieldSet() || user->IsArraySet()) &&
         (reference_ == user->InputAt(0)) &&
-        std::any_of(subgraph_.UnreachableBlocks().begin(),
-                    subgraph_.UnreachableBlocks().end(),
+        std::any_of(subgraph_->UnreachableBlocks().begin(),
+                    subgraph_->UnreachableBlocks().end(),
                     [&](const HBasicBlock* excluded) -> bool {
                       return reference_->GetBlock()->GetGraph()->PathBetween(excluded,
                                                                              user->GetBlock());
@@ -122,7 +123,7 @@ void ReferenceInfo::PrunePartialEscapeWrites() {
   }
   if (UNLIKELY(additional_exclusions.IsAnyBitSet())) {
     for (uint32_t exc : additional_exclusions.Indexes()) {
-      subgraph_.RemoveBlock(graph->GetBlocks()[exc]);
+      subgraph_->RemoveBlock(graph->GetBlocks()[exc]);
     }
   }
 }
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index 7e5b071483..4975bae2a2 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -50,15 +50,15 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
         is_singleton_and_not_returned_(true),
         is_singleton_and_not_deopt_visible_(true),
         allocator_(allocator),
-        subgraph_(reference->GetBlock()->GetGraph(),
-                  elimination_type != LoadStoreAnalysisType::kBasic,
-                  allocator_) {
+        subgraph_(nullptr) {
     // TODO We can do this in one pass.
     // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads
     // for now just ignore it.
     bool can_be_partial = elimination_type != LoadStoreAnalysisType::kBasic &&
                           (/* reference_->IsNewArray() || */ reference_->IsNewInstance());
     if (can_be_partial) {
+      subgraph_.reset(
+          new (allocator) ExecutionSubgraph(reference->GetBlock()->GetGraph(), allocator));
       CollectPartialEscapes(reference_->GetBlock()->GetGraph());
     }
     CalculateEscape(reference_,
@@ -73,14 +73,16 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
         // to see if the additional branches are worth it.
         PrunePartialEscapeWrites();
       }
-      subgraph_.Finalize();
+      DCHECK(subgraph_ != nullptr);
+      subgraph_->Finalize();
     } else {
-      subgraph_.Invalidate();
+      DCHECK(subgraph_ == nullptr);
    }
   }
 
   const ExecutionSubgraph* GetNoEscapeSubgraph() const {
-    return &subgraph_;
+    DCHECK(IsPartialSingleton());
+    return subgraph_.get();
   }
 
   HInstruction* GetReference() const {
@@ -103,7 +105,9 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
     auto ref = GetReference();
     // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads
     // for now just ignore it.
-    return (/* ref->IsNewArray() || */ ref->IsNewInstance()) && GetNoEscapeSubgraph()->IsValid();
+    return (/* ref->IsNewArray() || */ ref->IsNewInstance()) &&
+           subgraph_ != nullptr &&
+           subgraph_->IsValid();
   }
 
   // Returns true if reference_ is a singleton and not returned to the caller or
@@ -123,7 +127,8 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
  private:
   void CollectPartialEscapes(HGraph* graph);
   void HandleEscape(HBasicBlock* escape) {
-    subgraph_.RemoveBlock(escape);
+    DCHECK(subgraph_ != nullptr);
+    subgraph_->RemoveBlock(escape);
   }
   void HandleEscape(HInstruction* escape) {
     HandleEscape(escape->GetBlock());
@@ -145,7 +150,7 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
 
   ScopedArenaAllocator* allocator_;
 
-  ExecutionSubgraph subgraph_;
+  std::unique_ptr<ExecutionSubgraph> subgraph_;
 
   DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
 };
@@ -264,8 +269,10 @@ class HeapLocationCollector : public HGraphVisitor {
     ref_info_array_.clear();
   }
 
-  size_t GetNumberOfReferenceInfos() const {
-    return ref_info_array_.size();
+  size_t CountPartialSingletons() const {
+    return std::count_if(ref_info_array_.begin(),
+                         ref_info_array_.end(),
+                         [](ReferenceInfo* ri) { return ri->IsPartialSingleton(); });
   }
 
   size_t GetNumberOfHeapLocations() const {
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
index cebc3f34f9..c6d22087f7 100644
--- a/compiler/optimizing/load_store_analysis_test.cc
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -926,6 +926,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
+  ASSERT_TRUE(info->IsPartialSingleton());
   const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
 
   ASSERT_TRUE(esg->IsValid());
@@ -1034,6 +1035,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape2) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
+  ASSERT_TRUE(info->IsPartialSingleton());
   const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
 
   ASSERT_TRUE(esg->IsValid());
@@ -1156,6 +1158,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape3) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
+  ASSERT_TRUE(info->IsPartialSingleton());
   const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
 
   ASSERT_TRUE(esg->IsValid());
@@ -1235,6 +1238,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape4) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
+  ASSERT_TRUE(info->IsPartialSingleton());
   const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
 
   ASSERT_TRUE(esg->IsValid());
@@ -1322,6 +1326,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape5) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
+  ASSERT_TRUE(info->IsPartialSingleton());
   const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
 
   ASSERT_TRUE(esg->IsValid());
@@ -1437,18 +1442,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacentNoPredicated) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
-  const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-
-  EXPECT_FALSE(esg->IsValid()) << esg->GetExcludedCohorts();
-  EXPECT_FALSE(IsValidSubgraph(esg));
-  std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
-                                                  esg->ReachableBlocks().end());
-
-  EXPECT_EQ(contents.size(), 0u);
-  EXPECT_TRUE(contents.find(blks.Get("left")) == contents.end());
-  EXPECT_TRUE(contents.find(blks.Get("right")) == contents.end());
-  EXPECT_TRUE(contents.find(blks.Get("entry")) == contents.end());
-  EXPECT_TRUE(contents.find(blks.Get("exit")) == contents.end());
+  ASSERT_FALSE(info->IsPartialSingleton());
 }
 
 // With predicated-set we can (partially) remove the store as well.
@@ -1548,6 +1542,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
+  ASSERT_TRUE(info->IsPartialSingleton());
   const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
 
   EXPECT_TRUE(esg->IsValid()) << esg->GetExcludedCohorts();
@@ -1668,18 +1663,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
-  const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-
-  ASSERT_FALSE(esg->IsValid());
-  ASSERT_FALSE(IsValidSubgraph(esg));
-  std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
-                                                  esg->ReachableBlocks().end());
-
-  ASSERT_EQ(contents.size(), 0u);
-  ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end());
-  ASSERT_TRUE(contents.find(blks.Get("right")) == contents.end());
-  ASSERT_TRUE(contents.find(blks.Get("entry")) == contents.end());
-  ASSERT_TRUE(contents.find(blks.Get("exit")) == contents.end());
+  ASSERT_FALSE(info->IsPartialSingleton());
 }
 
 // // ENTRY
@@ -1734,16 +1718,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape2) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
-  const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-
-  ASSERT_FALSE(esg->IsValid());
-  ASSERT_FALSE(IsValidSubgraph(esg));
-  std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
-                                                  esg->ReachableBlocks().end());
-
-  ASSERT_EQ(contents.size(), 0u);
-  ASSERT_TRUE(contents.find(blks.Get("entry")) == contents.end());
-  ASSERT_TRUE(contents.find(blks.Get("exit")) == contents.end());
+  ASSERT_FALSE(info->IsPartialSingleton());
 }
 
 // // ENTRY
@@ -1916,14 +1891,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
-  const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-
-  ASSERT_FALSE(esg->IsValid());
-  ASSERT_FALSE(IsValidSubgraph(esg));
-  std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
-                                                  esg->ReachableBlocks().end());
-
-  ASSERT_EQ(contents.size(), 0u);
+  ASSERT_FALSE(info->IsPartialSingleton());
 }
 
 // // ENTRY
@@ -2087,11 +2055,6 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) {
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
 
   ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
-  const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-  std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
-                                                  esg->ReachableBlocks().end());
-
-  ASSERT_EQ(contents.size(), 0u);
-  ASSERT_FALSE(esg->IsValid());
+  ASSERT_FALSE(info->IsPartialSingleton());
 }
 }  // namespace art
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index d7cae768d4..722cc83872 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -617,10 +617,13 @@ class LSEVisitor final : private HGraphDelegateVisitor {
 
   bool IsPartialNoEscape(HBasicBlock* blk, size_t idx) {
     auto* ri = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
-    auto* sg = ri->GetNoEscapeSubgraph();
-    return ri->IsPartialSingleton() &&
-           std::none_of(sg->GetExcludedCohorts().cbegin(),
-                        sg->GetExcludedCohorts().cend(),
+    if (!ri->IsPartialSingleton()) {
+      return false;
+    }
+    ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
+        ri->GetNoEscapeSubgraph()->GetExcludedCohorts();
+    return std::none_of(cohorts.cbegin(),
+                        cohorts.cend(),
                         [&](const ExecutionSubgraph::ExcludedCohort& ex) -> bool {
                           // Make sure we haven't yet and never will escape.
                           return ex.PrecedesBlock(blk) ||
@@ -1096,8 +1099,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
         heap_values_for_[instruction->GetBlock()->GetBlockId()];
     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
-      ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
-          ref_info->GetNoEscapeSubgraph()->GetExcludedCohorts();
       HBasicBlock* blk = instruction->GetBlock();
       // We don't need to do anything if the reference has not escaped at this point.
       // This is true if either we (1) never escape or (2) sometimes escape but
@@ -1105,14 +1106,22 @@ class LSEVisitor final : private HGraphDelegateVisitor {
       // We count being in the excluded cohort as escaping. Technically, this is
       // a bit over-conservative (since we can have multiple non-escaping calls
      // before a single escaping one) but this simplifies everything greatly.
+      auto partial_singleton_did_not_escape = [](ReferenceInfo* ref_info, HBasicBlock* blk) {
+        DCHECK(ref_info->IsPartialSingleton());
+        if (!ref_info->GetNoEscapeSubgraph()->ContainsBlock(blk)) {
+          return false;
+        }
+        ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
+            ref_info->GetNoEscapeSubgraph()->GetExcludedCohorts();
+        return std::none_of(cohorts.begin(),
+                            cohorts.end(),
+                            [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
+                              return cohort.PrecedesBlock(blk);
+                            });
+      };
       if (ref_info->IsSingleton() ||
           // partial and we aren't currently escaping and we haven't escaped yet.
-          (ref_info->IsPartialSingleton() && ref_info->GetNoEscapeSubgraph()->ContainsBlock(blk) &&
-           std::none_of(cohorts.begin(),
-                        cohorts.end(),
-                        [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
-                          return cohort.PrecedesBlock(blk);
-                        }))) {
+          (ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk))) {
         // Singleton references cannot be seen by the callee.
       } else {
         if (side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
@@ -2901,9 +2910,9 @@ class PartialLoadStoreEliminationHelper {
                       nullptr,
                       alloc_->Adapter(kArenaAllocLSE)),
         first_materialization_block_id_(GetGraph()->GetBlocks().size()) {
-    heap_refs_.reserve(lse_->heap_location_collector_.GetNumberOfReferenceInfos());
-    new_ref_phis_.reserve(lse_->heap_location_collector_.GetNumberOfReferenceInfos() *
-                          GetGraph()->GetBlocks().size());
+    size_t num_partial_singletons = lse_->heap_location_collector_.CountPartialSingletons();
+    heap_refs_.reserve(num_partial_singletons);
+    new_ref_phis_.reserve(num_partial_singletons * GetGraph()->GetBlocks().size());
     CollectInterestingHeapRefs();
   }