author     Alex Light <allight@google.com>  2020-07-09 13:24:56 -0700
committer  Treehugger Robot <treehugger-gerrit@google.com>  2020-11-12 02:08:44 +0000
commit     bb6cda60e4418c0ab557ea4090e046bed8206763 (patch)
tree       f6b94510108cb653a80e0ea14d50ad6616c9f44a /compiler/optimizing/load_store_analysis.h
parent     670ff8854cf075617e0abee77b2259903757d86e (diff)
Partial LSE analysis & store removal
This is the first piece of partial LSE for ART. This CL adds the analysis
tools needed to implement partial LSE. More immediately, it improves LSE so
that it removes stores that are provably non-observable based on the location
where they occur. For example:

```
Foo foo = new Foo();
if (xyz) {
  check(foo);
  foo.x++;
} else {
  foo.x = 12;
}
return foo.x;
```

The store of 12 can be removed because the only escape in this method cannot
be reached from the store and has not been executed by the point the store is
reached.

The main purpose of this CL is to add the analysis tools needed to implement
partial load-store elimination, namely tracking of which blocks are escaping
and of the groups of blocks from which we cannot remove allocations. The
actual impact of this change is very minor, being triggered only once in AOSP
code. go/lem shows only minor effects on compile time and no effect on the
compiled code. See go/lem-allight-partial-lse-2 for numbers. Compile time
shows an average regression of 1.4% (the maximum regression is 7%, with 0.2
noise).

This CL adds a new 'reachability' concept to the HGraph. Once it has been
calculated, it allows one to quickly query whether there is any execution
path containing two given blocks in a given order. This is used to define the
sections of the graph from which the escape of some allocation is inevitable.

Test: art_compiler_tests
Test: treehugger
Bug: 67037140
Change-Id: I0edc8d6b73f7dd329cb1ea7923080a0abe913ea6
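For readers unfamiliar with the new analysis surface, the following is a
minimal, illustrative sketch (not part of this CL) of how a later elimination
pass might consult the per-reference escape information introduced below.
`FindReferenceInfoOf`, `IsPartialSingleton`, and `GetNoEscapeSubgraph` appear
in the diff; the driver shape and `ExecutionSubgraph::ContainsBlock` are
assumptions about the surrounding code.

```
// Sketch only, under the assumptions noted above.
#include "load_store_analysis.h"

namespace art {

// Returns true if `store` writes into an object whose escape is only partial
// and the write happens in a block where the object is known not to have
// escaped, making the store a candidate for (partial) elimination.
static bool StoreMayBePartiallyRemovable(const HeapLocationCollector& collector,
                                         HInstanceFieldSet* store) {
  // Input 0 of an instance-field set is the reference being written to.
  ReferenceInfo* info = collector.FindReferenceInfoOf(store->InputAt(0));
  if (info == nullptr || !info->IsPartialSingleton()) {
    return false;
  }
  // The no-escape subgraph contains the blocks in which the allocation has
  // provably not escaped; ContainsBlock() is assumed to exist on it.
  return info->GetNoEscapeSubgraph()->ContainsBlock(store->GetBlock());
}

}  // namespace art
```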
Diffstat (limited to 'compiler/optimizing/load_store_analysis.h')
-rw-r--r--  compiler/optimizing/load_store_analysis.h | 103
1 file changed, 89 insertions(+), 14 deletions(-)
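Before the diff itself, here is a hypothetical driver sketch showing how the
updated entry point might be invoked. Only the `LoadStoreAnalysis` constructor
arguments and `GetHeapLocationCollector()` are taken from the diff below;
`Run()`, `GetArenaStack()`, and the stats plumbing are assumptions.

```
// Hypothetical usage sketch; see the assumptions listed above.
#include "load_store_analysis.h"

namespace art {

void AnalyzeForPartialLse(HGraph* graph, OptimizingCompilerStats* stats) {
  // Assumed allocator setup; the analysis only needs a ScopedArenaAllocator.
  ScopedArenaAllocator allocator(graph->GetArenaStack());
  // for_elimination defaults to true; per-block escape tracking is only kept
  // when ExecutionSubgraph::CanAnalyse(graph) also holds (see the diff).
  LoadStoreAnalysis lsa(graph, stats, &allocator, /*for_elimination=*/true);
  lsa.Run();  // Assumed entry point; not shown in the hunks below.
  const HeapLocationCollector& heap_locations = lsa.GetHeapLocationCollector();
  // A downstream pass (LSE proper) would consume `heap_locations` here, e.g.
  // by querying ReferenceInfo::IsPartialSingleton() for each tracked reference.
  static_cast<void>(heap_locations);
}

}  // namespace art
```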
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index 882fe28cc7..5d2d841b37 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -17,29 +17,61 @@
#ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
#define ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
+#include "base/arena_allocator.h"
+#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
+#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
+#include "base/stl_util.h"
#include "escape.h"
+#include "execution_subgraph.h"
#include "nodes.h"
-#include "optimization.h"
+#include "optimizing/optimizing_compiler_stats.h"
namespace art {
// A ReferenceInfo contains additional info about a reference such as
// whether it's a singleton, returned, etc.
-class ReferenceInfo : public ArenaObject<kArenaAllocLSA> {
+class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
public:
- ReferenceInfo(HInstruction* reference, size_t pos)
+ ReferenceInfo(HInstruction* reference,
+ ScopedArenaAllocator* allocator,
+ size_t pos,
+ bool for_partial_elimination)
: reference_(reference),
position_(pos),
is_singleton_(true),
is_singleton_and_not_returned_(true),
- is_singleton_and_not_deopt_visible_(true) {
+ is_singleton_and_not_deopt_visible_(true),
+ allocator_(allocator),
+ subgraph_(reference->GetBlock()->GetGraph(), for_partial_elimination, allocator_) {
+ // TODO We can do this in one pass.
+ // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads;
+ // for now just ignore it.
+ bool can_be_partial =
+ for_partial_elimination && (/* reference_->IsNewArray() || */ reference_->IsNewInstance());
+ LambdaEscapeVisitor func([&](HInstruction* inst) { return HandleEscape(inst); });
+ if (can_be_partial) {
+ VisitEscapes(reference_, func);
+ }
CalculateEscape(reference_,
nullptr,
&is_singleton_,
&is_singleton_and_not_returned_,
&is_singleton_and_not_deopt_visible_);
+ if (can_be_partial) {
+ // This is to mark writes to partially escaped values as also part of the escaped subset.
+ // TODO We can avoid this if we have a 'ConditionalWrite' instruction. Will require testing
+ // to see if the additional branches are worth it.
+ PrunePartialEscapeWrites();
+ subgraph_.Finalize();
+ } else {
+ subgraph_.Invalidate();
+ }
+ }
+
+ const ExecutionSubgraph* GetNoEscapeSubgraph() const {
+ return &subgraph_;
}
HInstruction* GetReference() const {
@@ -57,6 +89,14 @@ class ReferenceInfo : public ArenaObject<kArenaAllocLSA> {
return is_singleton_;
}
+ // This is a singleton and there are paths on which it does not escape the method.
+ bool IsPartialSingleton() const {
+ auto ref = GetReference();
+ // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads;
+ // for now just ignore it.
+ return (/* ref->IsNewArray() || */ ref->IsNewInstance()) && GetNoEscapeSubgraph()->IsValid();
+ }
+
// Returns true if reference_ is a singleton and not returned to the caller or
// used as an environment local of an HDeoptimize instruction.
// The allocation and stores into reference_ may be eliminated for such cases.
@@ -72,6 +112,15 @@ class ReferenceInfo : public ArenaObject<kArenaAllocLSA> {
}
private:
+ bool HandleEscape(HInstruction* escape) {
+ subgraph_.RemoveBlock(escape->GetBlock());
+ return true;
+ }
+
+ // Make sure we mark any writes/potential writes to heap-locations within partially
+ // escaped values as escaping.
+ void PrunePartialEscapeWrites();
+
HInstruction* const reference_;
const size_t position_; // position in HeapLocationCollector's ref_info_array_.
@@ -82,6 +131,10 @@ class ReferenceInfo : public ArenaObject<kArenaAllocLSA> {
// Is singleton and not used as an environment local of HDeoptimize.
bool is_singleton_and_not_deopt_visible_;
+ ScopedArenaAllocator* allocator_;
+
+ ExecutionSubgraph subgraph_;
+
DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
};
@@ -174,23 +227,28 @@ class HeapLocationCollector : public HGraphVisitor {
// aliasing matrix of 8 heap locations.
static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32;
- explicit HeapLocationCollector(HGraph* graph, ScopedArenaAllocator* allocator)
+ HeapLocationCollector(HGraph* graph,
+ ScopedArenaAllocator* allocator,
+ bool for_partial_elimination)
: HGraphVisitor(graph),
allocator_(allocator),
ref_info_array_(allocator->Adapter(kArenaAllocLSA)),
heap_locations_(allocator->Adapter(kArenaAllocLSA)),
- aliasing_matrix_(allocator,
- kInitialAliasingMatrixBitVectorSize,
- true,
- kArenaAllocLSA),
+ aliasing_matrix_(allocator, kInitialAliasingMatrixBitVectorSize, true, kArenaAllocLSA),
has_heap_stores_(false),
has_volatile_(false),
- has_monitor_operations_(false) {
+ has_monitor_operations_(false),
+ for_partial_elimination_(for_partial_elimination) {
aliasing_matrix_.ClearAllBits();
}
+ ~HeapLocationCollector() {
+ CleanUp();
+ }
+
void CleanUp() {
heap_locations_.clear();
+ STLDeleteContainerPointers(ref_info_array_.begin(), ref_info_array_.end());
ref_info_array_.clear();
}
@@ -303,6 +361,11 @@ class HeapLocationCollector : public HGraphVisitor {
return kHeapLocationNotFound;
}
+ bool InstructionEligibleForLSERemoval(HInstruction* inst) const;
+
+ // Get some estimated statistics based on our analysis.
+ void DumpReferenceStats(OptimizingCompilerStats* stats);
+
// Returns true if heap_locations_[index1] and heap_locations_[index2] may alias.
bool MayAlias(size_t index1, size_t index2) const {
if (index1 < index2) {
@@ -417,7 +480,8 @@ class HeapLocationCollector : public HGraphVisitor {
ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
if (ref_info == nullptr) {
size_t pos = ref_info_array_.size();
- ref_info = new (allocator_) ReferenceInfo(instruction, pos);
+ ref_info =
+ new (allocator_) ReferenceInfo(instruction, allocator_, pos, for_partial_elimination_);
ref_info_array_.push_back(ref_info);
}
return ref_info;
@@ -554,15 +618,25 @@ class HeapLocationCollector : public HGraphVisitor {
// alias analysis and won't be as effective.
bool has_volatile_; // If there are volatile field accesses.
bool has_monitor_operations_; // If there are monitor operations.
+ bool for_partial_elimination_;
DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
};
class LoadStoreAnalysis {
public:
- explicit LoadStoreAnalysis(HGraph* graph, ScopedArenaAllocator* local_allocator)
- : graph_(graph),
- heap_location_collector_(graph, local_allocator) {}
+ // for_elimination controls whether we should keep track of escapes at a per-block level for
+ // partial LSE.
+ explicit LoadStoreAnalysis(HGraph* graph,
+ OptimizingCompilerStats* stats,
+ ScopedArenaAllocator* local_allocator,
+ bool for_elimination = true)
+ : graph_(graph),
+ stats_(stats),
+ heap_location_collector_(
+ graph,
+ local_allocator,
+ /*for_partial_elimination=*/for_elimination && ExecutionSubgraph::CanAnalyse(graph_)) {}
const HeapLocationCollector& GetHeapLocationCollector() const {
return heap_location_collector_;
@@ -572,6 +646,7 @@ class LoadStoreAnalysis {
private:
HGraph* graph_;
+ OptimizingCompilerStats* stats_;
HeapLocationCollector heap_location_collector_;
DISALLOW_COPY_AND_ASSIGN(LoadStoreAnalysis);