| author | Alex Light <allight@google.com> | 2020-07-09 13:24:56 -0700 |
|---|---|---|
| committer | Treehugger Robot <treehugger-gerrit@google.com> | 2020-11-12 02:08:44 +0000 |
| commit | bb6cda60e4418c0ab557ea4090e046bed8206763 (patch) | |
| tree | f6b94510108cb653a80e0ea14d50ad6616c9f44a /compiler/optimizing/load_store_analysis.h | |
| parent | 670ff8854cf075617e0abee77b2259903757d86e (diff) | |
Partial LSE analysis & store removal
This is the first piece of partial LSE for ART. This CL adds the analysis
tools needed to implement partial LSE. More immediately, it improves LSE
so that it removes stores that are provably non-observable based on the
location at which they occur. For example:
```
Foo foo = new Foo();
if (xyz) {
  check(foo);
  foo.x++;
} else {
  foo.x = 12;
}
return foo.x;
```
The store of 12 can be removed because the only escape in this method,
the call to check(foo), is on the other branch and therefore cannot have
been executed by the point we reach the store; the value 12 is simply
forwarded to the final load on that path.
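As an illustration only (a C++ stand-in for the pseudocode above, not actual
compiler output), the method effectively takes the following shape once the
store is removed and its value forwarded:

```cpp
// Sketch only: conceptual result after the store of 12 is eliminated.
// The load of foo.x at the merge point becomes a phi of the two branch
// values, so the write on the else path is never needed.
struct Foo { int x = 0; };
void check(Foo* foo);  // opaque call: the only place foo escapes

int Example(bool xyz) {
  Foo* foo = new Foo();  // mirrors the pseudocode; ownership/cleanup omitted
  int result;
  if (xyz) {
    check(foo);       // foo escapes only on this path
    foo->x++;
    result = foo->x;
  } else {
    result = 12;      // formerly: foo.x = 12; ... return foo.x;
  }
  return result;
}
```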
The main purpose of this CL is to add the analysis tools needed to
implement partial load-store elimination. Namely, it adds tracking of the
blocks in which an allocation escapes and of the groups of blocks from
which we cannot remove the allocation.
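The diff below wires this tracking into ReferenceInfo through an
ExecutionSubgraph. As a standalone sketch of the underlying idea (a toy
representation of my own, not ART's ExecutionSubgraph API), one can compute
per block whether the object may already have escaped by the time that block
executes; stores made while that is still false are the candidates for
removal:

```cpp
#include <cstddef>
#include <vector>

// For each block, returns whether some escaping use can have executed at or
// before that block, i.e. whether the block itself contains an escape or
// escaped[pred] already holds for any predecessor.
std::vector<bool> MayHaveEscaped(const std::vector<std::vector<size_t>>& predecessors,
                                 const std::vector<bool>& is_escape_block) {
  std::vector<bool> escaped(predecessors.size(), false);
  bool changed = true;
  while (changed) {  // fixed-point iteration; a worklist over RPO would be faster
    changed = false;
    for (size_t block = 0; block < predecessors.size(); ++block) {
      bool value = is_escape_block[block];
      for (size_t pred : predecessors[block]) {
        value = value || escaped[pred];
      }
      if (value && !escaped[block]) {
        escaped[block] = true;
        changed = true;
      }
    }
  }
  return escaped;
}
```

In the example above, the bit for the else block stays false because the only
escape sits on the other branch, which is exactly why the store of 12 is
removable there.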
The actual impact of this change is very minor; it is triggered only once
in all of AOSP. go/lem shows only minor effects on compile time and no
effect on the compiled code. See go/lem-allight-partial-lse-2 for numbers.
Compile time regresses by 1.4% on average (the maximum regression is 7%,
with 0.2 noise).
This CL adds a new 'reachability' concept to the HGraph. Once it has been
calculated, it allows one to quickly query whether there is any execution
path that contains two given blocks in a given order. This is used to
define the sections of the graph from which the escape of some allocation
is inevitable.
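A minimal standalone sketch of such a query (illustrative only; the class
name, PathBetween, and the representation here are assumptions rather than
the actual HGraph additions): precompute the transitive closure of the CFG as
one reachability set per block, after which "is there an execution path that
visits A and later B" is a single bit test.

```cpp
#include <cstddef>
#include <vector>

// Illustrative reachability oracle over a CFG given as successor lists.
class BlockReachability {
 public:
  explicit BlockReachability(const std::vector<std::vector<size_t>>& successors)
      : reachable_(successors.size(), std::vector<bool>(successors.size(), false)) {
    bool changed = true;
    // Fixed-point iteration; fine for a sketch, not how a real compiler would do it.
    while (changed) {
      changed = false;
      for (size_t block = 0; block < successors.size(); ++block) {
        for (size_t succ : successors[block]) {
          changed |= AddEdgeAndClosure(block, succ);
        }
      }
    }
  }

  // True if some execution path contains `from` and later `to`.
  bool PathBetween(size_t from, size_t to) const { return reachable_[from][to]; }

 private:
  // Marks `to` (and everything `to` reaches) as reachable from `from`.
  bool AddEdgeAndClosure(size_t from, size_t to) {
    bool changed = !reachable_[from][to];
    reachable_[from][to] = true;
    for (size_t block = 0; block < reachable_[to].size(); ++block) {
      if (reachable_[to][block] && !reachable_[from][block]) {
        reachable_[from][block] = true;
        changed = true;
      }
    }
    return changed;
  }

  std::vector<std::vector<bool>> reachable_;
};
```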
Test: art_compiler_tests
Test: treehugger
Bug: 67037140
Change-Id: I0edc8d6b73f7dd329cb1ea7923080a0abe913ea6
Diffstat (limited to 'compiler/optimizing/load_store_analysis.h')
-rw-r--r-- | compiler/optimizing/load_store_analysis.h | 103 |
1 file changed, 89 insertions, 14 deletions
```diff
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index 882fe28cc7..5d2d841b37 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -17,29 +17,61 @@
 #ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
 #define ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
 
+#include "base/arena_allocator.h"
+#include "base/arena_bit_vector.h"
 #include "base/bit_vector-inl.h"
+#include "base/scoped_arena_allocator.h"
 #include "base/scoped_arena_containers.h"
+#include "base/stl_util.h"
 #include "escape.h"
+#include "execution_subgraph.h"
 #include "nodes.h"
-#include "optimization.h"
+#include "optimizing/optimizing_compiler_stats.h"
 
 namespace art {
 
 // A ReferenceInfo contains additional info about a reference such as
 // whether it's a singleton, returned, etc.
-class ReferenceInfo : public ArenaObject<kArenaAllocLSA> {
+class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
  public:
-  ReferenceInfo(HInstruction* reference, size_t pos)
+  ReferenceInfo(HInstruction* reference,
+                ScopedArenaAllocator* allocator,
+                size_t pos,
+                bool for_partial_elimination)
       : reference_(reference),
         position_(pos),
         is_singleton_(true),
         is_singleton_and_not_returned_(true),
-        is_singleton_and_not_deopt_visible_(true) {
+        is_singleton_and_not_deopt_visible_(true),
+        allocator_(allocator),
+        subgraph_(reference->GetBlock()->GetGraph(), for_partial_elimination, allocator_) {
+    // TODO We can do this in one pass.
+    // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads
+    // for now just ignore it.
+    bool can_be_partial =
+        for_partial_elimination && (/* reference_->IsNewArray() || */ reference_->IsNewInstance());
+    LambdaEscapeVisitor func([&](HInstruction* inst) { return HandleEscape(inst); });
+    if (can_be_partial) {
+      VisitEscapes(reference_, func);
+    }
     CalculateEscape(reference_,
                     nullptr,
                     &is_singleton_,
                     &is_singleton_and_not_returned_,
                     &is_singleton_and_not_deopt_visible_);
+    if (can_be_partial) {
+      // This is to mark writes to partially escaped values as also part of the escaped subset.
+      // TODO We can avoid this if we have a 'ConditionalWrite' instruction. Will require testing
+      // to see if the additional branches are worth it.
+      PrunePartialEscapeWrites();
+      subgraph_.Finalize();
+    } else {
+      subgraph_.Invalidate();
+    }
+  }
+
+  const ExecutionSubgraph* GetNoEscapeSubgraph() const {
+    return &subgraph_;
   }
 
   HInstruction* GetReference() const {
@@ -57,6 +89,14 @@ class ReferenceInfo : public ArenaObject<kArenaAllocLSA> {
     return is_singleton_;
   }
 
+  // This is a singleton and there are paths that don't escape the method
+  bool IsPartialSingleton() const {
+    auto ref = GetReference();
+    // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads
+    // for now just ignore it.
+    return (/* ref->IsNewArray() || */ ref->IsNewInstance()) && GetNoEscapeSubgraph()->IsValid();
+  }
+
   // Returns true if reference_ is a singleton and not returned to the caller or
   // used as an environment local of an HDeoptimize instruction.
   // The allocation and stores into reference_ may be eliminated for such cases.
@@ -72,6 +112,15 @@ class ReferenceInfo : public ArenaObject<kArenaAllocLSA> {
   }
 
  private:
+  bool HandleEscape(HInstruction* escape) {
+    subgraph_.RemoveBlock(escape->GetBlock());
+    return true;
+  }
+
+  // Make sure we mark any writes/potential writes to heap-locations within partially
+  // escaped values as escaping.
+  void PrunePartialEscapeWrites();
+
   HInstruction* const reference_;
   const size_t position_;  // position in HeapLocationCollector's ref_info_array_.
 
@@ -82,6 +131,10 @@ class ReferenceInfo : public ArenaObject<kArenaAllocLSA> {
   // Is singleton and not used as an environment local of HDeoptimize.
   bool is_singleton_and_not_deopt_visible_;
 
+  ScopedArenaAllocator* allocator_;
+
+  ExecutionSubgraph subgraph_;
+
   DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
 };
 
@@ -174,23 +227,28 @@ class HeapLocationCollector : public HGraphVisitor {
   // aliasing matrix of 8 heap locations.
   static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32;
 
-  explicit HeapLocationCollector(HGraph* graph, ScopedArenaAllocator* allocator)
+  HeapLocationCollector(HGraph* graph,
+                        ScopedArenaAllocator* allocator,
+                        bool for_partial_elimination)
       : HGraphVisitor(graph),
         allocator_(allocator),
         ref_info_array_(allocator->Adapter(kArenaAllocLSA)),
         heap_locations_(allocator->Adapter(kArenaAllocLSA)),
-        aliasing_matrix_(allocator,
-                         kInitialAliasingMatrixBitVectorSize,
-                         true,
-                         kArenaAllocLSA),
+        aliasing_matrix_(allocator, kInitialAliasingMatrixBitVectorSize, true, kArenaAllocLSA),
         has_heap_stores_(false),
         has_volatile_(false),
-        has_monitor_operations_(false) {
+        has_monitor_operations_(false),
+        for_partial_elimination_(for_partial_elimination) {
     aliasing_matrix_.ClearAllBits();
   }
 
+  ~HeapLocationCollector() {
+    CleanUp();
+  }
+
   void CleanUp() {
     heap_locations_.clear();
+    STLDeleteContainerPointers(ref_info_array_.begin(), ref_info_array_.end());
     ref_info_array_.clear();
   }
 
@@ -303,6 +361,11 @@ class HeapLocationCollector : public HGraphVisitor {
     return kHeapLocationNotFound;
   }
 
+  bool InstructionEligibleForLSERemoval(HInstruction* inst) const;
+
+  // Get some estimated statistics based on our analysis.
+  void DumpReferenceStats(OptimizingCompilerStats* stats);
+
   // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias.
   bool MayAlias(size_t index1, size_t index2) const {
     if (index1 < index2) {
@@ -417,7 +480,8 @@ class HeapLocationCollector : public HGraphVisitor {
     ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
     if (ref_info == nullptr) {
       size_t pos = ref_info_array_.size();
-      ref_info = new (allocator_) ReferenceInfo(instruction, pos);
+      ref_info =
+          new (allocator_) ReferenceInfo(instruction, allocator_, pos, for_partial_elimination_);
       ref_info_array_.push_back(ref_info);
     }
     return ref_info;
@@ -554,15 +618,25 @@ class HeapLocationCollector : public HGraphVisitor {
   // alias analysis and won't be as effective.
   bool has_volatile_;  // If there are volatile field accesses.
   bool has_monitor_operations_;  // If there are monitor operations.
+  bool for_partial_elimination_;
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
 };
 
 class LoadStoreAnalysis {
  public:
-  explicit LoadStoreAnalysis(HGraph* graph, ScopedArenaAllocator* local_allocator)
-      : graph_(graph),
-        heap_location_collector_(graph, local_allocator) {}
+  // for_elimination controls whether we should keep track of escapes at a per-block level for
+  // partial LSE.
+  explicit LoadStoreAnalysis(HGraph* graph,
+                             OptimizingCompilerStats* stats,
+                             ScopedArenaAllocator* local_allocator,
+                             bool for_elimination = true)
+      : graph_(graph),
+        stats_(stats),
+        heap_location_collector_(
+            graph,
+            local_allocator,
+            /*for_partial_elimination=*/for_elimination && ExecutionSubgraph::CanAnalyse(graph_)) {}
 
   const HeapLocationCollector& GetHeapLocationCollector() const {
     return heap_location_collector_;
@@ -572,6 +646,7 @@ class LoadStoreAnalysis {
 
  private:
   HGraph* graph_;
+  OptimizingCompilerStats* stats_;
   HeapLocationCollector heap_location_collector_;
 
   DISALLOW_COPY_AND_ASSIGN(LoadStoreAnalysis);
```