/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gvn.h"

#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/utils.h"
#include "side_effects_analysis.h"

namespace art {

/**
 * A ValueSet holds instructions that can replace other instructions. It is
 * updated through the `Add` and `Kill` methods; `Kill` removes instructions
 * that are affected by the given side effects.
 *
 * The `Lookup` method returns an instruction equivalent to the given instruction
 * if there is one in the set. In GVN, we would say those instructions have the
 * same "number".
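 *
 * To keep copies cheap, a ValueSet may share bucket chains with the set it was
 * copied from. The `buckets_owned_` bit vector records which buckets are
 * private to this set; a shared bucket is cloned before it is modified.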
 */
class ValueSet : public ArenaObject<kArenaAllocGvn> {
 public:
  // Constructs an empty ValueSet which owns all its buckets.
  explicit ValueSet(ScopedArenaAllocator* allocator)
      : allocator_(allocator),
        num_buckets_(kMinimumNumberOfBuckets),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // Explicitly null out the buckets and mark them all as owned.
    DCHECK(IsPowerOfTwo(num_buckets_));
    std::fill_n(buckets_, num_buckets_, nullptr);
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Copy constructor. Depending on the load factor, it will either make a deep
  // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
  ValueSet(ScopedArenaAllocator* allocator, const ValueSet& other)
      : allocator_(allocator),
        num_buckets_(other.IdealBucketCount()),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // PopulateFromInternal() initializes all entries of buckets_ and all
    // buckets_owned_ bits.
    DCHECK(IsPowerOfTwo(num_buckets_));
    PopulateFromInternal(other);
  }

  // Erases all values in this set and populates it with values from `other`.
  void PopulateFrom(const ValueSet& other) {
    if (this == &other) {
      return;
    }
    PopulateFromInternal(other);
  }

  // Returns true if `this` has enough buckets so that if `other` is copied into
  // it, the load factor will not cross the upper threshold.
  // If `exact_match` is set, true is returned only if `this` has the ideal
  // number of buckets. Otherwise, a larger number of buckets is also allowed.
  bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
    if (exact_match) {
      return other.IdealBucketCount() == num_buckets_;
    } else {
      return other.IdealBucketCount() <= num_buckets_;
    }
  }

  // Adds an instruction to the set.
  void Add(HInstruction* instruction) {
    DCHECK(Lookup(instruction) == nullptr);
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

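    // The target bucket may still be shared with the parent set; clone it
    // before prepending so that the parent's chain is left untouched.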
    if (!buckets_owned_.IsBitSet(index)) {
      CloneBucket(index);
    }
    buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
    ++num_entries_;
  }

  // Returns an instruction from the set that is equivalent to the given
  // instruction, or null if there is none.
  HInstruction* Lookup(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetHashCode() == hash_code) {
        HInstruction* existing = node->GetInstruction();
        if (existing->Equals(instruction)) {
          return existing;
        }
      }
    }
    return nullptr;
  }

  // Returns whether instruction is in the set.
  bool Contains(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetInstruction() == instruction) {
        return true;
      }
    }
    return false;
  }

  // Removes all instructions in the set affected by the given side effects.
  void Kill(SideEffects side_effects) {
    DeleteAllImpureWhich([side_effects](Node* node) {
      return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects);
    });
  }

  void Clear() {
    num_entries_ = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      buckets_[i] = nullptr;
    }
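    // The buckets no longer point into a parent set, so mark them all as owned.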
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Updates this set by intersecting with instructions in a predecessor's set.
  void IntersectWith(ValueSet* predecessor) {
    if (IsEmpty()) {
      return;
    } else if (predecessor->IsEmpty()) {
      Clear();
    } else {
      // Pure instructions do not need to be tested because only impure
      // instructions can be killed.
      DeleteAllImpureWhich([predecessor](Node* node) {
        return !predecessor->Contains(node->GetInstruction());
      });
    }
  }

  bool IsEmpty() const { return num_entries_ == 0; }
  size_t GetNumberOfEntries() const { return num_entries_; }

 private:
  // Copies all entries from `other` to `this`.
  void PopulateFromInternal(const ValueSet& other) {
    DCHECK_NE(this, &other);
    DCHECK_GE(num_buckets_, other.IdealBucketCount());

    if (num_buckets_ == other.num_buckets_) {
      // Hash table remains the same size. We copy the bucket pointers and leave
      // all buckets_owned_ bits false.
      buckets_owned_.ClearAllBits();
      memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
    } else {
      // Hash table size changes. We copy and rehash all entries, and set all
      // buckets_owned_ bits to true.
      std::fill_n(buckets_, num_buckets_, nullptr);
      for (size_t i = 0; i < other.num_buckets_; ++i) {
        for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
          size_t new_index = BucketIndex(node->GetHashCode());
          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
        }
      }
      buckets_owned_.SetInitialBits(num_buckets_);
    }

    num_entries_ = other.num_entries_;
  }

  class Node : public ArenaObject<kArenaAllocGvn> {
   public:
    Node(HInstruction* instruction, size_t hash_code, Node* next)
        : instruction_(instruction), hash_code_(hash_code), next_(next) {}

    size_t GetHashCode() const { return hash_code_; }
    HInstruction* GetInstruction() const { return instruction_; }
    Node* GetNext() const { return next_; }
    void SetNext(Node* node) { next_ = node; }

    Node* Dup(ScopedArenaAllocator* allocator, Node* new_next = nullptr) {
      return new (allocator) Node(instruction_, hash_code_, new_next);
    }

   private:
    HInstruction* const instruction_;
    const size_t hash_code_;
    Node* next_;

    DISALLOW_COPY_AND_ASSIGN(Node);
  };

  // Creates our own copy of a bucket that is currently pointing to a parent.
  // This method can be called while iterating over the bucket because it
  // preserves the order of entries in the bucket and will return the clone of
  // the given 'iterator'.
  Node* CloneBucket(size_t index, Node* iterator = nullptr) {
    DCHECK(!buckets_owned_.IsBitSet(index));
    Node* clone_current = nullptr;
    Node* clone_previous = nullptr;
    Node* clone_iterator = nullptr;
    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      clone_current = node->Dup(allocator_, nullptr);
      if (node == iterator) {
        clone_iterator = clone_current;
      }
      if (clone_previous == nullptr) {
        buckets_[index] = clone_current;
      } else {
        clone_previous->SetNext(clone_current);
      }
      clone_previous = clone_current;
    }
    buckets_owned_.SetBit(index);
    return clone_iterator;
  }

  // Iterates over buckets with impure instructions (even indices) and deletes
  // the entries for which 'cond' returns true.
  template<typename Functor>
  void DeleteAllImpureWhich(Functor cond) {
    for (size_t i = 0; i < num_buckets_; i += 2) {
      Node* node = buckets_[i];
      Node* previous = nullptr;

      if (node == nullptr) {
        continue;
      }

      if (!buckets_owned_.IsBitSet(i)) {
        // The bucket is not owned, but we may not need to change it at all.
        // Iterate as long as the entries don't satisfy 'cond'.
        while (node != nullptr) {
          if (cond(node)) {
            // We do need to delete an entry but we do not own the bucket.
            // Clone the bucket, make sure 'previous' and 'node' point to
            // the cloned entries and break.
            previous = CloneBucket(i, previous);
            node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
            break;
          }
          previous = node;
          node = node->GetNext();
        }
      }

      // By this point we either own the bucket and can start deleting entries,
      // or we do not own it but no entries matched 'cond'.
      DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);

      // We iterate over the remainder of entries and delete those that match
      // the given condition.
      while (node != nullptr) {
        Node* next = node->GetNext();
        if (cond(node)) {
          if (previous == nullptr) {
            buckets_[i] = next;
          } else {
            previous->SetNext(next);
          }
        } else {
          previous = node;
        }
        node = next;
      }
    }
  }

  // Computes a bucket count such that the load factor is reasonable.
  // This is estimated as (num_entries_ * 1.5), rounded up to the nearest power of two.
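  // For example, 20 entries yield RoundUpToPowerOfTwo(30) = 32 buckets, while
  // 5 or fewer entries fall back to the 8-bucket minimum.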
  size_t IdealBucketCount() const {
    size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
    if (bucket_count > kMinimumNumberOfBuckets) {
      return bucket_count;
    } else {
      return kMinimumNumberOfBuckets;
    }
  }

  // Generates a hash code for an instruction.
  size_t HashCode(HInstruction* instruction) const {
    size_t hash_code = instruction->ComputeHashCode();
    // Pure instructions are put into odd buckets to speed up deletion. Note that in the
    // case of irreducible loops, we don't put pure instructions in odd buckets, as we
    // need to delete them when entering the loop.
    // ClinitCheck is treated as a pure instruction since it's only executed
    // once.
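    // For example, with 8 buckets a hash of 0x2A becomes 0x54 for an impure
    // instruction (bucket 4, even) and 0x55 for a pure one (bucket 5, odd).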
    bool pure = !instruction->GetSideEffects().HasDependencies() ||
                instruction->IsClinitCheck();
    if (!pure || instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
      return (hash_code << 1) | 0;
    } else {
      return (hash_code << 1) | 1;
    }
  }

  // Converts a hash code to a bucket index.
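  // Since num_buckets_ is always a power of two (see the constructor DCHECKs),
  // masking is equivalent to hash_code % num_buckets_.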
  size_t BucketIndex(size_t hash_code) const {
    return hash_code & (num_buckets_ - 1);
  }

  ScopedArenaAllocator* const allocator_;

  // The internal bucket implementation of the set.
  size_t const num_buckets_;
  Node** const buckets_;

  // Flags specifying which buckets were copied into the set from its parent.
  // If a flag is not set, the corresponding bucket points to entries in the
  // parent and must be cloned prior to making changes.
  ArenaBitVector buckets_owned_;

  // The number of entries in the set.
  size_t num_entries_;

  static constexpr size_t kMinimumNumberOfBuckets = 8;

  DISALLOW_COPY_AND_ASSIGN(ValueSet);
};

/**
 * Optimization phase that removes redundant instructions.
 */
class GlobalValueNumberer : public ValueObject {
 public:
  GlobalValueNumberer(HGraph* graph,
                      const SideEffectsAnalysis& side_effects)
      : graph_(graph),
        allocator_(graph->GetArenaStack()),
        side_effects_(side_effects),
        sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)),
        visited_blocks_(
            &allocator_, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {
    visited_blocks_.ClearAllBits();
  }

  void Run();

 private:
  // Per-block GVN. Will also update the ValueSet of the dominated and
  // successor blocks.
  void VisitBasicBlock(HBasicBlock* block);

  HGraph* graph_;
  ScopedArenaAllocator allocator_;
  const SideEffectsAnalysis& side_effects_;

  ValueSet* FindSetFor(HBasicBlock* block) const {
    ValueSet* result = sets_[block->GetBlockId()];
    DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
    return result;
  }

  void AbandonSetFor(HBasicBlock* block) {
    DCHECK(sets_[block->GetBlockId()] != nullptr)
        << "Block B" << block->GetBlockId() << " expected to have a set";
    sets_[block->GetBlockId()] = nullptr;
  }

  // Returns false if the GlobalValueNumberer has already visited all blocks
  // which may reference `block`.
  bool WillBeReferencedAgain(HBasicBlock* block) const;

  // Iterates over visited blocks and finds one which has a ValueSet such that:
  // (a) it will not be referenced in the future, and
  // (b) it can hold a copy of `reference_set` with a reasonable load factor.
  HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
                                                 const ValueSet& reference_set) const;

  // ValueSets for blocks. Initially null; the set for an individual block is
  // allocated and populated by the block's dominator, and updated by all blocks
  // on the path from the dominator to the block.
  ScopedArenaVector<ValueSet*> sets_;

  // BitVector which serves as a fast-access map from block id to
  // visited/unvisited Boolean.
  ArenaBitVector visited_blocks_;

  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
};

void GlobalValueNumberer::Run() {
  DCHECK(side_effects_.HasRun());
  sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_);

  // Use the reverse post order to ensure the non back-edge predecessors of a block are
  // visited before the block itself.
  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }
}

void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
  ValueSet* set = nullptr;

  const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
  if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
    // The entry block should only accumulate constant instructions, and
    // the builder puts constants only in the entry block.
    // Therefore, there is no need to propagate the value set to the next block.
    set = new (&allocator_) ValueSet(&allocator_);
  } else {
    HBasicBlock* dominator = block->GetDominator();
    ValueSet* dominator_set = FindSetFor(dominator);

    if (dominator->GetSuccessors().size() == 1) {
      // `block` is a direct successor of its dominator. No need to clone the
      // dominator's set; `block` can take over its ownership, including its buckets.
      DCHECK_EQ(dominator->GetSingleSuccessor(), block);
      AbandonSetFor(dominator);
      set = dominator_set;
    } else {
      // Try to find a basic block which will never be referenced again and whose
      // ValueSet can therefore be recycled. We will need to copy `dominator_set`
      // into the recycled set, so we pass `dominator_set` as a reference for size.
      HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
      if (recyclable == nullptr) {
        // No block with a suitable ValueSet found. Allocate a new one and
        // copy `dominator_set` into it.
        set = new (&allocator_) ValueSet(&allocator_, *dominator_set);
      } else {
        // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
        set = FindSetFor(recyclable);
        AbandonSetFor(recyclable);
        set->PopulateFrom(*dominator_set);
      }
    }

    if (!set->IsEmpty()) {
      if (block->IsLoopHeader()) {
        if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
          // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
          // loop header. We clear the set at entry of irreducible loops and any loop containing
          // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
          // across the irreducible loop.
          // Note that, if we're not compiling OSR, we could still do GVN and introduce
          // phis at irreducible loop headers. We decided it was not worth the complexity.
          set->Clear();
        } else {
          DCHECK(!block->GetLoopInformation()->IsIrreducible());
          DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
          set->Kill(side_effects_.GetLoopEffects(block));
        }
      } else if (predecessors.size() > 1) {
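        // At a join point with multiple predecessors, a value is available only
        // if it is available along every incoming path, so intersect this set
        // with each predecessor's set.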
        for (HBasicBlock* predecessor : predecessors) {
          set->IntersectWith(FindSetFor(predecessor));
          if (set->IsEmpty()) {
            break;
          }
        }
      }
    }
  }

  sets_[block->GetBlockId()] = set;

  HInstruction* current = block->GetFirstInstruction();
  while (current != nullptr) {
    // Save the next instruction in case `current` is removed from the graph.
    HInstruction* next = current->GetNext();
    // Do not kill the set with the side effects of the instruction just now: if
    // the instruction is GVN'ed, we don't need to kill.
    if (current->CanBeMoved()) {
      if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
        // For commutative ops, (x op y) will be treated the same as (y op x)
        // after fixed ordering.
        current->AsBinaryOperation()->OrderInputs();
      }
      HInstruction* existing = set->Lookup(current);
      if (existing != nullptr) {
        // This replacement doesn't make more OrderInputs() necessary: current is
        // either used by an instruction that it dominates, which hasn't been
        // visited yet given the order we visit instructions, or by a phi, and
        // we don't do OrderInputs() on a phi anyway.
        current->ReplaceWith(existing);
        current->GetBlock()->RemoveInstruction(current);
      } else {
        set->Kill(current->GetSideEffects());
        set->Add(current);
      }
    } else {
      set->Kill(current->GetSideEffects());
    }
    current = next;
  }

  visited_blocks_.SetBit(block->GetBlockId());
}

bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
  DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));

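  // The set of `block` may still be needed by the blocks it dominates (they
  // start from a copy of it) and by its successors (they intersect with it),
  // so it can only be recycled once all of them have been visited.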
  for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
    if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
      return true;
    }
  }

  for (const HBasicBlock* successor : block->GetSuccessors()) {
    if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
      return true;
    }
  }

  return false;
}

HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
    HBasicBlock* block, const ValueSet& reference_set) const {
  HBasicBlock* secondary_match = nullptr;

  for (size_t block_id : visited_blocks_.Indexes()) {
    ValueSet* current_set = sets_[block_id];
    if (current_set == nullptr) {
      // Set was already recycled.
      continue;
    }

    HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];

    // We test if `current_set` has enough buckets to store a copy of
    // `reference_set` with a reasonable load factor. If we find a set whose
    // number of buckets matches perfectly, we return right away. If we find one
    // that is larger, we return it if no perfectly-matching set is found.
    // Note that we defer testing WillBeReferencedAgain until all other criteria
    // have been satisfied because it might be expensive.
    if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) {
      if (!WillBeReferencedAgain(current_block)) {
        return current_block;
      }
    } else if (secondary_match == nullptr &&
               current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) {
      if (!WillBeReferencedAgain(current_block)) {
        secondary_match = current_block;
      }
    }
  }

  return secondary_match;
}

void GVNOptimization::Run() {
  GlobalValueNumberer gvn(graph_, side_effects_);
  gvn.Run();
}

}  // namespace art