/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gvn.h"

#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/bit_vector-inl.h"
#include "side_effects_analysis.h"
#include "utils.h"

namespace art {

/**
 * A ValueSet holds instructions that can replace other instructions. It is updated
 * through the `Add` and `Kill` methods. The `Kill` method removes instructions
 * that are affected by the given side effects.
 *
 * The `Lookup` method returns an instruction equivalent to the given one if the
 * set contains such an instruction. In GVN, we would say those instructions have
 * the same "number".
 */
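// Internally, a ValueSet is a fixed-size, power-of-two hash table of singly-linked
// buckets. Buckets may be shared with the set they were copied from: the
// `buckets_owned_` bit vector tracks which buckets are private copies and which
// still alias the parent's nodes and must be cloned before being modified.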
class ValueSet : public ArenaObject<kArenaAllocGvn> {
 public:
  // Constructs an empty ValueSet which owns all its buckets.
  explicit ValueSet(ArenaAllocator* allocator)
      : allocator_(allocator),
        num_buckets_(kMinimumNumberOfBuckets),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // ArenaAllocator returns zeroed memory, so no need to set buckets to null.
    DCHECK(IsPowerOfTwo(num_buckets_));
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Copy constructor. Depending on the load factor, it will either make a deep
  // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
  ValueSet(ArenaAllocator* allocator, const ValueSet& other)
      : allocator_(allocator),
        num_buckets_(other.IdealBucketCount()),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // ArenaAllocator returns zeroed memory, so entries of buckets_ and
    // buckets_owned_ are initialized to null and false, respectively.
    DCHECK(IsPowerOfTwo(num_buckets_));
    PopulateFromInternal(other, /* is_dirty */ false);
  }

  // Erases all values in this set and populates it with values from `other`.
  void PopulateFrom(const ValueSet& other) {
    if (this == &other) {
      return;
    }
    PopulateFromInternal(other, /* is_dirty */ true);
  }

  // Returns true if `this` has enough buckets so that if `other` is copied into
  // it, the load factor will not cross the upper threshold.
  // If `exact_match` is set, true is returned only if `this` has the ideal
  // number of buckets; otherwise a larger number of buckets is also allowed.
  bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
    if (exact_match) {
      return other.IdealBucketCount() == num_buckets_;
    } else {
      return other.IdealBucketCount() <= num_buckets_;
    }
  }

  // Adds an instruction to the set.
  void Add(HInstruction* instruction) {
    DCHECK(Lookup(instruction) == nullptr);
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    if (!buckets_owned_.IsBitSet(index)) {
      CloneBucket(index);
    }
    buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
    ++num_entries_;
  }

  // Returns an instruction equivalent to the given one if there is such an
  // instruction in the set; returns null otherwise.
  HInstruction* Lookup(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetHashCode() == hash_code) {
        HInstruction* existing = node->GetInstruction();
        if (existing->Equals(instruction)) {
          return existing;
        }
      }
    }
    return nullptr;
  }

  // Returns whether `instruction` is in the set.
  bool Contains(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetInstruction() == instruction) {
        return true;
      }
    }
    return false;
  }

  // Removes all instructions in the set affected by the given side effects.
  void Kill(SideEffects side_effects) {
    DeleteAllImpureWhich([side_effects](Node* node) {
      return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects);
    });
  }

  void Clear() {
    num_entries_ = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      buckets_[i] = nullptr;
    }
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Updates this set by intersecting with instructions in a predecessor's set.
  void IntersectWith(ValueSet* predecessor) {
    if (IsEmpty()) {
      return;
    } else if (predecessor->IsEmpty()) {
      Clear();
    } else {
      // Pure instructions do not need to be tested because only impure
      // instructions can be killed.
      DeleteAllImpureWhich([predecessor](Node* node) {
        return !predecessor->Contains(node->GetInstruction());
      });
    }
  }

  bool IsEmpty() const { return num_entries_ == 0; }
  size_t GetNumberOfEntries() const { return num_entries_; }

 private:
  // Copies all entries from `other` to `this`.
  // If `is_dirty` is set to true, existing data will be wiped first. It is
  // assumed that `buckets_` and `buckets_owned_` are zero-allocated otherwise.
  void PopulateFromInternal(const ValueSet& other, bool is_dirty) {
    DCHECK_NE(this, &other);
    DCHECK_GE(num_buckets_, other.IdealBucketCount());

    if (num_buckets_ == other.num_buckets_) {
      // Hash table remains the same size. We copy the bucket pointers and leave
      // all buckets_owned_ bits false.
      if (is_dirty) {
        buckets_owned_.ClearAllBits();
      } else {
        DCHECK_EQ(buckets_owned_.NumSetBits(), 0u);
      }
      memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
    } else {
      // Hash table size changes. We copy and rehash all entries, and set all
      // buckets_owned_ bits to true.
      if (is_dirty) {
        memset(buckets_, 0, num_buckets_ * sizeof(Node*));
      } else {
        if (kIsDebugBuild) {
          for (size_t i = 0; i < num_buckets_; ++i) {
            DCHECK(buckets_[i] == nullptr) << i;
          }
        }
      }
      for (size_t i = 0; i < other.num_buckets_; ++i) {
        for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
          size_t new_index = BucketIndex(node->GetHashCode());
          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
        }
      }
      buckets_owned_.SetInitialBits(num_buckets_);
    }

    num_entries_ = other.num_entries_;
  }

  class Node : public ArenaObject<kArenaAllocGvn> {
   public:
    Node(HInstruction* instruction, size_t hash_code, Node* next)
        : instruction_(instruction), hash_code_(hash_code), next_(next) {}

    size_t GetHashCode() const { return hash_code_; }
    HInstruction* GetInstruction() const { return instruction_; }
    Node* GetNext() const { return next_; }
    void SetNext(Node* node) { next_ = node; }

    Node* Dup(ArenaAllocator* allocator, Node* new_next = nullptr) {
      return new (allocator) Node(instruction_, hash_code_, new_next);
    }

   private:
    HInstruction* const instruction_;
    const size_t hash_code_;
    Node* next_;

    DISALLOW_COPY_AND_ASSIGN(Node);
  };

  // Creates our own copy of a bucket that currently points into a parent set.
  // This method can be called while iterating over the bucket because it
  // preserves the order of entries in the bucket and returns the clone of
  // the given 'iterator'.
  Node* CloneBucket(size_t index, Node* iterator = nullptr) {
    DCHECK(!buckets_owned_.IsBitSet(index));
    Node* clone_current = nullptr;
    Node* clone_previous = nullptr;
    Node* clone_iterator = nullptr;
    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      clone_current = node->Dup(allocator_, nullptr);
      if (node == iterator) {
        clone_iterator = clone_current;
      }
      if (clone_previous == nullptr) {
        buckets_[index] = clone_current;
      } else {
        clone_previous->SetNext(clone_current);
      }
      clone_previous = clone_current;
    }
    buckets_owned_.SetBit(index);
    return clone_iterator;
  }

  // Iterates over buckets with impure instructions (even indices) and deletes
  // the entries for which 'cond' returns true.
  template<typename Functor>
  void DeleteAllImpureWhich(Functor cond) {
    for (size_t i = 0; i < num_buckets_; i += 2) {
      Node* node = buckets_[i];
      Node* previous = nullptr;

      if (node == nullptr) {
        continue;
      }

      if (!buckets_owned_.IsBitSet(i)) {
        // Bucket is not owned but maybe we won't need to change it at all.
        // Iterate as long as the entries don't satisfy 'cond'.
        while (node != nullptr) {
          if (cond(node)) {
            // We do need to delete an entry but we do not own the bucket.
            // Clone the bucket, make sure 'previous' and 'node' point to
            // the cloned entries and break.
            previous = CloneBucket(i, previous);
            node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
            break;
          }
          previous = node;
          node = node->GetNext();
        }
      }

      // By this point we either own the bucket and can start deleting entries,
      // or we do not own it but no entries matched 'cond'.
      DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);

      // We iterate over the remainder of entries and delete those that match
      // the given condition.
      while (node != nullptr) {
        Node* next = node->GetNext();
        if (cond(node)) {
          if (previous == nullptr) {
            buckets_[i] = next;
          } else {
            previous->SetNext(next);
          }
        } else {
          previous = node;
        }
        node = next;
      }
    }
  }

  // Computes a bucket count such that the load factor is reasonable.
  // This is estimated as (num_entries_ * 1.5), rounded up to the nearest power of two.
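  // For example, 6 entries give 6 + 3 = 9, which rounds up to 16 buckets;
  // anything at 5 entries or below falls back to the minimum of 8 buckets.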
  size_t IdealBucketCount() const {
    size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
    if (bucket_count > kMinimumNumberOfBuckets) {
      return bucket_count;
    } else {
      return kMinimumNumberOfBuckets;
    }
  }

  // Generates a hash code for an instruction.
  size_t HashCode(HInstruction* instruction) const {
    size_t hash_code = instruction->ComputeHashCode();
    // Pure instructions are put into odd buckets to speed up deletion. Note that in the
    // case of irreducible loops, we don't put pure instructions in odd buckets, as we
    // need to delete them when entering the loop.
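    // As a result, impure instructions (and all instructions when the graph has
    // irreducible loops) land in even buckets, which is what lets
    // DeleteAllImpureWhich() skip the odd-indexed buckets entirely.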
    if (instruction->GetSideEffects().HasDependencies() ||
        instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
      return (hash_code << 1) | 0;
    } else {
      return (hash_code << 1) | 1;
    }
  }

  // Converts a hash code to a bucket index.
  size_t BucketIndex(size_t hash_code) const {
    return hash_code & (num_buckets_ - 1);
  }

  ArenaAllocator* const allocator_;

  // The internal bucket implementation of the set.
  size_t const num_buckets_;
  Node** const buckets_;

  // Flags specifying which buckets were copied into the set from its parent.
  // If a flag is not set, the corresponding bucket points to entries in the
  // parent and must be cloned prior to making changes.
  ArenaBitVector buckets_owned_;

  // The number of entries in the set.
  size_t num_entries_;

  static constexpr size_t kMinimumNumberOfBuckets = 8;

  DISALLOW_COPY_AND_ASSIGN(ValueSet);
};

/**
 * Optimization phase that removes redundant instructions.
 */
class GlobalValueNumberer : public ValueObject {
 public:
  GlobalValueNumberer(ArenaAllocator* allocator,
                      HGraph* graph,
                      const SideEffectsAnalysis& side_effects)
      : graph_(graph),
        allocator_(allocator),
        side_effects_(side_effects),
        sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)),
        visited_blocks_(
            allocator, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {}

  void Run();

 private:
  // Per-block GVN. Will also update the ValueSet of the dominated and
  // successor blocks.
  void VisitBasicBlock(HBasicBlock* block);

  HGraph* graph_;
  ArenaAllocator* const allocator_;
  const SideEffectsAnalysis& side_effects_;

  ValueSet* FindSetFor(HBasicBlock* block) const {
    ValueSet* result = sets_[block->GetBlockId()];
    DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
    return result;
  }

  void AbandonSetFor(HBasicBlock* block) {
    DCHECK(sets_[block->GetBlockId()] != nullptr)
        << "Block B" << block->GetBlockId() << " expected to have a set";
    sets_[block->GetBlockId()] = nullptr;
  }

  // Returns false if the GlobalValueNumberer has already visited all blocks
  // which may reference `block`.
  bool WillBeReferencedAgain(HBasicBlock* block) const;

  // Iterates over visited blocks and finds one which has a ValueSet such that:
  // (a) it will not be referenced in the future, and
  // (b) it can hold a copy of `reference_set` with a reasonable load factor.
  HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
                                                 const ValueSet& reference_set) const;

  // ValueSets for blocks. Initially null; a block's set is allocated and
  // populated by its dominator, and updated by all blocks on the path from
  // the dominator to the block.
  ArenaVector<ValueSet*> sets_;

  // BitVector which serves as a fast-access map from block id to
  // visited/unvisited boolean.
  ArenaBitVector visited_blocks_;

  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
};

void GlobalValueNumberer::Run() {
  DCHECK(side_effects_.HasRun());
  sets_[graph_->GetEntryBlock()->GetBlockId()] = new (allocator_) ValueSet(allocator_);

  // Use the reverse post order to ensure the non-back-edge predecessors of a block are
  // visited before the block itself.
  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
    VisitBasicBlock(it.Current());
  }
}

void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
  ValueSet* set = nullptr;

  const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
  if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
    // The entry block should only accumulate constant instructions, and
    // the builder puts constants only in the entry block.
    // Therefore, there is no need to propagate the value set to the next block.
    set = new (allocator_) ValueSet(allocator_);
  } else {
    HBasicBlock* dominator = block->GetDominator();
    ValueSet* dominator_set = FindSetFor(dominator);

    if (dominator->GetSuccessors().size() == 1) {
      // `block` is a direct successor of its dominator. No need to clone the
      // dominator's set, `block` can take over its ownership including its buckets.
      DCHECK_EQ(dominator->GetSingleSuccessor(), block);
      AbandonSetFor(dominator);
      set = dominator_set;
    } else {
      // Try to find a basic block which will never be referenced again and whose
      // ValueSet can therefore be recycled. We will need to copy `dominator_set`
      // into the recycled set, so we pass `dominator_set` as a reference for size.
      HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
      if (recyclable == nullptr) {
        // No block with a suitable ValueSet found. Allocate a new one and
        // copy `dominator_set` into it.
        set = new (allocator_) ValueSet(allocator_, *dominator_set);
      } else {
        // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
        set = FindSetFor(recyclable);
        AbandonSetFor(recyclable);
        set->PopulateFrom(*dominator_set);
      }
    }

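    // Adjust the inherited set before processing the block's instructions: loop
    // headers either clear the set (irreducible loops) or kill entries clobbered
    // by the loop's side effects, and merge points intersect the sets of all
    // predecessors.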
    if (!set->IsEmpty()) {
      if (block->IsLoopHeader()) {
        if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
          // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
          // loop header. We clear the set at entry of irreducible loops and any loop containing
          // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
          // across the irreducible loop.
          // Note that, if we're not compiling OSR, we could still do GVN and introduce
          // phis at irreducible loop headers. We decided it was not worth the complexity.
          set->Clear();
        } else {
          DCHECK(!block->GetLoopInformation()->IsIrreducible());
          DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
          set->Kill(side_effects_.GetLoopEffects(block));
        }
      } else if (predecessors.size() > 1) {
        for (HBasicBlock* predecessor : predecessors) {
          set->IntersectWith(FindSetFor(predecessor));
          if (set->IsEmpty()) {
            break;
          }
        }
      }
    }
  }

  sets_[block->GetBlockId()] = set;

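  // Walk the block's instructions: movable instructions are looked up in the set
  // and replaced with an existing equivalent if one is found; otherwise their side
  // effects kill conflicting entries and they are added to the set. Non-movable
  // instructions only kill.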
  HInstruction* current = block->GetFirstInstruction();
  while (current != nullptr) {
    // Save the next instruction in case `current` is removed from the graph.
    HInstruction* next = current->GetNext();
    // Do not kill the set with the side effects of the instruction just now: if
    // the instruction is GVN'ed, we don't need to kill.
    if (current->CanBeMoved()) {
      if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
        // For commutative ops, (x op y) will be treated the same as (y op x)
        // after fixed ordering.
        current->AsBinaryOperation()->OrderInputs();
      }
      HInstruction* existing = set->Lookup(current);
      if (existing != nullptr) {
        // This replacement doesn't require any further OrderInputs() calls: `current`
        // is either used by an instruction it dominates, which hasn't been visited yet
        // given the order in which we visit instructions, or by a phi, and we don't
        // call OrderInputs() on a phi anyway.
        current->ReplaceWith(existing);
        current->GetBlock()->RemoveInstruction(current);
      } else {
        set->Kill(current->GetSideEffects());
        set->Add(current);
      }
    } else {
      set->Kill(current->GetSideEffects());
    }
    current = next;
  }

  visited_blocks_.SetBit(block->GetBlockId());
}

bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
  DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));

  for (auto dominated_block : block->GetDominatedBlocks()) {
    if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
      return true;
    }
  }

  for (auto successor : block->GetSuccessors()) {
    if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
      return true;
    }
  }

  return false;
}

HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
    HBasicBlock* block, const ValueSet& reference_set) const {
  HBasicBlock* secondary_match = nullptr;

  for (size_t block_id : visited_blocks_.Indexes()) {
    ValueSet* current_set = sets_[block_id];
    if (current_set == nullptr) {
      // Set was already recycled.
      continue;
    }

    HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];

    // We test if `current_set` has enough buckets to store a copy of
    // `reference_set` with a reasonable load factor. If we find a set whose
    // number of buckets matches perfectly, we return right away. If we find one
    // that is larger, we return it if no perfectly-matching set is found.
    // Note that we defer testing WillBeReferencedAgain until all other criteria
    // have been satisfied because it might be expensive.
    if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) {
      if (!WillBeReferencedAgain(current_block)) {
        return current_block;
      }
    } else if (secondary_match == nullptr &&
               current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) {
      if (!WillBeReferencedAgain(current_block)) {
        secondary_match = current_block;
      }
    }
  }

  return secondary_match;
}

void GVNOptimization::Run() {
  GlobalValueNumberer gvn(graph_->GetArena(), graph_, side_effects_);
  gvn.Run();
}

}  // namespace art