/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gvn.h"

#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/bit_vector-inl.h"
#include "side_effects_analysis.h"
#include "utils.h"

namespace art {

/**
 * A ValueSet holds instructions that can replace other instructions. It is
 * updated through the `Add` and `Kill` methods. The `Kill` method removes
 * instructions that are affected by the given side effect.
 *
 * The `Lookup` method returns an equivalent instruction to the given instruction
 * if there is one in the set. In GVN, we would say those instructions have the
 * same "number".
 */
class ValueSet : public ArenaObject<kArenaAllocGvn> {
 public:
  // Constructs an empty ValueSet which owns all its buckets.
  explicit ValueSet(ArenaAllocator* allocator)
      : allocator_(allocator),
        num_buckets_(kMinimumNumberOfBuckets),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // ArenaAllocator returns zeroed memory, so no need to set buckets to null.
    DCHECK(IsPowerOfTwo(num_buckets_));
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Copy constructor. Depending on the load factor, it will either make a deep
  // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
  ValueSet(ArenaAllocator* allocator, const ValueSet& other)
      : allocator_(allocator),
        num_buckets_(other.IdealBucketCount()),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // ArenaAllocator returns zeroed memory, so entries of buckets_ and
    // buckets_owned_ are initialized to null and false, respectively.
    DCHECK(IsPowerOfTwo(num_buckets_));
    PopulateFromInternal(other, /* is_dirty */ false);
  }

  // Erases all values in this set and populates it with values from `other`.
  void PopulateFrom(const ValueSet& other) {
    if (this == &other) {
      return;
    }
    PopulateFromInternal(other, /* is_dirty */ true);
  }

  // Returns true if `this` has enough buckets so that if `other` is copied into
  // it, the load factor will not cross the upper threshold.
  // If `exact_match` is set, true is returned only if `this` has the ideal
  // number of buckets. A larger number of buckets is allowed otherwise.
  bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
    if (exact_match) {
      return other.IdealBucketCount() == num_buckets_;
    } else {
      return other.IdealBucketCount() <= num_buckets_;
    }
  }

  // Adds an instruction to the set.
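  // If the target bucket is not owned (it still points into an ancestor's
  // set), it is cloned first so that the ancestor's chain stays untouched.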
  void Add(HInstruction* instruction) {
    DCHECK(Lookup(instruction) == nullptr);
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    if (!buckets_owned_.IsBitSet(index)) {
      CloneBucket(index);
    }
    buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
    ++num_entries_;
  }

  // If in the set, returns an equivalent instruction to the given instruction.
  // Returns null otherwise.
  HInstruction* Lookup(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetHashCode() == hash_code) {
        HInstruction* existing = node->GetInstruction();
        if (existing->Equals(instruction)) {
          return existing;
        }
      }
    }
    return nullptr;
  }

  // Returns whether instruction is in the set.
  bool Contains(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetInstruction() == instruction) {
        return true;
      }
    }
    return false;
  }

  // Removes all instructions in the set affected by the given side effects.
  void Kill(SideEffects side_effects) {
    DeleteAllImpureWhich([side_effects](Node* node) {
      return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects);
    });
  }

  void Clear() {
    num_entries_ = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      buckets_[i] = nullptr;
    }
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Updates this set by intersecting with instructions in a predecessor's set.
  void IntersectWith(ValueSet* predecessor) {
    if (IsEmpty()) {
      return;
    } else if (predecessor->IsEmpty()) {
      Clear();
    } else {
      // Pure instructions do not need to be tested because only impure
      // instructions can be killed.
      DeleteAllImpureWhich([predecessor](Node* node) {
        return !predecessor->Contains(node->GetInstruction());
      });
    }
  }

  bool IsEmpty() const { return num_entries_ == 0; }
  size_t GetNumberOfEntries() const { return num_entries_; }

 private:
  // Copies all entries from `other` to `this`.
  // If `is_dirty` is set to true, existing data will be wiped first. It is
  // assumed that `buckets_` and `buckets_owned_` are zero-allocated otherwise.
  void PopulateFromInternal(const ValueSet& other, bool is_dirty) {
    DCHECK_NE(this, &other);
    DCHECK_GE(num_buckets_, other.IdealBucketCount());

    if (num_buckets_ == other.num_buckets_) {
      // Hash table remains the same size. We copy the bucket pointers and leave
      // all buckets_owned_ bits false.
      if (is_dirty) {
        buckets_owned_.ClearAllBits();
      } else {
        DCHECK_EQ(buckets_owned_.NumSetBits(), 0u);
      }
      memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
    } else {
      // Hash table size changes. We copy and rehash all entries, and set all
      // buckets_owned_ bits to true.
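      // Nodes from `other` are duplicated rather than shared, since their
      // `next_` links would not match the new bucket layout.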
      if (is_dirty) {
        memset(buckets_, 0, num_buckets_ * sizeof(Node*));
      } else {
        if (kIsDebugBuild) {
          for (size_t i = 0; i < num_buckets_; ++i) {
            DCHECK(buckets_[i] == nullptr) << i;
          }
        }
      }
      for (size_t i = 0; i < other.num_buckets_; ++i) {
        for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
          size_t new_index = BucketIndex(node->GetHashCode());
          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
        }
      }
      buckets_owned_.SetInitialBits(num_buckets_);
    }

    num_entries_ = other.num_entries_;
  }

  class Node : public ArenaObject<kArenaAllocGvn> {
   public:
    Node(HInstruction* instruction, size_t hash_code, Node* next)
        : instruction_(instruction), hash_code_(hash_code), next_(next) {}

    size_t GetHashCode() const { return hash_code_; }
    HInstruction* GetInstruction() const { return instruction_; }
    Node* GetNext() const { return next_; }
    void SetNext(Node* node) { next_ = node; }

    Node* Dup(ArenaAllocator* allocator, Node* new_next = nullptr) {
      return new (allocator) Node(instruction_, hash_code_, new_next);
    }

   private:
    HInstruction* const instruction_;
    const size_t hash_code_;
    Node* next_;

    DISALLOW_COPY_AND_ASSIGN(Node);
  };

  // Creates our own copy of a bucket that is currently pointing to a parent.
  // This algorithm can be called while iterating over the bucket because it
  // preserves the order of entries in the bucket and will return the clone of
  // the given 'iterator'.
  Node* CloneBucket(size_t index, Node* iterator = nullptr) {
    DCHECK(!buckets_owned_.IsBitSet(index));
    Node* clone_current = nullptr;
    Node* clone_previous = nullptr;
    Node* clone_iterator = nullptr;
    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      clone_current = node->Dup(allocator_, nullptr);
      if (node == iterator) {
        clone_iterator = clone_current;
      }
      if (clone_previous == nullptr) {
        buckets_[index] = clone_current;
      } else {
        clone_previous->SetNext(clone_current);
      }
      clone_previous = clone_current;
    }
    buckets_owned_.SetBit(index);
    return clone_iterator;
  }

  // Iterates over buckets with impure instructions (even indices) and deletes
  // the entries for which 'cond' returns true.
  template<typename Functor>
  void DeleteAllImpureWhich(Functor cond) {
    for (size_t i = 0; i < num_buckets_; i += 2) {
      Node* node = buckets_[i];
      Node* previous = nullptr;

      if (node == nullptr) {
        continue;
      }

      if (!buckets_owned_.IsBitSet(i)) {
        // Bucket is not owned but maybe we won't need to change it at all.
        // Iterate as long as the entries don't satisfy 'cond'.
        while (node != nullptr) {
          if (cond(node)) {
            // We do need to delete an entry but we do not own the bucket.
            // Clone the bucket, make sure 'previous' and 'node' point to
            // the cloned entries and break.
            previous = CloneBucket(i, previous);
            node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
            break;
          }
          previous = node;
          node = node->GetNext();
        }
      }

      // By this point we either own the bucket and can start deleting entries,
      // or we do not own it but no entries matched 'cond'.
      DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);

      // We iterate over the remainder of entries and delete those that match
      // the given condition.
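      // Removal is just unlinking; Node objects are arena-allocated and are
      // reclaimed together with the arena, so nothing is freed here.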
      while (node != nullptr) {
        Node* next = node->GetNext();
        if (cond(node)) {
          if (previous == nullptr) {
            buckets_[i] = next;
          } else {
            previous->SetNext(next);
          }
        } else {
          previous = node;
        }
        node = next;
      }
    }
  }

  // Computes a bucket count such that the load factor is reasonable.
  // This is estimated as (num_entries_ * 1.5) and rounded up to the nearest
  // power of two.
  size_t IdealBucketCount() const {
    size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
    if (bucket_count > kMinimumNumberOfBuckets) {
      return bucket_count;
    } else {
      return kMinimumNumberOfBuckets;
    }
  }

  // Generates a hash code for an instruction.
  size_t HashCode(HInstruction* instruction) const {
    size_t hash_code = instruction->ComputeHashCode();
    // Pure instructions are put into odd buckets to speed up deletion. Note that in the
    // case of irreducible loops, we don't put pure instructions in odd buckets, as we
    // need to delete them when entering the loop.
    if (instruction->GetSideEffects().HasDependencies() ||
        instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
      return (hash_code << 1) | 0;
    } else {
      return (hash_code << 1) | 1;
    }
  }

  // Converts a hash code to a bucket index.
  size_t BucketIndex(size_t hash_code) const {
    return hash_code & (num_buckets_ - 1);
  }

  ArenaAllocator* const allocator_;

  // The internal bucket implementation of the set.
  size_t const num_buckets_;
  Node** const buckets_;

  // Flags specifying which buckets were copied into the set from its parent.
  // If a flag is not set, the corresponding bucket points to entries in the
  // parent and must be cloned prior to making changes.
  ArenaBitVector buckets_owned_;

  // The number of entries in the set.
  size_t num_entries_;

  static constexpr size_t kMinimumNumberOfBuckets = 8;

  DISALLOW_COPY_AND_ASSIGN(ValueSet);
};

/**
 * Optimization phase that removes redundant instructions.
 */
class GlobalValueNumberer : public ValueObject {
 public:
  GlobalValueNumberer(ArenaAllocator* allocator,
                      HGraph* graph,
                      const SideEffectsAnalysis& side_effects)
      : graph_(graph),
        allocator_(allocator),
        side_effects_(side_effects),
        sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)),
        visited_blocks_(
            allocator, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {}

  void Run();

 private:
  // Per-block GVN. Will also update the ValueSet of the dominated and
  // successor blocks.
  void VisitBasicBlock(HBasicBlock* block);

  HGraph* graph_;
  ArenaAllocator* const allocator_;
  const SideEffectsAnalysis& side_effects_;

  ValueSet* FindSetFor(HBasicBlock* block) const {
    ValueSet* result = sets_[block->GetBlockId()];
    DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
    return result;
  }

  void AbandonSetFor(HBasicBlock* block) {
    DCHECK(sets_[block->GetBlockId()] != nullptr)
        << "Block B" << block->GetBlockId() << " expected to have a set";
    sets_[block->GetBlockId()] = nullptr;
  }

  // Returns false if the GlobalValueNumberer has already visited all blocks
  // which may reference `block`.
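  // Used by FindVisitedBlockWithRecyclableSet to decide whether a block's
  // ValueSet can safely be recycled.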
  bool WillBeReferencedAgain(HBasicBlock* block) const;

  // Iterates over visited blocks and finds one which has a ValueSet such that:
  // (a) it will not be referenced in the future, and
  // (b) it can hold a copy of `reference_set` with a reasonable load factor.
  HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
                                                 const ValueSet& reference_set) const;

  // ValueSet for blocks. Initially null, but for an individual block they
  // are allocated and populated by the dominator, and updated by all blocks
  // in the path from the dominator to the block.
  ArenaVector<ValueSet*> sets_;

  // BitVector which serves as a fast-access map from block id to
  // visited/unvisited boolean.
  ArenaBitVector visited_blocks_;

  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
};

void GlobalValueNumberer::Run() {
  DCHECK(side_effects_.HasRun());
  sets_[graph_->GetEntryBlock()->GetBlockId()] = new (allocator_) ValueSet(allocator_);

  // Use the reverse post order to ensure the non back-edge predecessors of a block are
  // visited before the block itself.
  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
    VisitBasicBlock(it.Current());
  }
}

void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
  ValueSet* set = nullptr;

  const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
  if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
    // The entry block should only accumulate constant instructions, and
    // the builder puts constants only in the entry block.
    // Therefore, there is no need to propagate the value set to the next block.
    set = new (allocator_) ValueSet(allocator_);
  } else {
    HBasicBlock* dominator = block->GetDominator();
    ValueSet* dominator_set = FindSetFor(dominator);

    if (dominator->GetSuccessors().size() == 1) {
      // `block` is a direct successor of its dominator. No need to clone the
      // dominator's set, `block` can take over its ownership including its buckets.
      DCHECK_EQ(dominator->GetSingleSuccessor(), block);
      AbandonSetFor(dominator);
      set = dominator_set;
    } else {
      // Try to find a basic block which will never be referenced again and whose
      // ValueSet can therefore be recycled. We will need to copy `dominator_set`
      // into the recycled set, so we pass `dominator_set` as a reference for size.
      HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
      if (recyclable == nullptr) {
        // No block with a suitable ValueSet found. Allocate a new one and
        // copy `dominator_set` into it.
        set = new (allocator_) ValueSet(allocator_, *dominator_set);
      } else {
        // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
        set = FindSetFor(recyclable);
        AbandonSetFor(recyclable);
        set->PopulateFrom(*dominator_set);
      }
    }

    if (!set->IsEmpty()) {
      if (block->IsLoopHeader()) {
        if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
          // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
          // loop header. We clear the set at entry of irreducible loops and any loop containing
          // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
          // across the irreducible loop.
          // Note that, if we're not compiling OSR, we could still do GVN and introduce
          // phis at irreducible loop headers.
          // We decided it was not worth the complexity.
          set->Clear();
        } else {
          DCHECK(!block->GetLoopInformation()->IsIrreducible());
          DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
          set->Kill(side_effects_.GetLoopEffects(block));
        }
      } else if (predecessors.size() > 1) {
        for (HBasicBlock* predecessor : predecessors) {
          set->IntersectWith(FindSetFor(predecessor));
          if (set->IsEmpty()) {
            break;
          }
        }
      }
    }
  }

  sets_[block->GetBlockId()] = set;

  HInstruction* current = block->GetFirstInstruction();
  while (current != nullptr) {
    // Save the next instruction in case `current` is removed from the graph.
    HInstruction* next = current->GetNext();
    // Do not kill the set with the side effects of the instruction just now: if
    // the instruction is GVN'ed, we don't need to kill.
    if (current->CanBeMoved()) {
      if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
        // For commutative ops, (x op y) will be treated the same as (y op x)
        // after fixed ordering.
        current->AsBinaryOperation()->OrderInputs();
      }
      HInstruction* existing = set->Lookup(current);
      if (existing != nullptr) {
        // This replacement doesn't require any further OrderInputs() calls:
        // `current` is either used by an instruction that it dominates, which
        // hasn't been visited yet due to the visiting order, or it is used by
        // a phi, and we don't do OrderInputs() on a phi anyway.
        current->ReplaceWith(existing);
        current->GetBlock()->RemoveInstruction(current);
      } else {
        set->Kill(current->GetSideEffects());
        set->Add(current);
      }
    } else {
      set->Kill(current->GetSideEffects());
    }
    current = next;
  }

  visited_blocks_.SetBit(block->GetBlockId());
}

bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
  DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));

  for (auto dominated_block : block->GetDominatedBlocks()) {
    if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
      return true;
    }
  }

  for (auto successor : block->GetSuccessors()) {
    if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
      return true;
    }
  }

  return false;
}

HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
    HBasicBlock* block, const ValueSet& reference_set) const {
  HBasicBlock* secondary_match = nullptr;

  for (size_t block_id : visited_blocks_.Indexes()) {
    ValueSet* current_set = sets_[block_id];
    if (current_set == nullptr) {
      // Set was already recycled.
      continue;
    }

    HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];

    // We test if `current_set` has enough buckets to store a copy of
    // `reference_set` with a reasonable load factor. If we find a set whose
    // number of buckets matches perfectly, we return right away. If we find one
    // that is larger, we return it if no perfectly-matching set is found.
    // Note that we defer testing WillBeReferencedAgain until all other criteria
    // have been satisfied because it might be expensive.
    if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) {
      if (!WillBeReferencedAgain(current_block)) {
        return current_block;
      }
    } else if (secondary_match == nullptr &&
               current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) {
      if (!WillBeReferencedAgain(current_block)) {
        secondary_match = current_block;
      }
    }
  }

  return secondary_match;
}

void GVNOptimization::Run() {
  GlobalValueNumberer gvn(graph_->GetArena(), graph_, side_effects_);
  gvn.Run();
}

}  // namespace art