/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gvn.h"

#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/utils.h"
#include "side_effects_analysis.h"

namespace art {

/**
 * A ValueSet holds instructions that can replace other instructions. It is
 * updated through the `Add` and `Kill` methods: `Add` inserts an instruction,
 * and `Kill` removes the instructions that are affected by the given side
 * effects.
 *
 * The `Lookup` method returns an instruction equivalent to the given one if
 * the set contains such an instruction. In GVN terms, two equivalent
 * instructions have the same "value number".
 */
class ValueSet : public ArenaObject<kArenaAllocGvn> {
 public:
  // Constructs an empty ValueSet which owns all its buckets.
  explicit ValueSet(ScopedArenaAllocator* allocator)
      : allocator_(allocator),
        num_buckets_(kMinimumNumberOfBuckets),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // The scoped arena allocator does not guarantee zeroed memory, so the
    // buckets must be set to null explicitly.
    DCHECK(IsPowerOfTwo(num_buckets_));
    std::fill_n(buckets_, num_buckets_, nullptr);
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Copy constructor. Depending on the load factor, it will either make a deep
  // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
  ValueSet(ScopedArenaAllocator* allocator, const ValueSet& other)
      : allocator_(allocator),
        num_buckets_(other.IdealBucketCount()),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // All entries of buckets_ and all bits of buckets_owned_ are initialized
    // by PopulateFromInternal below.
    DCHECK(IsPowerOfTwo(num_buckets_));
    PopulateFromInternal(other);
  }

  // Erases all values in this set and populates it with values from `other`.
  void PopulateFrom(const ValueSet& other) {
    if (this == &other) {
      return;
    }
    PopulateFromInternal(other);
  }

  // Returns true if `this` has enough buckets so that if `other` is copied into
  // it, the load factor will not cross the upper threshold.
  // If `exact_match` is set, true is returned only if `this` has the ideal
  // number of buckets. A larger number of buckets is allowed otherwise.
  bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
    if (exact_match) {
      return other.IdealBucketCount() == num_buckets_;
    } else {
      return other.IdealBucketCount() <= num_buckets_;
    }
  }
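  // A worked example of the sizing contract (illustrative numbers only): if
  // `other` holds 10 entries, other.IdealBucketCount() is
  // RoundUpToPowerOfTwo(10 + 10 / 2) == 16. A set with num_buckets_ == 16 can
  // hold a copy of `other` whether or not `exact_match` is set, whereas a set
  // with num_buckets_ == 32 qualifies only when `exact_match` is false.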
  // Adds an instruction to the set.
  void Add(HInstruction* instruction) {
    DCHECK(Lookup(instruction) == nullptr);
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    if (!buckets_owned_.IsBitSet(index)) {
      CloneBucket(index);
    }
    buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
    ++num_entries_;
  }

  // Returns an instruction equivalent to the given one if the set contains
  // such an instruction. Returns null otherwise.
  HInstruction* Lookup(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetHashCode() == hash_code) {
        HInstruction* existing = node->GetInstruction();
        if (existing->Equals(instruction)) {
          return existing;
        }
      }
    }
    return nullptr;
  }

  // Returns whether `instruction` is in the set.
  bool Contains(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetInstruction() == instruction) {
        return true;
      }
    }
    return false;
  }

  // Removes all instructions in the set affected by the given side effects.
  void Kill(SideEffects side_effects) {
    DeleteAllImpureWhich([side_effects](Node* node) {
      return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects);
    });
  }

  void Clear() {
    num_entries_ = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      buckets_[i] = nullptr;
    }
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Updates this set by intersecting with instructions in a predecessor's set.
  void IntersectWith(ValueSet* predecessor) {
    if (IsEmpty()) {
      return;
    } else if (predecessor->IsEmpty()) {
      Clear();
    } else {
      // Pure instructions do not need to be tested because only impure
      // instructions can be killed.
      DeleteAllImpureWhich([predecessor](Node* node) {
        return !predecessor->Contains(node->GetInstruction());
      });
    }
  }

  bool IsEmpty() const { return num_entries_ == 0; }
  size_t GetNumberOfEntries() const { return num_entries_; }
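  // Example of how Kill interacts with the pure/impure split (hypothetical
  // instructions): an ArrayGet reading from an int[] is impure because it
  // depends on heap state, so Kill() with side effects that include a write
  // to an int[] removes it via SideEffects::MayDependOn(). A pure arithmetic
  // instruction such as an HAdd has no dependencies and is never removed by
  // Kill().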
 private:
  // Copies all entries from `other` to `this`.
  void PopulateFromInternal(const ValueSet& other) {
    DCHECK_NE(this, &other);
    DCHECK_GE(num_buckets_, other.IdealBucketCount());

    if (num_buckets_ == other.num_buckets_) {
      // Hash table remains the same size. We copy the bucket pointers and leave
      // all buckets_owned_ bits false.
      buckets_owned_.ClearAllBits();
      memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
    } else {
      // Hash table size changes. We copy and rehash all entries, and set all
      // buckets_owned_ bits to true.
      std::fill_n(buckets_, num_buckets_, nullptr);
      for (size_t i = 0; i < other.num_buckets_; ++i) {
        for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
          size_t new_index = BucketIndex(node->GetHashCode());
          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
        }
      }
      buckets_owned_.SetInitialBits(num_buckets_);
    }

    num_entries_ = other.num_entries_;
  }

  class Node : public ArenaObject<kArenaAllocGvn> {
   public:
    Node(HInstruction* instruction, size_t hash_code, Node* next)
        : instruction_(instruction), hash_code_(hash_code), next_(next) {}

    size_t GetHashCode() const { return hash_code_; }
    HInstruction* GetInstruction() const { return instruction_; }
    Node* GetNext() const { return next_; }
    void SetNext(Node* node) { next_ = node; }

    Node* Dup(ScopedArenaAllocator* allocator, Node* new_next = nullptr) {
      return new (allocator) Node(instruction_, hash_code_, new_next);
    }

   private:
    HInstruction* const instruction_;
    const size_t hash_code_;
    Node* next_;

    DISALLOW_COPY_AND_ASSIGN(Node);
  };

  // Creates our own copy of a bucket that is currently pointing to a parent.
  // This algorithm can be called while iterating over the bucket because it
  // preserves the order of entries in the bucket and will return the clone of
  // the given 'iterator'.
  Node* CloneBucket(size_t index, Node* iterator = nullptr) {
    DCHECK(!buckets_owned_.IsBitSet(index));
    Node* clone_current = nullptr;
    Node* clone_previous = nullptr;
    Node* clone_iterator = nullptr;
    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      clone_current = node->Dup(allocator_, nullptr);
      if (node == iterator) {
        clone_iterator = clone_current;
      }
      if (clone_previous == nullptr) {
        buckets_[index] = clone_current;
      } else {
        clone_previous->SetNext(clone_current);
      }
      clone_previous = clone_current;
    }
    buckets_owned_.SetBit(index);
    return clone_iterator;
  }
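  // Copy-on-write in practice (illustrative bucket contents): assume bucket 4
  // is shared with the parent and holds the chain [A -> B -> C]. When B must
  // be deleted, DeleteAllImpureWhich() calls CloneBucket(4, /* iterator */ A).
  // The chain is rebuilt from duplicated nodes, the bucket is marked owned,
  // and the clone of A is returned so that the deletion can proceed in the
  // cloned chain while the parent's chain stays untouched.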
  // Iterates over buckets with impure instructions (even indices) and deletes
  // the ones on which 'cond' returns true.
  template<typename Functor>
  void DeleteAllImpureWhich(Functor cond) {
    for (size_t i = 0; i < num_buckets_; i += 2) {
      Node* node = buckets_[i];
      Node* previous = nullptr;

      if (node == nullptr) {
        continue;
      }

      if (!buckets_owned_.IsBitSet(i)) {
        // Bucket is not owned but maybe we won't need to change it at all.
        // Iterate as long as the entries don't satisfy 'cond'.
        while (node != nullptr) {
          if (cond(node)) {
            // We do need to delete an entry but we do not own the bucket.
            // Clone the bucket, make sure 'previous' and 'node' point to
            // the cloned entries and break.
            previous = CloneBucket(i, previous);
            node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
            break;
          }
          previous = node;
          node = node->GetNext();
        }
      }

      // By this point we either own the bucket and can start deleting entries,
      // or we do not own it but no entries matched 'cond'.
      DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);

      // We iterate over the remainder of entries and delete those that match
      // the given condition.
      while (node != nullptr) {
        Node* next = node->GetNext();
        if (cond(node)) {
          if (previous == nullptr) {
            buckets_[i] = next;
          } else {
            previous->SetNext(next);
          }
        } else {
          previous = node;
        }
        node = next;
      }
    }
  }

  // Computes a bucket count such that the load factor is reasonable.
  // This is estimated as (num_entries_ * 1.5) rounded up to the nearest
  // power of two.
  size_t IdealBucketCount() const {
    size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
    if (bucket_count > kMinimumNumberOfBuckets) {
      return bucket_count;
    } else {
      return kMinimumNumberOfBuckets;
    }
  }

  // Generates a hash code for an instruction.
  size_t HashCode(HInstruction* instruction) const {
    size_t hash_code = instruction->ComputeHashCode();
    // Pure instructions are put into odd buckets to speed up deletion. Note that in the
    // case of irreducible loops, we don't put pure instructions in odd buckets, as we
    // need to delete them when entering the loop.
    // ClinitCheck is treated as a pure instruction since it's only executed
    // once.
    bool pure = !instruction->GetSideEffects().HasDependencies() ||
                instruction->IsClinitCheck();
    if (!pure || instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
      return (hash_code << 1) | 0;
    } else {
      return (hash_code << 1) | 1;
    }
  }

  // Converts a hash code to a bucket index.
  size_t BucketIndex(size_t hash_code) const {
    return hash_code & (num_buckets_ - 1);
  }

  ScopedArenaAllocator* const allocator_;

  // The internal bucket implementation of the set.
  size_t const num_buckets_;
  Node** const buckets_;

  // Flags specifying which buckets were copied into the set from its parent.
  // If a flag is not set, the corresponding bucket points to entries in the
  // parent and must be cloned prior to making changes.
  ArenaBitVector buckets_owned_;

  // The number of entries in the set.
  size_t num_entries_;

  static constexpr size_t kMinimumNumberOfBuckets = 8;

  DISALLOW_COPY_AND_ASSIGN(ValueSet);
};
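// A short usage sketch of the ValueSet API (hypothetical instructions `add1`
// and `add2`; the real driver is GlobalValueNumberer::VisitBasicBlock below):
//
//   ValueSet* set = new (allocator) ValueSet(allocator);
//   set->Add(add1);                          // add1: a pure HAdd(x, y).
//   HInstruction* same = set->Lookup(add2);  // add2: an equivalent HAdd(x, y).
//                                            // `same` is now add1, so add2
//                                            // can be replaced with add1.
//   set->Kill(SideEffects::All());           // Deletes impure entries only;
//                                            // the pure add1 survives.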
/**
 * Optimization phase that removes redundant instructions.
 */
class GlobalValueNumberer : public ValueObject {
 public:
  GlobalValueNumberer(HGraph* graph,
                      const SideEffectsAnalysis& side_effects)
      : graph_(graph),
        allocator_(graph->GetArenaStack()),
        side_effects_(side_effects),
        sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)),
        visited_blocks_(
            &allocator_, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {
    visited_blocks_.ClearAllBits();
  }

  void Run();

 private:
  // Per-block GVN. Will also update the ValueSet of the dominated and
  // successor blocks.
  void VisitBasicBlock(HBasicBlock* block);

  HGraph* graph_;
  ScopedArenaAllocator allocator_;
  const SideEffectsAnalysis& side_effects_;

  ValueSet* FindSetFor(HBasicBlock* block) const {
    ValueSet* result = sets_[block->GetBlockId()];
    DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
    return result;
  }

  void AbandonSetFor(HBasicBlock* block) {
    DCHECK(sets_[block->GetBlockId()] != nullptr)
        << "Block B" << block->GetBlockId() << " expected to have a set";
    sets_[block->GetBlockId()] = nullptr;
  }

  // Returns false if the GlobalValueNumberer has already visited all blocks
  // which may reference `block`.
  bool WillBeReferencedAgain(HBasicBlock* block) const;

  // Iterates over visited blocks and finds one which has a ValueSet such that:
  // (a) it will not be referenced in the future, and
  // (b) it can hold a copy of `reference_set` with a reasonable load factor.
  HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
                                                 const ValueSet& reference_set) const;

  // ValueSets for blocks. Initially null, but for an individual block they
  // are allocated and populated by the dominator, and updated by all blocks
  // in the path from the dominator to the block.
  ScopedArenaVector<ValueSet*> sets_;

  // BitVector which serves as a fast-access map from block id to
  // visited/unvisited Boolean.
  ArenaBitVector visited_blocks_;

  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
};

void GlobalValueNumberer::Run() {
  DCHECK(side_effects_.HasRun());
  sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_);

  // Use the reverse post order to ensure the non-back-edge predecessors of a
  // block are visited before the block itself.
  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }
}
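// How the per-block sets propagate, on a small diamond CFG (illustrative):
//
//        B1
//       /  \
//      B2  B3
//       \  /
//        B4
//
// B1 dominates B2, B3 and B4. B2 and B3 each receive a copy of B1's set. B4
// starts from its dominator B1's set and, having two predecessors, intersects
// it with the sets left by B2 and B3: an impure value killed on either path
// does not survive into B4.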
void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
  ValueSet* set = nullptr;

  const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
  if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
    // The entry block should only accumulate constant instructions, and
    // the builder puts constants only in the entry block.
    // Therefore, there is no need to propagate the value set to the next block.
    set = new (&allocator_) ValueSet(&allocator_);
  } else {
    HBasicBlock* dominator = block->GetDominator();
    ValueSet* dominator_set = FindSetFor(dominator);

    if (dominator->GetSuccessors().size() == 1) {
      // `block` is a direct successor of its dominator. No need to clone the
      // dominator's set, `block` can take over its ownership including its buckets.
      DCHECK_EQ(dominator->GetSingleSuccessor(), block);
      AbandonSetFor(dominator);
      set = dominator_set;
    } else {
      // Try to find a basic block which will never be referenced again and whose
      // ValueSet can therefore be recycled. We will need to copy `dominator_set`
      // into the recycled set, so we pass `dominator_set` as a reference for size.
      HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
      if (recyclable == nullptr) {
        // No block with a suitable ValueSet found. Allocate a new one and
        // copy `dominator_set` into it.
        set = new (&allocator_) ValueSet(&allocator_, *dominator_set);
      } else {
        // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
        set = FindSetFor(recyclable);
        AbandonSetFor(recyclable);
        set->PopulateFrom(*dominator_set);
      }
    }

    if (!set->IsEmpty()) {
      if (block->IsLoopHeader()) {
        if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
          // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
          // loop header. We clear the set at entry of irreducible loops and any loop containing
          // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
          // across the irreducible loop.
          // Note that, if we're not compiling OSR, we could still do GVN and introduce
          // phis at irreducible loop headers. We decided it was not worth the complexity.
          set->Clear();
        } else {
          DCHECK(!block->GetLoopInformation()->IsIrreducible());
          DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
          set->Kill(side_effects_.GetLoopEffects(block));
        }
      } else if (predecessors.size() > 1) {
        for (HBasicBlock* predecessor : predecessors) {
          set->IntersectWith(FindSetFor(predecessor));
          if (set->IsEmpty()) {
            break;
          }
        }
      }
    }
  }

  sets_[block->GetBlockId()] = set;

  HInstruction* current = block->GetFirstInstruction();
  while (current != nullptr) {
    // Save the next instruction in case `current` is removed from the graph.
    HInstruction* next = current->GetNext();
    // Do not kill the set with the side effects of the instruction just now: if
    // the instruction is GVN'ed, we don't need to kill.
    if (current->CanBeMoved()) {
      if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
        // For commutative ops, (x op y) will be treated the same as (y op x)
        // after fixed ordering.
        current->AsBinaryOperation()->OrderInputs();
      }
      HInstruction* existing = set->Lookup(current);
      if (existing != nullptr) {
        // This replacement doesn't make more OrderInputs() necessary since
        // `current` is either used by an instruction that it dominates, which
        // has not been visited yet due to the order we visit instructions,
        // or by a phi, and we don't do OrderInputs() on a phi anyway.
        current->ReplaceWith(existing);
        current->GetBlock()->RemoveInstruction(current);
      } else {
        set->Kill(current->GetSideEffects());
        set->Add(current);
      }
    } else {
      set->Kill(current->GetSideEffects());
    }
    current = next;
  }

  visited_blocks_.SetBit(block->GetBlockId());
}

bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
  DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));

  for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
    if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
      return true;
    }
  }

  for (const HBasicBlock* successor : block->GetSuccessors()) {
    if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
      return true;
    }
  }

  return false;
}
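// Recycling in practice (hypothetical block ids): suppose B2 has been visited
// and all of its successors and dominated blocks have been visited as well.
// WillBeReferencedAgain(B2) then returns false: no future FindSetFor() call
// can name B2, so its ValueSet's buckets may be reused for another block
// instead of allocating a new set.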
HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
    HBasicBlock* block, const ValueSet& reference_set) const {
  HBasicBlock* secondary_match = nullptr;

  for (size_t block_id : visited_blocks_.Indexes()) {
    ValueSet* current_set = sets_[block_id];
    if (current_set == nullptr) {
      // Set was already recycled.
      continue;
    }

    HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];

    // We test if `current_set` has enough buckets to store a copy of
    // `reference_set` with a reasonable load factor. If we find a set whose
    // number of buckets matches perfectly, we return right away. If we find one
    // that is larger, we return it if no perfectly-matching set is found.
    // Note that we defer testing WillBeReferencedAgain until all other criteria
    // have been satisfied because it might be expensive.
    if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) {
      if (!WillBeReferencedAgain(current_block)) {
        return current_block;
      }
    } else if (secondary_match == nullptr &&
               current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) {
      if (!WillBeReferencedAgain(current_block)) {
        secondary_match = current_block;
      }
    }
  }

  return secondary_match;
}

void GVNOptimization::Run() {
  GlobalValueNumberer gvn(graph_, side_effects_);
  gvn.Run();
}

}  // namespace art