/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "register_allocator_graph_color.h"

#include "code_generator.h"
#include "linear_order.h"
#include "register_allocation_resolver.h"
#include "ssa_liveness_analysis.h"
#include "thread-current-inl.h"

namespace art {

// Highest number of registers that we support for any platform. This can be used for std::bitset,
// for example, which needs to know its size at compile time.
static constexpr size_t kMaxNumRegs = 32;

// The maximum number of graph coloring attempts before triggering a DCHECK.
// This is meant to catch changes to the graph coloring algorithm that undermine its forward
// progress guarantees. Forward progress for the algorithm means splitting live intervals on
// every graph coloring attempt so that eventually the interference graph will be sparse enough
// to color. The main threat to forward progress is trying to split short intervals which cannot be
// split further; this could cause infinite looping because the interference graph would never
// change. This is avoided by prioritizing short intervals before long ones, so that long
// intervals are split when coloring fails.
static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;

// We always want to avoid spilling inside loops.
static constexpr size_t kLoopSpillWeightMultiplier = 10;

// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;

// We avoid moves in blocks that dominate the exit block, since these blocks will
// be executed on every path through the method.
static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;

enum class CoalesceKind {
  kAdjacentSibling,       // Prevents moves at interval split points.
  kFixedOutputSibling,    // Prevents moves from a fixed output location.
  kFixedInput,            // Prevents moves into a fixed input location.
  kNonlinearControlFlow,  // Prevents moves between blocks.
  kPhi,                   // Prevents phi resolution moves.
  kFirstInput,            // Prevents a single input move.
  kAnyInput,              // May lead to better instruction selection / smaller encodings.
};

std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
  return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
}

static size_t LoopDepthAt(HBasicBlock* block) {
  HLoopInformation* loop_info = block->GetLoopInformation();
  size_t depth = 0;
  while (loop_info != nullptr) {
    ++depth;
    loop_info = loop_info->GetPreHeader()->GetLoopInformation();
  }
  return depth;
}

// Return the runtime cost of inserting a move instruction at the specified location.
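// The weights below compound: for example, a move in a single-jump block at loop depth 2
// costs 2 * 10 * 10 = 200, while a move in an ordinary block outside any loop costs 1.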
static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
  HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
  DCHECK(block != nullptr);
  size_t cost = 1;
  if (block->IsSingleJump()) {
    cost *= kSingleJumpBlockWeightMultiplier;
  }
  if (block->Dominates(block->GetGraph()->GetExitBlock())) {
    cost *= kDominatesExitBlockWeightMultiplier;
  }
  for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
    cost *= kLoopSpillWeightMultiplier;
  }
  return cost;
}

// In general, we estimate coalesce priority by whether it will definitely avoid a move,
// and by how likely it is to create an interference graph that's harder to color.
static size_t ComputeCoalescePriority(CoalesceKind kind,
                                      size_t position,
                                      const SsaLivenessAnalysis& liveness) {
  if (kind == CoalesceKind::kAnyInput) {
    // This type of coalescing can affect instruction selection, but not moves, so we
    // give it the lowest priority.
    return 0;
  } else {
    return CostForMoveAt(position, liveness);
  }
}

enum class CoalesceStage {
  kWorklist,  // Currently in the iterative coalescing worklist.
  kActive,    // Not in a worklist, but could be considered again during iterative coalescing.
  kInactive,  // No longer considered until last-chance coalescing.
  kDefunct,   // Either the two nodes interfere, or have already been coalesced.
};

std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
  return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
}

// Represents a coalesce opportunity between two nodes.
struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
  CoalesceOpportunity(InterferenceNode* a,
                      InterferenceNode* b,
                      CoalesceKind kind,
                      size_t position,
                      const SsaLivenessAnalysis& liveness)
      : node_a(a),
        node_b(b),
        stage(CoalesceStage::kWorklist),
        priority(ComputeCoalescePriority(kind, position, liveness)) {}

  // Compare two coalesce opportunities based on their priority.
  // Return true if lhs has a lower priority than that of rhs.
  static bool CmpPriority(const CoalesceOpportunity* lhs,
                          const CoalesceOpportunity* rhs) {
    return lhs->priority < rhs->priority;
  }

  InterferenceNode* const node_a;
  InterferenceNode* const node_b;

  // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
  // and whether it should still be considered.
  CoalesceStage stage;

  // The priority of this coalesce opportunity, based on heuristics.
  const size_t priority;
};

enum class NodeStage {
  kInitial,           // Uninitialized.
  kPrecolored,        // Marks fixed nodes.
  kSafepoint,         // Marks safepoint nodes.
  kPrunable,          // Marks uncolored nodes in the interference graph.
  kSimplifyWorklist,  // Marks non-move-related nodes with degree less than the number of registers.
  kFreezeWorklist,    // Marks move-related nodes with degree less than the number of registers.
  kSpillWorklist,     // Marks nodes with degree greater or equal to the number of registers.
  kPruned             // Marks nodes already pruned from the interference graph.
};

std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
  return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
}

// Returns the estimated cost of spilling a particular live interval.
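// The weight is essentially use density: the total cost of the moves that spilling would
// introduce at the definition and at each register use, divided by the interval's length.
// For example, an interval of length 20 whose only register pressure is two uses in ordinary
// non-loop blocks weighs 2 / 20 = 0.1; the same two uses one loop level deep weigh 20 / 20 = 1.0.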
static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
  if (interval->HasRegister()) {
    // Intervals with a fixed register cannot be spilled.
    return std::numeric_limits<float>::min();
  }

  size_t length = interval->GetLength();
  if (length == 1) {
    // Tiny intervals should have maximum priority, since they cannot be split any further.
    return std::numeric_limits<float>::max();
  }

  size_t use_weight = 0;
  if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
    // Cost for spilling at a register definition point.
    use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
  }

  // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
  // [interval->GetStart() + 1, interval->GetEnd() + 1)
  auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
                                                 interval->GetUses().end(),
                                                 interval->GetStart() + 1u,
                                                 interval->GetEnd() + 1u);
  for (const UsePosition& use : matching_use_range) {
    if (use.GetUser() != nullptr && use.RequiresRegister()) {
      // Cost for spilling at a register use point.
      use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness);
    }
  }

  // We divide by the length of the interval because we want to prioritize
  // short intervals; we do not benefit much if we split them further.
  return static_cast<float>(use_weight) / static_cast<float>(length);
}

// Interference nodes make up the interference graph, which is the primary data structure in
// graph coloring register allocation. Each node represents a single live interval, and contains
// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
// pre-colored nodes never contain outgoing edges (only incoming ones).
//
// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
//
// Note that a pair interval is represented by a single node in the interference graph, which
// essentially requires two colors. One consequence of this is that the degree of a node is not
// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
// number of colors with which a node could interfere. We model this by giving edges different
// weights (1 or 2) to control how much each edge increases the degree of adjacent nodes.
// For example, the edge between two single nodes will have weight 1. On the other hand,
// the edge between a single node and a pair node will have weight 2. This is because the pair
// node could block up to two colors for the single node, and because the single node could
// block an entire two-register aligned slot for the pair node.
// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
// and thus whether it is safe to prune it from the interference graph early on.
class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
 public:
  InterferenceNode(LiveInterval* interval,
                   const SsaLivenessAnalysis& liveness)
      : stage(NodeStage::kInitial),
        interval_(interval),
        adjacent_nodes_(nullptr),
        coalesce_opportunities_(nullptr),
        out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
        alias_(this),
        spill_weight_(ComputeSpillWeight(interval, liveness)),
        requires_color_(interval->RequiresRegister()),
        needs_spill_slot_(false) {
    DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
  }

  void AddInterference(InterferenceNode* other,
                       bool guaranteed_not_interfering_yet,
                       ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) {
    DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
    DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
    DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
    DCHECK_NE(stage, NodeStage::kPruned);
    DCHECK_NE(other->stage, NodeStage::kPruned);
    if (adjacent_nodes_ == nullptr) {
      ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator());
      storage->emplace_back(adapter);
      adjacent_nodes_ = &storage->back();
    }
    if (guaranteed_not_interfering_yet) {
      DCHECK(!ContainsElement(GetAdjacentNodes(), other));
      adjacent_nodes_->push_back(other);
      out_degree_ += EdgeWeightWith(other);
    } else {
      if (!ContainsElement(GetAdjacentNodes(), other)) {
        adjacent_nodes_->push_back(other);
        out_degree_ += EdgeWeightWith(other);
      }
    }
  }

  void RemoveInterference(InterferenceNode* other) {
    DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
    DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
    if (adjacent_nodes_ != nullptr) {
      auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other);
      if (it != adjacent_nodes_->end()) {
        adjacent_nodes_->erase(it);
        out_degree_ -= EdgeWeightWith(other);
      }
    }
  }

  bool ContainsInterference(InterferenceNode* other) const {
    DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
    DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
    return ContainsElement(GetAdjacentNodes(), other);
  }

  LiveInterval* GetInterval() const {
    return interval_;
  }

  ArrayRef<InterferenceNode*> GetAdjacentNodes() const {
    return adjacent_nodes_ != nullptr
        ? ArrayRef<InterferenceNode*>(*adjacent_nodes_)
        : ArrayRef<InterferenceNode*>();
  }

  size_t GetOutDegree() const {
    // Pre-colored nodes have infinite degree.
    DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max());
    return out_degree_;
  }

  void AddCoalesceOpportunity(CoalesceOpportunity* opportunity,
                              ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) {
    if (coalesce_opportunities_ == nullptr) {
      ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator());
      storage->emplace_back(adapter);
      coalesce_opportunities_ = &storage->back();
    }
    coalesce_opportunities_->push_back(opportunity);
  }

  void ClearCoalesceOpportunities() {
    coalesce_opportunities_ = nullptr;
  }

  bool IsMoveRelated() const {
    for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) {
      if (opportunity->stage == CoalesceStage::kWorklist ||
          opportunity->stage == CoalesceStage::kActive) {
        return true;
      }
    }
    return false;
  }

  // Return whether this node already has a color.
  // Used to find fixed nodes in the interference graph before coloring.
  bool IsPrecolored() const {
    return interval_->HasRegister();
  }

  bool IsPair() const {
    return interval_->HasHighInterval();
  }

  void SetAlias(InterferenceNode* rep) {
    DCHECK_NE(rep->stage, NodeStage::kPruned);
    DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
    alias_ = rep;
  }

  InterferenceNode* GetAlias() {
    if (alias_ != this) {
      // Recurse in order to flatten tree of alias pointers.
      alias_ = alias_->GetAlias();
    }
    return alias_;
  }

  ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const {
    return coalesce_opportunities_ != nullptr
        ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_)
        : ArrayRef<CoalesceOpportunity*>();
  }

  float GetSpillWeight() const {
    return spill_weight_;
  }

  bool RequiresColor() const {
    return requires_color_;
  }

  // We give extra weight to edges adjacent to pair nodes. See the general comment on the
  // interference graph above.
  size_t EdgeWeightWith(const InterferenceNode* other) const {
    return (IsPair() || other->IsPair()) ? 2 : 1;
  }

  bool NeedsSpillSlot() const {
    return needs_spill_slot_;
  }

  void SetNeedsSpillSlot() {
    needs_spill_slot_ = true;
  }

  // The current stage of this node, indicating which worklist it belongs to.
  NodeStage stage;

 private:
  // The live interval that this node represents.
  LiveInterval* const interval_;

  // All nodes interfering with this one.
  // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
  // set sizes that we encounter. Using a vector leads to much better performance.
  ScopedArenaVector<InterferenceNode*>* adjacent_nodes_;  // Owned by ColoringIteration.

  // Interference nodes that this node should be coalesced with to reduce moves.
  ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_;  // Owned by ColoringIteration.

  // The maximum number of colors with which this node could interfere. This could be more than
  // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
  // We use "out" degree because incoming edges come from nodes already pruned from the graph,
  // and do not affect the coloring of this node.
  // Pre-colored nodes are treated as having infinite degree.
  size_t out_degree_;

  // The node representing this node in the interference graph.
  // Initially set to `this`, and only changed if this node is coalesced into another.
  InterferenceNode* alias_;

  // The cost of splitting and spilling this interval to the stack.
  // Nodes with a higher spill weight should be prioritized when assigning registers.
  // This is essentially based on use density and location; short intervals with many uses inside
  // deeply nested loops have a high spill weight.
  const float spill_weight_;

  const bool requires_color_;

  bool needs_spill_slot_;

  DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
};

// The order in which we color nodes is important. To guarantee forward progress,
// we prioritize intervals that require registers, and after that we prioritize
// short intervals.
// That way, if we fail to color a node, it either won't require a register,
// or it will be a long interval that can be split in order to make the
// interference graph sparser.
// To improve code quality, we prioritize intervals used frequently in deeply nested loops.
// (This metric is secondary to the forward progress requirements above.)
// TODO: May also want to consider:
// - Constants (since they can be rematerialized)
// - Allocated spill slots
static bool HasGreaterNodePriority(const InterferenceNode* lhs,
                                   const InterferenceNode* rhs) {
  // (1) Prioritize the node that requires a color.
  if (lhs->RequiresColor() != rhs->RequiresColor()) {
    return lhs->RequiresColor();
  }

  // (2) Prioritize the interval that has a higher spill weight.
  return lhs->GetSpillWeight() > rhs->GetSpillWeight();
}

// A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
// and provides methods for each phase of the attempt.
class ColoringIteration {
 public:
  ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
                    ScopedArenaAllocator* allocator,
                    bool processing_core_regs,
                    size_t num_regs)
      : register_allocator_(register_allocator),
        allocator_(allocator),
        processing_core_regs_(processing_core_regs),
        num_regs_(num_regs),
        interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
        coalesce_worklist_(CoalesceOpportunity::CmpPriority,
                           allocator->Adapter(kArenaAllocRegisterAllocator)),
        adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)),
        coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {}

  // Use the intervals collected from instructions to construct an
  // interference graph mapping intervals to adjacency lists.
  // Also, collect synthesized safepoint nodes, used to keep
  // track of live intervals across safepoints.
  // TODO: Should build safepoints elsewhere.
  void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals,
                              const ScopedArenaVector<InterferenceNode*>& physical_nodes);

  // Add coalesce opportunities to interference nodes.
  void FindCoalesceOpportunities();

  // Prune nodes from the interference graph to be colored later. Build
  // a stack (pruned_nodes_) containing these nodes in an order determined
  // by various heuristics.
  void PruneInterferenceGraph();

  // Process pruned_nodes_ to color the interference graph, spilling when
  // necessary. Returns true if successful. Else, some intervals have been
  // split, and the interference graph should be rebuilt for another attempt.
  bool ColorInterferenceGraph();

  // Return prunable nodes.
  // The register allocator will need to access prunable nodes after coloring
  // in order to tell the code generator which registers have been assigned.
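  // Note that the returned ArrayRef is a non-owning view into prunable_nodes_, so it is only
  // valid while this ColoringIteration (and its arena allocator) is alive.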
  ArrayRef<InterferenceNode* const> GetPrunableNodes() const {
    return ArrayRef<InterferenceNode* const>(prunable_nodes_);
  }

 private:
  // Create a coalesce opportunity between two nodes.
  void CreateCoalesceOpportunity(InterferenceNode* a,
                                 InterferenceNode* b,
                                 CoalesceKind kind,
                                 size_t position);

  // Add an edge in the interference graph, if valid.
  // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
  // when possible.
  void AddPotentialInterference(InterferenceNode* from,
                                InterferenceNode* to,
                                bool guaranteed_not_interfering_yet,
                                bool both_directions = true);

  // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
  // may be pruned from the interference graph.
  void FreezeMoves(InterferenceNode* node);

  // Prune a node from the interference graph, updating worklists if necessary.
  void PruneNode(InterferenceNode* node);

  // Add coalesce opportunities associated with this node to the coalesce worklist.
  void EnableCoalesceOpportunities(InterferenceNode* node);

  // If needed, move `node` from the freeze worklist to the simplify worklist.
  void CheckTransitionFromFreezeWorklist(InterferenceNode* node);

  // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
  bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);

  // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
  bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);

  void Coalesce(CoalesceOpportunity* opportunity);

  // Merge `from` into `into` in the interference graph.
  void Combine(InterferenceNode* from, InterferenceNode* into);

  // A reference to the register allocator instance,
  // needed to split intervals and assign spill slots.
  RegisterAllocatorGraphColor* register_allocator_;

  // A scoped arena allocator used for a single graph coloring attempt.
  ScopedArenaAllocator* allocator_;

  const bool processing_core_regs_;

  const size_t num_regs_;

  // A map from live intervals to interference nodes.
  ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;

  // Uncolored nodes that should be pruned from the interference graph.
  ScopedArenaVector<InterferenceNode*> prunable_nodes_;

  // A stack of nodes pruned from the interference graph, waiting to be colored.
  ScopedArenaStdStack<InterferenceNode*> pruned_nodes_;

  // A queue containing low degree, non-move-related nodes that can be pruned immediately.
  ScopedArenaDeque<InterferenceNode*> simplify_worklist_;

  // A queue containing low degree, move-related nodes.
  ScopedArenaDeque<InterferenceNode*> freeze_worklist_;

  // A queue containing high degree nodes.
  // If we have to prune from the spill worklist, we cannot guarantee
  // the pruned node a color, so we order the worklist by priority.
  ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;

  // A queue containing coalesce opportunities.
  // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
  // inside of loops) are more important than others.
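  // Note that CmpPriority is a less-than comparison, so the priority queue pops the
  // highest-priority (most profitable) coalesce opportunity first.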
  ScopedArenaPriorityQueue<CoalesceOpportunity*,
                           decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;

  // Storage for links to adjacent nodes for interference nodes.
  // Using std::deque so that elements do not move when adding new ones.
  ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_;

  // Storage for links to coalesce opportunities for interference nodes.
  // Using std::deque so that elements do not move when adding new ones.
  ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_;

  DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
};

static bool IsCoreInterval(LiveInterval* interval) {
  return !DataType::IsFloatingPointType(interval->GetType());
}

static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
  return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
}

RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator,
                                                         CodeGenerator* codegen,
                                                         const SsaLivenessAnalysis& liveness,
                                                         bool iterative_move_coalescing)
    : RegisterAllocator(allocator, codegen, liveness),
      iterative_move_coalescing_(iterative_move_coalescing),
      core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
      num_int_spill_slots_(0),
      num_double_spill_slots_(0),
      num_float_spill_slots_(0),
      num_long_spill_slots_(0),
      catch_phi_spill_slot_counter_(0),
      reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
      reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
  // Before we ask for blocked registers, set them up in the code generator.
  codegen->SetupBlockedRegisters();

  // Initialize physical core register live intervals and blocked registers.
  // This includes globally blocked registers, such as the stack pointer.
  physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
    LiveInterval* interval =
        LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32);
    physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
    physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
    core_intervals_.push_back(interval);
    if (codegen_->IsBlockedCoreRegister(i)) {
      interval->AddRange(0, liveness.GetMaxLifetimePosition());
    }
  }
  // Initialize physical floating point register live intervals and blocked registers.
  physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
    LiveInterval* interval =
        LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32);
    physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
    physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
    fp_intervals_.push_back(interval);
    if (codegen_->IsBlockedFloatingPointRegister(i)) {
      interval->AddRange(0, liveness.GetMaxLifetimePosition());
    }
  }
}

RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {}

void RegisterAllocatorGraphColor::AllocateRegisters() {
  // (1) Collect and prepare live intervals.
  ProcessInstructions();

  for (bool processing_core_regs : {true, false}) {
    ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs
        ? core_intervals_
        : fp_intervals_;
    size_t num_registers = processing_core_regs
        ? codegen_->GetNumberOfCoreRegisters()
        : codegen_->GetNumberOfFloatingPointRegisters();

    size_t attempt = 0;
    while (true) {
      ++attempt;
      DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
          << "Exceeded debug max graph coloring register allocation attempts. "
          << "This could indicate that the register allocator is not making forward progress, "
          << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
          << "should be prioritized over long ones, because they cannot be split further.)";

      // Many data structures are cleared between graph coloring attempts, so we reduce
      // total memory usage by using a new scoped arena allocator for each attempt.
      ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack());
      ColoringIteration iteration(this,
                                  &coloring_attempt_allocator,
                                  processing_core_regs,
                                  num_registers);

      // (2) Build the interference graph.
      ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
          ? physical_core_nodes_
          : physical_fp_nodes_;
      iteration.BuildInterferenceGraph(intervals, physical_nodes);

      // (3) Add coalesce opportunities.
      // If we have tried coloring the graph a suspiciously high number of times, give
      // up on move coalescing, just in case the coalescing heuristics are not conservative.
      // (This situation will be caught if DCHECKs are turned on.)
      if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
        iteration.FindCoalesceOpportunities();
      }

      // (4) Prune all uncolored nodes from interference graph.
      iteration.PruneInterferenceGraph();

      // (5) Color pruned nodes based on interferences.
      bool successful = iteration.ColorInterferenceGraph();

      // We manually clear coalesce opportunities for physical nodes,
      // since they persist across coloring attempts.
      for (InterferenceNode* node : physical_core_nodes_) {
        node->ClearCoalesceOpportunities();
      }
      for (InterferenceNode* node : physical_fp_nodes_) {
        node->ClearCoalesceOpportunities();
      }

      if (successful) {
        // Assign spill slots.
        AllocateSpillSlots(iteration.GetPrunableNodes());

        // Tell the code generator which registers were allocated.
        // We only look at prunable_nodes because we already told the code generator about
        // fixed intervals while processing instructions. We also ignore the fixed intervals
        // placed at the top of catch blocks.
        for (InterferenceNode* node : iteration.GetPrunableNodes()) {
          LiveInterval* interval = node->GetInterval();
          if (interval->HasRegister()) {
            Location low_reg = processing_core_regs
                ? Location::RegisterLocation(interval->GetRegister())
                : Location::FpuRegisterLocation(interval->GetRegister());
            codegen_->AddAllocatedRegister(low_reg);
            if (interval->HasHighInterval()) {
              LiveInterval* high = interval->GetHighInterval();
              DCHECK(high->HasRegister());
              Location high_reg = processing_core_regs
                  ? Location::RegisterLocation(high->GetRegister())
                  : Location::FpuRegisterLocation(high->GetRegister());
              codegen_->AddAllocatedRegister(high_reg);
            }
          } else {
            DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
          }
        }

        break;
      }
    }  // while unsuccessful
  }  // for processing_core_regs

  // (6) Resolve locations and deconstruct SSA form.
  RegisterAllocationResolver(codegen_, liveness_)
      .Resolve(ArrayRef<HInstruction* const>(safepoints_),
               reserved_art_method_slots_ + reserved_out_slots_,
               num_int_spill_slots_,
               num_long_spill_slots_,
               num_float_spill_slots_,
               num_double_spill_slots_,
               catch_phi_spill_slot_counter_,
               ArrayRef<LiveInterval* const>(temp_intervals_));

  if (kIsDebugBuild) {
    Validate(/*log_fatal_on_failure*/ true);
  }
}

bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
  for (bool processing_core_regs : {true, false}) {
    ScopedArenaAllocator allocator(allocator_->GetArenaStack());
    ScopedArenaVector<LiveInterval*> intervals(
        allocator.Adapter(kArenaAllocRegisterAllocatorValidate));
    for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
      HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
      LiveInterval* interval = instruction->GetLiveInterval();
      if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
        intervals.push_back(instruction->GetLiveInterval());
      }
    }

    ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
        ? physical_core_nodes_
        : physical_fp_nodes_;
    for (InterferenceNode* fixed : physical_nodes) {
      LiveInterval* interval = fixed->GetInterval();
      if (interval->GetFirstRange() != nullptr) {
        // Ideally we would check fixed ranges as well, but currently there are times when
        // two fixed intervals for the same register will overlap. For example, a fixed input
        // and a fixed output may sometimes share the same register, in which case there will
        // be two fixed intervals for the same place.
      }
    }

    for (LiveInterval* temp : temp_intervals_) {
      if (IsCoreInterval(temp) == processing_core_regs) {
        intervals.push_back(temp);
      }
    }

    size_t spill_slots = num_int_spill_slots_
                       + num_long_spill_slots_
                       + num_float_spill_slots_
                       + num_double_spill_slots_
                       + catch_phi_spill_slot_counter_;
    bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals),
                                spill_slots,
                                reserved_art_method_slots_ + reserved_out_slots_,
                                *codegen_,
                                processing_core_regs,
                                log_fatal_on_failure);
    if (!ok) {
      return false;
    }
  }  // for processing_core_regs

  return true;
}

void RegisterAllocatorGraphColor::ProcessInstructions() {
  for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
    // Note that we currently depend on this ordering, since some helper
    // code is designed for linear scan register allocation.
    for (HBackwardInstructionIterator instr_it(block->GetInstructions());
         !instr_it.Done();
         instr_it.Advance()) {
      ProcessInstruction(instr_it.Current());
    }

    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
      ProcessInstruction(phi_it.Current());
    }

    if (block->IsCatchBlock()
        || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
      // By blocking all registers at the top of each catch block or irreducible loop, we force
      // intervals belonging to the live-in set of the catch/header block to be spilled.
      // TODO(ngeoffray): Phis in this block could be allocated in registers.
      size_t position = block->GetLifetimeStart();
      BlockRegisters(position, position + 1);
    }
  }
}

void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  if (locations == nullptr) {
    return;
  }
  if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
    // We do this here because we do not want the suspend check to artificially
    // create live registers.
    DCHECK(instruction->IsSuspendCheckEntry());
    DCHECK_EQ(locations->GetTempCount(), 0u);
    instruction->GetBlock()->RemoveInstruction(instruction);
    return;
  }

  CheckForTempLiveIntervals(instruction);
  CheckForSafepoint(instruction);
  if (instruction->GetLocations()->WillCall()) {
    // If a call will happen, create fixed intervals for caller-save registers.
    // TODO: Note that it may be beneficial to later split intervals at this point,
    // so that we allow last-minute moves from a caller-save register
    // to a callee-save register.
    BlockRegisters(instruction->GetLifetimePosition(),
                   instruction->GetLifetimePosition() + 1,
                   /*caller_save_only*/ true);
  }
  CheckForFixedInputs(instruction);

  LiveInterval* interval = instruction->GetLiveInterval();
  if (interval == nullptr) {
    // Instructions lacking a valid output location do not have a live interval.
    DCHECK(!locations->Out().IsValid());
    return;
  }

  // Low intervals act as representatives for their corresponding high interval.
  DCHECK(!interval->IsHighInterval());
  if (codegen_->NeedsTwoRegisters(interval->GetType())) {
    interval->AddHighInterval();
  }
  AddSafepointsFor(instruction);
  CheckForFixedOutput(instruction);
  AllocateSpillSlotForCatchPhi(instruction);

  ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
      ? core_intervals_
      : fp_intervals_;
  if (interval->HasSpillSlot() || instruction->IsConstant()) {
    // Note that if an interval already has a spill slot, then its value currently resides
    // on the stack (e.g., parameters). Thus we do not have to allocate a register until its first
    // register use. This is also true for constants, which can be materialized at any point.
    size_t first_register_use = interval->FirstRegisterUse();
    if (first_register_use != kNoLifetime) {
      LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
      intervals.push_back(split);
    } else {
      // We won't allocate a register for this value.
    }
  } else {
    intervals.push_back(interval);
  }
}

void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
  // We simply block physical registers where necessary.
  // TODO: Ideally we would coalesce the physical register with the register
  // allocated to the input value, but this can be tricky if, e.g., there
  // could be multiple physical register uses of the same value at the
  // same instruction. Furthermore, there's currently no distinction between
  // fixed inputs to a call (which will be clobbered) and other fixed inputs (which
  // may not be clobbered).
  LocationSummary* locations = instruction->GetLocations();
  size_t position = instruction->GetLifetimePosition();
  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
    Location input = locations->InAt(i);
    if (input.IsRegister() || input.IsFpuRegister()) {
      BlockRegister(input, position, position + 1);
      codegen_->AddAllocatedRegister(input);
    } else if (input.IsPair()) {
      BlockRegister(input.ToLow(), position, position + 1);
      BlockRegister(input.ToHigh(), position, position + 1);
      codegen_->AddAllocatedRegister(input.ToLow());
      codegen_->AddAllocatedRegister(input.ToHigh());
    }
  }
}

void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
  // If an instruction has a fixed output location, we give the live interval a register and then
  // proactively split it just after the definition point to avoid creating too many interferences
  // with a fixed node.
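  // (For example, a division on x86 defines its quotient in a fixed register; the interval keeps
  // that register only until just after the definition, and the split sibling is colored freely.)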
  LiveInterval* interval = instruction->GetLiveInterval();
  Location out = interval->GetDefinedBy()->GetLocations()->Out();
  size_t position = instruction->GetLifetimePosition();
  DCHECK_GE(interval->GetEnd() - position, 2u);

  if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
    out = instruction->GetLocations()->InAt(0);
  }

  if (out.IsRegister() || out.IsFpuRegister()) {
    interval->SetRegister(out.reg());
    codegen_->AddAllocatedRegister(out);
    Split(interval, position + 1);
  } else if (out.IsPair()) {
    interval->SetRegister(out.low());
    interval->GetHighInterval()->SetRegister(out.high());
    codegen_->AddAllocatedRegister(out.ToLow());
    codegen_->AddAllocatedRegister(out.ToHigh());
    Split(interval, position + 1);
  } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
    interval->SetSpillSlot(out.GetStackIndex());
  } else {
    DCHECK(out.IsUnallocated() || out.IsConstant());
  }
}

void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
  LiveInterval* interval = instruction->GetLiveInterval();
  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
    size_t safepoint_position = safepoint->GetLifetimePosition();

    // Test that safepoints_ are ordered in the optimal way.
    DCHECK(safepoint_index == safepoints_.size() ||
           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);

    if (safepoint_position == interval->GetStart()) {
      // The safepoint is for this instruction, so the location of the instruction
      // does not need to be saved.
      DCHECK_EQ(safepoint_index, safepoints_.size());
      DCHECK_EQ(safepoint, instruction);
      continue;
    } else if (interval->IsDeadAt(safepoint_position)) {
      break;
    } else if (!interval->Covers(safepoint_position)) {
      // Hole in the interval.
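      // The value is not live at this safepoint, so there is nothing to keep alive here;
      // keep scanning earlier safepoints, which may still fall within this interval.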
      continue;
    }
    interval->AddSafepoint(safepoint);
  }
}

void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t position = instruction->GetLifetimePosition();
  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
    Location temp = locations->GetTemp(i);
    if (temp.IsRegister() || temp.IsFpuRegister()) {
      BlockRegister(temp, position, position + 1);
      codegen_->AddAllocatedRegister(temp);
    } else {
      DCHECK(temp.IsUnallocated());
      switch (temp.GetPolicy()) {
        case Location::kRequiresRegister: {
          LiveInterval* interval =
              LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32);
          interval->AddTempUse(instruction, i);
          core_intervals_.push_back(interval);
          temp_intervals_.push_back(interval);
          break;
        }

        case Location::kRequiresFpuRegister: {
          LiveInterval* interval =
              LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64);
          interval->AddTempUse(instruction, i);
          fp_intervals_.push_back(interval);
          temp_intervals_.push_back(interval);
          if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) {
            interval->AddHighInterval(/*is_temp*/ true);
            temp_intervals_.push_back(interval->GetHighInterval());
          }
          break;
        }

        default:
          LOG(FATAL) << "Unexpected policy for temporary location "
                     << temp.GetPolicy();
      }
    }
  }
}

void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
  LocationSummary* locations = instruction->GetLocations();

  if (locations->NeedsSafepoint()) {
    safepoints_.push_back(instruction);
  }
}

LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
  if (interval->GetStart() < position && position < interval->GetEnd()) {
    return Split(interval, position);
  } else {
    return interval;
  }
}

void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
  DCHECK(!interval->IsHighInterval());

  // Split just after a register definition.
  if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
    interval = TrySplit(interval, interval->GetStart() + 1);
  }

  // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e.
  // [interval->GetStart(), interval->GetEnd() + 1)
  auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
                                                 interval->GetUses().end(),
                                                 interval->GetStart(),
                                                 interval->GetEnd() + 1u);
  // Split around register uses.
  for (const UsePosition& use : matching_use_range) {
    if (use.RequiresRegister()) {
      size_t position = use.GetPosition();
      interval = TrySplit(interval, position - 1);
      if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
        // If we are at the very end of a basic block, we cannot split right
        // at the use. Split just after instead.
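        // (A split at `position` would require a move after the control-flow instruction,
        // which is not allowed.)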
        interval = TrySplit(interval, position + 1);
      } else {
        interval = TrySplit(interval, position);
      }
    }
  }
}

void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
    HPhi* phi = instruction->AsPhi();
    LiveInterval* interval = phi->GetLiveInterval();

    HInstruction* previous_phi = phi->GetPrevious();
    DCHECK(previous_phi == nullptr ||
           previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
        << "Phis expected to be sorted by vreg number, "
        << "so that equivalent phis are adjacent.";

    if (phi->IsVRegEquivalentOf(previous_phi)) {
      // Assign the same spill slot.
      DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
      interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
    } else {
      interval->SetSpillSlot(catch_phi_spill_slot_counter_);
      catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
    }
  }
}

void RegisterAllocatorGraphColor::BlockRegister(Location location,
                                                size_t start,
                                                size_t end) {
  DCHECK(location.IsRegister() || location.IsFpuRegister());
  int reg = location.reg();
  LiveInterval* interval = location.IsRegister()
      ? physical_core_nodes_[reg]->GetInterval()
      : physical_fp_nodes_[reg]->GetInterval();
  DCHECK(interval->GetRegister() == reg);
  bool blocked_by_codegen = location.IsRegister()
      ? codegen_->IsBlockedCoreRegister(reg)
      : codegen_->IsBlockedFloatingPointRegister(reg);
  if (blocked_by_codegen) {
    // We've already blocked this register for the entire method. (And adding a
    // range inside another range violates the preconditions of AddRange).
  } else {
    interval->AddRange(start, end);
  }
}

void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
      BlockRegister(Location::RegisterLocation(i), start, end);
    }
  }
  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
      BlockRegister(Location::FpuRegisterLocation(i), start, end);
    }
  }
}

void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
                                                 InterferenceNode* to,
                                                 bool guaranteed_not_interfering_yet,
                                                 bool both_directions) {
  if (from->IsPrecolored()) {
    // We save space by ignoring outgoing edges from fixed nodes.
  } else if (to->IsPrecolored()) {
    // It is important that only a single node represents a given fixed register in the
    // interference graph. We retrieve that node here.
    const ScopedArenaVector<InterferenceNode*>& physical_nodes =
        to->GetInterval()->IsFloatingPoint()
            ? register_allocator_->physical_fp_nodes_
            : register_allocator_->physical_core_nodes_;
    InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
    from->AddInterference(
        physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
    DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
    DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";

    // If a node interferes with a fixed pair node, the weight of the edge may
    // be inaccurate after using the alias of the pair node, because the alias of the pair node
    // is a singular node.
    // We could make special pair fixed nodes, but that ends up being too conservative because
    // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
    // three rather than two.
    // Instead, we explicitly add an interference with the high node of the fixed pair node.
    // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair
    // intervals can be unaligned on x86 complicates things.
    if (to->IsPair()) {
      InterferenceNode* high_node =
          physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
      DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
                high_node->GetInterval()->GetRegister());
      from->AddInterference(
          high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
    }
  } else {
    // Standard interference between two uncolored nodes.
    from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_);
  }

  if (both_directions) {
    AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
  }
}

// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
// is allowed to have the same register as the input interval.
// TODO: Ideally we should just produce correct intervals in liveness analysis.
// We would need to refactor the current live interval layout to do so, which is
// no small task.
static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
  LiveInterval* output_interval = out_node->GetInterval();
  HInstruction* defined_by = output_interval->GetDefinedBy();
  if (defined_by == nullptr) {
    // This must not be a definition point.
    return false;
  }

  LocationSummary* locations = defined_by->GetLocations();
  if (locations->OutputCanOverlapWithInputs()) {
    // This instruction does not allow the output to reuse a register from an input.
    return false;
  }

  LiveInterval* input_interval = in_node->GetInterval();
  LiveInterval* next_sibling = input_interval->GetNextSibling();
  size_t def_position = defined_by->GetLifetimePosition();
  size_t use_position = def_position + 1;
  if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
    // The next sibling starts at the use position, so reusing the input register in the output
    // would clobber the input before it's moved into the sibling interval location.
    return false;
  }

  if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
    // The input interval is live after the use position.
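    // Reusing its register for the output would clobber a value that is still needed.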
    return false;
  }

  HInputsRef inputs = defined_by->GetInputs();
  for (size_t i = 0; i < inputs.size(); ++i) {
    if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
      DCHECK(input_interval->SameRegisterKind(*output_interval));
      return true;
    }
  }

  // The input interval was not an input for this instruction.
  return false;
}

void ColoringIteration::BuildInterferenceGraph(
    const ScopedArenaVector<LiveInterval*>& intervals,
    const ScopedArenaVector<InterferenceNode*>& physical_nodes) {
  DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
  // Build the interference graph efficiently by ordering range endpoints
  // by position and doing a linear sweep to find interferences. (That is, we
  // jump from endpoint to endpoint, maintaining a set of intervals live at each
  // point. If two nodes are ever in the live set at the same time, then they
  // interfere with each other.)
  //
  // We order by both position and (secondarily) by whether the endpoint
  // begins or ends a range; we want to process range endings before range
  // beginnings at the same position because they should not conflict.
  //
  // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
  // Tuple contents: (position, is_range_beginning, node).
  ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
      allocator_->Adapter(kArenaAllocRegisterAllocator));

  // We reserve plenty of space to avoid excessive copying.
  range_endpoints.reserve(4 * intervals.size());

  for (LiveInterval* parent : intervals) {
    for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
      LiveRange* range = sibling->GetFirstRange();
      if (range != nullptr) {
        InterferenceNode* node =
            new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_);
        interval_node_map_.Insert(std::make_pair(sibling, node));

        if (sibling->HasRegister()) {
          // Fixed nodes should alias the canonical node for the corresponding register.
          node->stage = NodeStage::kPrecolored;
          InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
          node->SetAlias(physical_node);
          DCHECK_EQ(node->GetInterval()->GetRegister(),
                    physical_node->GetInterval()->GetRegister());
        } else {
          node->stage = NodeStage::kPrunable;
          prunable_nodes_.push_back(node);
        }

        while (range != nullptr) {
          range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
          range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
          range = range->GetNext();
        }
      }
    }
  }

  // Sort the endpoints.
  // We explicitly ignore the third entry of each tuple (the node pointer) in order
  // to maintain determinism.
  std::sort(range_endpoints.begin(), range_endpoints.end(),
            [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
                const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
              return std::tie(std::get<0>(lhs), std::get<1>(lhs))
                   < std::tie(std::get<0>(rhs), std::get<1>(rhs));
            });

  // Nodes live at the current position in the linear sweep.
  ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator));

  // Linear sweep.
  // When we encounter the beginning of a range, we add the corresponding node to the
  // live set. When we encounter the end of a range, we remove the corresponding node
  // from the live set. Nodes interfere if they are in the live set at the same time.
  for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
    bool is_range_beginning;
    InterferenceNode* node;
    size_t position;
    // Extract information from the tuple, including the node this tuple represents.
    std::tie(position, is_range_beginning, node) = *it;

    if (is_range_beginning) {
      bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
      for (InterferenceNode* conflicting : live) {
        DCHECK_NE(node, conflicting);
        if (CheckInputOutputCanOverlap(conflicting, node)) {
          // We do not add an interference, because the instruction represented by `node` allows
          // its output to share a register with an input, represented here by `conflicting`.
        } else {
          AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
        }
      }
      DCHECK(std::find(live.begin(), live.end(), node) == live.end());
      live.push_back(node);
    } else {
      // End of range.
      auto live_it = std::find(live.begin(), live.end(), node);
      DCHECK(live_it != live.end());
      live.erase(live_it);
    }
  }
  DCHECK(live.empty());
}

void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
                                                  InterferenceNode* b,
                                                  CoalesceKind kind,
                                                  size_t position) {
  DCHECK_EQ(a->IsPair(), b->IsPair())
      << "Nodes of different memory widths should never be coalesced";
  CoalesceOpportunity* opportunity =
      new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
  a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
  b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
  coalesce_worklist_.push(opportunity);
}

// When looking for coalesce opportunities, we use the interval_node_map_ to find the node
// corresponding to an interval. Note that not all intervals are in this map, notably the parents
// of constants and stack arguments. (However, these intervals should not be involved in coalesce
// opportunities anyway, because they're not going to be in registers.)
void ColoringIteration::FindCoalesceOpportunities() {
  DCHECK(coalesce_worklist_.empty());

  for (InterferenceNode* node : prunable_nodes_) {
    LiveInterval* interval = node->GetInterval();

    // Coalesce siblings.
    LiveInterval* next_sibling = interval->GetNextSibling();
    if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
      auto it = interval_node_map_.Find(next_sibling);
      if (it != interval_node_map_.end()) {
        InterferenceNode* sibling_node = it->second;
        CreateCoalesceOpportunity(node,
                                  sibling_node,
                                  CoalesceKind::kAdjacentSibling,
                                  interval->GetEnd());
      }
    }

    // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
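    // (CheckForFixedOutput splits such intervals just after their definition, so the sibling
    // begins exactly where the fixed parent ends.)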
1317 LiveInterval* parent = interval->GetParent(); 1318 if (parent->HasRegister() 1319 && parent->GetNextSibling() == interval 1320 && parent->GetEnd() == interval->GetStart()) { 1321 auto it = interval_node_map_.Find(parent); 1322 if (it != interval_node_map_.end()) { 1323 InterferenceNode* parent_node = it->second; 1324 CreateCoalesceOpportunity(node, 1325 parent_node, 1326 CoalesceKind::kFixedOutputSibling, 1327 parent->GetEnd()); 1328 } 1329 } 1330 1331 // Try to prevent moves across blocks. 1332 // Note that this does not lead to many successful coalesce attempts, so it could be removed 1333 // if it is found to increase compile time. 1334 const SsaLivenessAnalysis& liveness = register_allocator_->liveness_; 1335 if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) { 1336 // If the start of this interval is at a block boundary, we look at the 1337 // location of the interval in blocks preceding the block this interval 1338 // starts at. This can avoid a move between the two blocks. 1339 HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2); 1340 for (HBasicBlock* predecessor : block->GetPredecessors()) { 1341 size_t position = predecessor->GetLifetimeEnd() - 1; 1342 LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); 1343 if (existing != nullptr) { 1344 auto it = interval_node_map_.Find(existing); 1345 if (it != interval_node_map_.end()) { 1346 InterferenceNode* existing_node = it->second; 1347 CreateCoalesceOpportunity(node, 1348 existing_node, 1349 CoalesceKind::kNonlinearControlFlow, 1350 position); 1351 } 1352 } 1353 } 1354 } 1355 1356 // Coalesce phi inputs with the corresponding output. 1357 HInstruction* defined_by = interval->GetDefinedBy(); 1358 if (defined_by != nullptr && defined_by->IsPhi()) { 1359 ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors()); 1360 HInputsRef inputs = defined_by->GetInputs(); 1361 1362 for (size_t i = 0, e = inputs.size(); i < e; ++i) { 1363 // We want the sibling at the end of the appropriate predecessor block. 1364 size_t position = predecessors[i]->GetLifetimeEnd() - 1; 1365 LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); 1366 1367 auto it = interval_node_map_.Find(input_interval); 1368 if (it != interval_node_map_.end()) { 1369 InterferenceNode* input_node = it->second; 1370 CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); 1371 } 1372 } 1373 } 1374 1375 // Coalesce output with first input when policy is kSameAsFirstInput. 1376 if (defined_by != nullptr) { 1377 Location out = defined_by->GetLocations()->Out(); 1378 if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { 1379 LiveInterval* input_interval 1380 = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); 1381 // TODO: Could we consider lifetime holes here? 1382 if (input_interval->GetEnd() == interval->GetStart()) { 1383 auto it = interval_node_map_.Find(input_interval); 1384 if (it != interval_node_map_.end()) { 1385 InterferenceNode* input_node = it->second; 1386 CreateCoalesceOpportunity(node, 1387 input_node, 1388 CoalesceKind::kFirstInput, 1389 interval->GetStart()); 1390 } 1391 } 1392 } 1393 } 1394 1395 // An interval that starts at an instruction (that is, one that is not split) may 1396 // reuse the registers used by the inputs of that instruction, based on the 1397 // location summary.
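// (For example, on a two-address architecture an instruction may overwrite one of its input registers, so letting the output share an input's register can avoid a copy and yield smaller encodings; this is modeled below with the low-priority CoalesceKind::kAnyInput.)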
1398 if (defined_by != nullptr) { 1399 DCHECK(!interval->IsSplit()); 1400 LocationSummary* locations = defined_by->GetLocations(); 1401 if (!locations->OutputCanOverlapWithInputs()) { 1402 HInputsRef inputs = defined_by->GetInputs(); 1403 for (size_t i = 0; i < inputs.size(); ++i) { 1404 size_t def_point = defined_by->GetLifetimePosition(); 1405 // TODO: Getting the sibling at the def_point might not be quite what we want 1406 // for fixed inputs, since the use will be *at* the def_point rather than after. 1407 LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); 1408 if (input_interval != nullptr && 1409 input_interval->HasHighInterval() == interval->HasHighInterval()) { 1410 auto it = interval_node_map_.Find(input_interval); 1411 if (it != interval_node_map_.end()) { 1412 InterferenceNode* input_node = it->second; 1413 CreateCoalesceOpportunity(node, 1414 input_node, 1415 CoalesceKind::kAnyInput, 1416 interval->GetStart()); 1417 } 1418 } 1419 } 1420 } 1421 } 1422 1423 // Try to prevent moves into fixed input locations. 1424 // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. 1425 // [interval->GetStart() + 1, interval->GetEnd() + 1) 1426 auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), 1427 interval->GetUses().end(), 1428 interval->GetStart() + 1u, 1429 interval->GetEnd() + 1u); 1430 for (const UsePosition& use : matching_use_range) { 1431 HInstruction* user = use.GetUser(); 1432 if (user == nullptr) { 1433 // User may be null for certain intervals, such as temp intervals. 1434 continue; 1435 } 1436 LocationSummary* locations = user->GetLocations(); 1437 Location input = locations->InAt(use.GetInputIndex()); 1438 if (input.IsRegister() || input.IsFpuRegister()) { 1439 // TODO: Could try to handle pair intervals too, but coalescing with fixed pair nodes 1440 // is currently not supported. 1441 InterferenceNode* fixed_node = input.IsRegister() 1442 ? register_allocator_->physical_core_nodes_[input.reg()] 1443 : register_allocator_->physical_fp_nodes_[input.reg()]; 1444 CreateCoalesceOpportunity(node, 1445 fixed_node, 1446 CoalesceKind::kFixedInput, 1447 user->GetLifetimePosition()); 1448 } 1449 } 1450 } // for node in prunable_nodes 1451 } 1452 1453 static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) { 1454 return node->GetOutDegree() < num_regs; 1455 } 1456 1457 static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) { 1458 return !IsLowDegreeNode(node, num_regs); 1459 } 1460 1461 void ColoringIteration::PruneInterferenceGraph() { 1462 DCHECK(pruned_nodes_.empty() 1463 && simplify_worklist_.empty() 1464 && freeze_worklist_.empty() 1465 && spill_worklist_.empty()); 1466 // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes, 1467 // and all others as high degree nodes. The distinction is important: low degree nodes are 1468 // guaranteed a color, while high degree nodes are not. 1469 1470 // Build worklists. Note that the coalesce worklist has already been 1471 // filled by FindCoalesceOpportunities(). 1472 for (InterferenceNode* node : prunable_nodes_) { 1473 DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned"; 1474 if (IsLowDegreeNode(node, num_regs_)) { 1475 if (node->GetCoalesceOpportunities().empty()) { 1476 // Simplify Worklist. 1477 node->stage = NodeStage::kSimplifyWorklist; 1478 simplify_worklist_.push_back(node); 1479 } else { 1480 // Freeze Worklist.
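// (That is, a low-degree node that is still move-related: we defer pruning it so that its pending coalesce opportunities can be tried first; once they all become defunct or inactive, CheckTransitionFromFreezeWorklist() moves it to the simplify worklist.)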
1481 node->stage = NodeStage::kFreezeWorklist; 1482 freeze_worklist_.push_back(node); 1483 } 1484 } else { 1485 // Spill worklist. 1486 node->stage = NodeStage::kSpillWorklist; 1487 spill_worklist_.push(node); 1488 } 1489 } 1490 1491 // Prune graph. 1492 // Note that we do not remove a node from its current worklist if it moves to another, so it may 1493 // be in multiple worklists at once; the node's `stage` says which worklist it is really in. 1494 while (true) { 1495 if (!simplify_worklist_.empty()) { 1496 // Prune low-degree nodes. 1497 // TODO: pop_back() should work as well, but it didn't; we get a 1498 // failed check while pruning. We should look into this. 1499 InterferenceNode* node = simplify_worklist_.front(); 1500 simplify_worklist_.pop_front(); 1501 DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list"; 1502 DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree"; 1503 DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related"; 1504 PruneNode(node); 1505 } else if (!coalesce_worklist_.empty()) { 1506 // Coalesce. 1507 CoalesceOpportunity* opportunity = coalesce_worklist_.top(); 1508 coalesce_worklist_.pop(); 1509 if (opportunity->stage == CoalesceStage::kWorklist) { 1510 Coalesce(opportunity); 1511 } 1512 } else if (!freeze_worklist_.empty()) { 1513 // Freeze moves and prune a low-degree move-related node. 1514 InterferenceNode* node = freeze_worklist_.front(); 1515 freeze_worklist_.pop_front(); 1516 if (node->stage == NodeStage::kFreezeWorklist) { 1517 DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree"; 1518 DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related"; 1519 FreezeMoves(node); 1520 PruneNode(node); 1521 } 1522 } else if (!spill_worklist_.empty()) { 1523 // We spill the lowest-priority node, because pruning a node earlier 1524 // gives it a higher chance of being spilled: it is popped later during coloring, when more of its neighbors already hold registers. 1525 InterferenceNode* node = spill_worklist_.top(); 1526 spill_worklist_.pop(); 1527 if (node->stage == NodeStage::kSpillWorklist) { 1528 DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree"; 1529 FreezeMoves(node); 1530 PruneNode(node); 1531 } 1532 } else { 1533 // Pruning complete. 1534 break; 1535 } 1536 } 1537 DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size()); 1538 } 1539 1540 void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) { 1541 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { 1542 if (opportunity->stage == CoalesceStage::kActive) { 1543 opportunity->stage = CoalesceStage::kWorklist; 1544 coalesce_worklist_.push(opportunity); 1545 } 1546 } 1547 } 1548 1549 void ColoringIteration::PruneNode(InterferenceNode* node) { 1550 DCHECK_NE(node->stage, NodeStage::kPruned); 1551 DCHECK(!node->IsPrecolored()); 1552 node->stage = NodeStage::kPruned; 1553 pruned_nodes_.push(node); 1554 1555 for (InterferenceNode* adj : node->GetAdjacentNodes()) { 1556 DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes"; 1557 1558 if (adj->IsPrecolored()) { 1559 // No effect on pre-colored nodes; they're never pruned. 1560 } else { 1561 // Remove the interference. 1562 bool was_high_degree = IsHighDegreeNode(adj, num_regs_); 1563 DCHECK(adj->ContainsInterference(node)) 1564 << "Missing reflexive interference from non-fixed node"; 1565 adj->RemoveInterference(node); 1566 1567 // Handle transitions from high degree to low degree.
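// (Concretely, with num_regs_ = 4: if `adj` previously had out-degree 4, it sat on the spill worklist with no colorability guarantee; removing this edge drops it to degree 3, so it is guaranteed a color again, and coalesce opportunities involving it and its neighbors become worth reconsidering.)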
1568 if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) { 1569 EnableCoalesceOpportunities(adj); 1570 for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) { 1571 EnableCoalesceOpportunities(adj_adj); 1572 } 1573 1574 DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist); 1575 if (adj->IsMoveRelated()) { 1576 adj->stage = NodeStage::kFreezeWorklist; 1577 freeze_worklist_.push_back(adj); 1578 } else { 1579 adj->stage = NodeStage::kSimplifyWorklist; 1580 simplify_worklist_.push_back(adj); 1581 } 1582 } 1583 } 1584 } 1585 } 1586 1587 void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) { 1588 if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) { 1589 DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist); 1590 node->stage = NodeStage::kSimplifyWorklist; 1591 simplify_worklist_.push_back(node); 1592 } 1593 } 1594 1595 void ColoringIteration::FreezeMoves(InterferenceNode* node) { 1596 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { 1597 if (opportunity->stage == CoalesceStage::kDefunct) { 1598 // Constrained moves should remain constrained, since they will not be considered 1599 // during last-chance coalescing. 1600 } else { 1601 opportunity->stage = CoalesceStage::kInactive; 1602 } 1603 InterferenceNode* other = opportunity->node_a->GetAlias() == node 1604 ? opportunity->node_b->GetAlias() 1605 : opportunity->node_a->GetAlias(); 1606 if (other != node && other->stage == NodeStage::kFreezeWorklist) { 1607 DCHECK(IsLowDegreeNode(node, num_regs_)); 1608 CheckTransitionFromFreezeWorklist(other); 1609 } 1610 } 1611 } 1612 1613 bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from, 1614 InterferenceNode* into) { 1615 if (!into->IsPrecolored()) { 1616 // The uncolored heuristic will cover this case. 1617 return false; 1618 } 1619 if (from->IsPair() || into->IsPair()) { 1620 // TODO: Merging from a pair node is currently not supported, since fixed pair nodes 1621 // are currently represented as two single fixed nodes in the graph, and `into` is 1622 // only one of them. (We may lose the implicit connections to the second one in a merge.) 1623 return false; 1624 } 1625 1626 // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`. 1627 // Reasons an adjacent node `adj` can be "ok": 1628 // (1) If `adj` is low degree, interference with `into` will not affect its existing 1629 // colorability guarantee. (Notice that coalescing cannot increase its degree.) 1630 // (2) If `adj` is pre-colored, it already interferes with `into`. See (3). 1631 // (3) If there's already an interference with `into`, coalescing will not add interferences. 1632 for (InterferenceNode* adj : from->GetAdjacentNodes()) { 1633 if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) { 1634 // Ok. 1635 } else { 1636 return false; 1637 } 1638 } 1639 return true; 1640 } 1641 1642 bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from, 1643 InterferenceNode* into) { 1644 if (into->IsPrecolored()) { 1645 // The pre-colored heuristic will handle this case. 1646 return false; 1647 } 1648 1649 // Arbitrary cap to improve compile time. Tests show that this has a negligible effect 1650 // on generated code. 1651 if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) { 1652 return false; 1653 } 1654 1655 // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors 1656 // of high degree.
(Low degree neighbors can be ignored, because they will eventually be 1657 // pruned from the interference graph in the simplify stage.) 1658 size_t high_degree_interferences = 0; 1659 for (InterferenceNode* adj : from->GetAdjacentNodes()) { 1660 if (IsHighDegreeNode(adj, num_regs_)) { 1661 high_degree_interferences += from->EdgeWeightWith(adj); 1662 } 1663 } 1664 for (InterferenceNode* adj : into->GetAdjacentNodes()) { 1665 if (IsHighDegreeNode(adj, num_regs_)) { 1666 if (from->ContainsInterference(adj)) { 1667 // We've already counted this adjacent node. 1668 // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that 1669 // we should not have counted it at all. (This extends the textbook Briggs coalescing test, 1670 // but remains conservative.) 1671 if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) { 1672 high_degree_interferences -= from->EdgeWeightWith(adj); 1673 } 1674 } else { 1675 high_degree_interferences += into->EdgeWeightWith(adj); 1676 } 1677 } 1678 } 1679 1680 return high_degree_interferences < num_regs_; 1681 } 1682 1683 void ColoringIteration::Combine(InterferenceNode* from, 1684 InterferenceNode* into) { 1685 from->SetAlias(into); 1686 1687 // Add interferences. 1688 for (InterferenceNode* adj : from->GetAdjacentNodes()) { 1689 bool was_low_degree = IsLowDegreeNode(adj, num_regs_); 1690 AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false); 1691 if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) { 1692 // This is a (temporary) transition to a high degree node. Its degree will decrease again 1693 // when we prune `from`, but it's best to be consistent about the current worklist. 1694 adj->stage = NodeStage::kSpillWorklist; 1695 spill_worklist_.push(adj); 1696 } 1697 } 1698 1699 // Add coalesce opportunities. 1700 for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) { 1701 if (opportunity->stage != CoalesceStage::kDefunct) { 1702 into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); 1703 } 1704 } 1705 EnableCoalesceOpportunities(from); 1706 1707 // Prune and update worklists. 1708 PruneNode(from); 1709 if (IsLowDegreeNode(into, num_regs_)) { 1710 // Coalesce(...) takes care of checking for a transition to the simplify worklist. 1711 DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist); 1712 } else if (into->stage == NodeStage::kFreezeWorklist) { 1713 // This is a transition to a high degree node. 1714 into->stage = NodeStage::kSpillWorklist; 1715 spill_worklist_.push(into); 1716 } else { 1717 DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored); 1718 } 1719 } 1720 1721 void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) { 1722 InterferenceNode* from = opportunity->node_a->GetAlias(); 1723 InterferenceNode* into = opportunity->node_b->GetAlias(); 1724 DCHECK_NE(from->stage, NodeStage::kPruned); 1725 DCHECK_NE(into->stage, NodeStage::kPruned); 1726 1727 if (from->IsPrecolored()) { 1728 // If we have one pre-colored node, make sure it's the `into` node. 1729 std::swap(from, into); 1730 } 1731 1732 if (from == into) { 1733 // These nodes have already been coalesced. 1734 opportunity->stage = CoalesceStage::kDefunct; 1735 CheckTransitionFromFreezeWorklist(from); 1736 } else if (from->IsPrecolored() || from->ContainsInterference(into)) { 1737 // These nodes interfere. 
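// (Either `from` and `into` are two distinct pre-colored nodes, or the graph already records an edge between them. Marking the opportunity defunct also hides it from last-chance coalescing in ColorInterferenceGraph().)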
1738 opportunity->stage = CoalesceStage::kDefunct; 1739 CheckTransitionFromFreezeWorklist(from); 1740 CheckTransitionFromFreezeWorklist(into); 1741 } else if (PrecoloredHeuristic(from, into) 1742 || UncoloredHeuristic(from, into)) { 1743 // We can coalesce these nodes. 1744 opportunity->stage = CoalesceStage::kDefunct; 1745 Combine(from, into); 1746 CheckTransitionFromFreezeWorklist(into); 1747 } else { 1748 // We cannot coalesce, but we may be able to later. 1749 opportunity->stage = CoalesceStage::kActive; 1750 } 1751 } 1752 1753 // Build a mask with a bit set for each register assigned to some 1754 // interval in `intervals`. 1755 template <typename Container> 1756 static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) { 1757 std::bitset<kMaxNumRegs> conflict_mask; 1758 for (InterferenceNode* adjacent : intervals) { 1759 LiveInterval* conflicting = adjacent->GetInterval(); 1760 if (conflicting->HasRegister()) { 1761 conflict_mask.set(conflicting->GetRegister()); 1762 if (conflicting->HasHighInterval()) { 1763 DCHECK(conflicting->GetHighInterval()->HasRegister()); 1764 conflict_mask.set(conflicting->GetHighInterval()->GetRegister()); 1765 } 1766 } else { 1767 DCHECK(!conflicting->HasHighInterval() 1768 || !conflicting->GetHighInterval()->HasRegister()); 1769 } 1770 } 1771 return conflict_mask; 1772 } 1773 1774 bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) { 1775 return processing_core_regs 1776 ? !codegen_->IsCoreCalleeSaveRegister(reg) 1777 : !codegen_->IsFloatingPointCalleeSaveRegister(reg); 1778 } 1779 1780 static bool RegisterIsAligned(size_t reg) { 1781 return reg % 2 == 0; 1782 } 1783 1784 static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) { 1785 // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit. 1786 // Note that CTZ is undefined if all bits are 0, so we special-case it. 1787 return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong()); 1788 } 1789 1790 bool ColoringIteration::ColorInterferenceGraph() { 1791 DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small"; 1792 ScopedArenaVector<LiveInterval*> colored_intervals( 1793 allocator_->Adapter(kArenaAllocRegisterAllocator)); 1794 bool successful = true; 1795 1796 while (!pruned_nodes_.empty()) { 1797 InterferenceNode* node = pruned_nodes_.top(); 1798 pruned_nodes_.pop(); 1799 LiveInterval* interval = node->GetInterval(); 1800 size_t reg = 0; 1801 1802 InterferenceNode* alias = node->GetAlias(); 1803 if (alias != node) { 1804 // This node was coalesced with another. 1805 LiveInterval* alias_interval = alias->GetInterval(); 1806 if (alias_interval->HasRegister()) { 1807 reg = alias_interval->GetRegister(); 1808 DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg]) 1809 << "This node conflicts with the register it was coalesced with"; 1810 } else { 1811 DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " " 1812 << "Move coalescing was not conservative, causing a node to be coalesced " 1813 << "with another node that could not be colored"; 1814 if (interval->RequiresRegister()) { 1815 successful = false; 1816 } 1817 } 1818 } else { 1819 // Search for free register(s). 
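// (Illustrative, with hypothetical values: if num_regs_ = 4 and the adjacent nodes hold registers 0 and 1, then conflict_mask = 0b0011. A single interval takes the first free register, 2 (the caller-save preference below may pick a different free register), while a pair interval scans aligned even registers and settles on {2, 3}.)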
1820 std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); 1821 if (interval->HasHighInterval()) { 1822 // Note that the graph coloring allocator assumes that pair intervals are aligned here, 1823 // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we 1824 // change the alignment requirements here, we will have to update the algorithm (e.g., 1825 // be more conservative about the weight of edges adjacent to pair nodes.) 1826 while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { 1827 reg += 2; 1828 } 1829 1830 // Try to use a caller-save register first. 1831 for (size_t i = 0; i < num_regs_ - 1; i += 2) { 1832 bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_); 1833 bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_); 1834 if (!conflict_mask[i] && !conflict_mask[i + 1]) { 1835 if (low_caller_save && high_caller_save) { 1836 reg = i; 1837 break; 1838 } else if (low_caller_save || high_caller_save) { 1839 reg = i; 1840 // Keep looking to try to get both parts in caller-save registers. 1841 } 1842 } 1843 } 1844 } else { 1845 // Not a pair interval. 1846 reg = FindFirstZeroInConflictMask(conflict_mask); 1847 1848 // Try to use caller-save registers first. 1849 for (size_t i = 0; i < num_regs_; ++i) { 1850 if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) { 1851 reg = i; 1852 break; 1853 } 1854 } 1855 } 1856 1857 // Last-chance coalescing. 1858 for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { 1859 if (opportunity->stage == CoalesceStage::kDefunct) { 1860 continue; 1861 } 1862 LiveInterval* other_interval = opportunity->node_a->GetAlias() == node 1863 ? opportunity->node_b->GetAlias()->GetInterval() 1864 : opportunity->node_a->GetAlias()->GetInterval(); 1865 if (other_interval->HasRegister()) { 1866 size_t coalesce_register = other_interval->GetRegister(); 1867 if (interval->HasHighInterval()) { 1868 if (!conflict_mask[coalesce_register] && 1869 !conflict_mask[coalesce_register + 1] && 1870 RegisterIsAligned(coalesce_register)) { 1871 reg = coalesce_register; 1872 break; 1873 } 1874 } else if (!conflict_mask[coalesce_register]) { 1875 reg = coalesce_register; 1876 break; 1877 } 1878 } 1879 } 1880 } 1881 1882 if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) { 1883 // Assign register. 1884 DCHECK(!interval->HasRegister()); 1885 interval->SetRegister(reg); 1886 colored_intervals.push_back(interval); 1887 if (interval->HasHighInterval()) { 1888 DCHECK(!interval->GetHighInterval()->HasRegister()); 1889 interval->GetHighInterval()->SetRegister(reg + 1); 1890 colored_intervals.push_back(interval->GetHighInterval()); 1891 } 1892 } else if (interval->RequiresRegister()) { 1893 // The interference graph is too dense to color. Make it sparser by 1894 // splitting this live interval. 1895 successful = false; 1896 register_allocator_->SplitAtRegisterUses(interval); 1897 // We continue coloring, because there may be additional intervals that cannot 1898 // be colored, and that we should split. 1899 } else { 1900 // Spill. 1901 node->SetNeedsSpillSlot(); 1902 } 1903 } 1904 1905 // If unsuccessful, reset all register assignments. 
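// (A fresh coloring attempt follows; since SplitAtRegisterUses() shortened the offending intervals, the next interference graph is sparser, which is what makes forward progress possible.)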
1906 if (!successful) { 1907 for (LiveInterval* interval : colored_intervals) { 1908 interval->ClearRegister(); 1909 } 1910 } 1911 1912 return successful; 1913 } 1914 1915 void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) { 1916 // The register allocation resolver will organize the stack based on value type, 1917 // so we assign stack slots for each value type separately. 1918 ScopedArenaAllocator allocator(allocator_->GetArenaStack()); 1919 ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator); 1920 ScopedArenaVector<LiveInterval*> double_intervals(adapter); 1921 ScopedArenaVector<LiveInterval*> long_intervals(adapter); 1922 ScopedArenaVector<LiveInterval*> float_intervals(adapter); 1923 ScopedArenaVector<LiveInterval*> int_intervals(adapter); 1924 1925 // The set of parent intervals already handled. 1926 ScopedArenaSet<LiveInterval*> seen(adapter); 1927 1928 // Find nodes that need spill slots. 1929 for (InterferenceNode* node : nodes) { 1930 if (!node->NeedsSpillSlot()) { 1931 continue; 1932 } 1933 1934 LiveInterval* parent = node->GetInterval()->GetParent(); 1935 if (seen.find(parent) != seen.end()) { 1936 // We've already handled this interval. 1937 // This can happen if multiple siblings of the same interval request a stack slot. 1938 continue; 1939 } 1940 seen.insert(parent); 1941 1942 HInstruction* defined_by = parent->GetDefinedBy(); 1943 if (parent->HasSpillSlot()) { 1944 // We already have a spill slot for this value that we can reuse. 1945 } else if (defined_by->IsParameterValue()) { 1946 // Parameters already have a stack slot. 1947 parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); 1948 } else if (defined_by->IsCurrentMethod()) { 1949 // The current method is always at stack slot 0. 1950 parent->SetSpillSlot(0); 1951 } else if (defined_by->IsConstant()) { 1952 // Constants don't need a spill slot. 1953 } else { 1954 // We need to find a spill slot for this interval. Place it in the correct 1955 // worklist to be processed later. 1956 switch (node->GetInterval()->GetType()) { 1957 case DataType::Type::kFloat64: 1958 double_intervals.push_back(parent); 1959 break; 1960 case DataType::Type::kInt64: 1961 long_intervals.push_back(parent); 1962 break; 1963 case DataType::Type::kFloat32: 1964 float_intervals.push_back(parent); 1965 break; 1966 case DataType::Type::kReference: 1967 case DataType::Type::kInt32: 1968 case DataType::Type::kUint16: 1969 case DataType::Type::kUint8: 1970 case DataType::Type::kInt8: 1971 case DataType::Type::kBool: 1972 case DataType::Type::kInt16: 1973 int_intervals.push_back(parent); 1974 break; 1975 case DataType::Type::kUint32: 1976 case DataType::Type::kUint64: 1977 case DataType::Type::kVoid: 1978 LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType(); 1979 UNREACHABLE(); 1980 } 1981 } 1982 } 1983 1984 // Color spill slots for each value type. 
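// (Each call below packs one value type's intervals into its own zero-based slot numbering; the resolver later lays these groups out on the stack by type, which is why the four categories are kept separate.)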
1985 ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_); 1986 ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_); 1987 ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_); 1988 ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_); 1989 } 1990 1991 void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals, 1992 /* out */ size_t* num_stack_slots_used) { 1993 // We cannot use the original interference graph here because spill slots are assigned to 1994 // all of the siblings of an interval, whereas an interference node represents only a single 1995 // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints 1996 // by position, and assigning the lowest spill slot available when we encounter an interval 1997 // beginning. We ignore lifetime holes for simplicity. 1998 ScopedArenaAllocator allocator(allocator_->GetArenaStack()); 1999 ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( 2000 allocator.Adapter(kArenaAllocRegisterAllocator)); 2001 2002 for (LiveInterval* parent_interval : intervals) { 2003 DCHECK(parent_interval->IsParent()); 2004 DCHECK(!parent_interval->HasSpillSlot()); 2005 size_t start = parent_interval->GetStart(); 2006 size_t end = parent_interval->GetLastSibling()->GetEnd(); 2007 DCHECK_LT(start, end); 2008 interval_endpoints.push_back(std::make_tuple(start, true, parent_interval)); 2009 interval_endpoints.push_back(std::make_tuple(end, false, parent_interval)); 2010 } 2011 2012 // Sort by position. 2013 // We explicitly ignore the third entry of each tuple (the interval pointer) in order 2014 // to maintain determinism. 2015 std::sort(interval_endpoints.begin(), interval_endpoints.end(), 2016 [] (const std::tuple<size_t, bool, LiveInterval*>& lhs, 2017 const std::tuple<size_t, bool, LiveInterval*>& rhs) { 2018 return std::tie(std::get<0>(lhs), std::get<1>(lhs)) 2019 < std::tie(std::get<0>(rhs), std::get<1>(rhs)); 2020 }); 2021 2022 ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator); 2023 for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) { 2024 // Extract information from the current tuple. 2025 LiveInterval* parent_interval; 2026 bool is_interval_beginning; 2027 size_t position; 2028 std::tie(position, is_interval_beginning, parent_interval) = *it; 2029 size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded(); 2030 2031 if (is_interval_beginning) { 2032 DCHECK(!parent_interval->HasSpillSlot()); 2033 DCHECK_EQ(position, parent_interval->GetStart()); 2034 2035 // Find first available free stack slot(s). 2036 size_t slot = 0; 2037 for (; ; ++slot) { 2038 bool found = true; 2039 for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { 2040 if (taken.IsBitSet(s)) { 2041 found = false; 2042 break; // failure 2043 } 2044 } 2045 if (found) { 2046 break; // success 2047 } 2048 } 2049 2050 parent_interval->SetSpillSlot(slot); 2051 2052 *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed); 2053 if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) { 2054 // The parallel move resolver requires that there be an even number of spill slots 2055 // allocated for pair value types. 
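// (For example, if a two-slot value is placed at slot 3, the slot count first becomes 5 and is then rounded up to 6 here.)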
2056 ++(*num_stack_slots_used); 2057 } 2058 2059 for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { 2060 taken.SetBit(s); 2061 } 2062 } else { 2063 DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd()); 2064 DCHECK(parent_interval->HasSpillSlot()); 2065 2066 // Free up the stack slot(s) used by this interval. 2067 size_t slot = parent_interval->GetSpillSlot(); 2068 for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { 2069 DCHECK(taken.IsBitSet(s)); 2070 taken.ClearBit(s); 2071 } 2072 } 2073 } 2074 DCHECK_EQ(taken.NumSetBits(), 0u); 2075 } 2076 2077 } // namespace art 2078