/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <fstream>
#include <sstream>
#include <string>

#include "scheduler.h"

#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "data_type-inl.h"
#include "prepare_for_register_allocation.h"

#ifdef ART_ENABLE_CODEGEN_arm64
#include "scheduler_arm64.h"
#endif

#ifdef ART_ENABLE_CODEGEN_arm
#include "scheduler_arm.h"
#endif

namespace art {

void SchedulingGraph::AddDependency(SchedulingNode* node,
                                    SchedulingNode* dependency,
                                    bool is_data_dependency) {
  if (node == nullptr || dependency == nullptr) {
    // A `nullptr` node indicates an instruction out of scheduling range (e.g. in
    // another block), so we do not need to add a dependency edge to the graph.
    return;
  }

  if (is_data_dependency) {
    if (!HasImmediateDataDependency(node, dependency)) {
      node->AddDataPredecessor(dependency);
    }
  } else if (!HasImmediateOtherDependency(node, dependency)) {
    node->AddOtherPredecessor(dependency);
  }
}

static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) {
  // Read after write.
  if (node.MayDependOn(other)) {
    return true;
  }

  // Write after read.
  if (other.MayDependOn(node)) {
    return true;
  }

  // Memory write after write.
  if (node.DoesAnyWrite() && other.DoesAnyWrite()) {
    return true;
  }

  return false;
}

size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* instruction) const {
  DCHECK(heap_location_collector_ != nullptr);
  size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction);
  // This array access should have been analyzed and added to the
  // HeapLocationCollector earlier.
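  // (When load-store analysis has run, every array access in the block has a
  // recorded heap location; see HScheduler::Schedule.)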
  DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
  return heap_loc;
}

bool SchedulingGraph::ArrayAccessMayAlias(HInstruction* node,
                                          HInstruction* other) const {
  DCHECK(heap_location_collector_ != nullptr);
  size_t node_heap_loc = ArrayAccessHeapLocation(node);
  size_t other_heap_loc = ArrayAccessHeapLocation(other);

  // For example: arr[0] and arr[0]
  if (node_heap_loc == other_heap_loc) {
    return true;
  }

  // For example: arr[0] and arr[i]
  if (heap_location_collector_->MayAlias(node_heap_loc, other_heap_loc)) {
    return true;
  }

  return false;
}

static bool IsArrayAccess(const HInstruction* instruction) {
  return instruction->IsArrayGet() || instruction->IsArraySet();
}

static bool IsInstanceFieldAccess(const HInstruction* instruction) {
  return instruction->IsInstanceFieldGet() ||
         instruction->IsInstanceFieldSet() ||
         instruction->IsUnresolvedInstanceFieldGet() ||
         instruction->IsUnresolvedInstanceFieldSet();
}

static bool IsStaticFieldAccess(const HInstruction* instruction) {
  return instruction->IsStaticFieldGet() ||
         instruction->IsStaticFieldSet() ||
         instruction->IsUnresolvedStaticFieldGet() ||
         instruction->IsUnresolvedStaticFieldSet();
}

static bool IsResolvedFieldAccess(const HInstruction* instruction) {
  return instruction->IsInstanceFieldGet() ||
         instruction->IsInstanceFieldSet() ||
         instruction->IsStaticFieldGet() ||
         instruction->IsStaticFieldSet();
}

static bool IsUnresolvedFieldAccess(const HInstruction* instruction) {
  return instruction->IsUnresolvedInstanceFieldGet() ||
         instruction->IsUnresolvedInstanceFieldSet() ||
         instruction->IsUnresolvedStaticFieldGet() ||
         instruction->IsUnresolvedStaticFieldSet();
}

static bool IsFieldAccess(const HInstruction* instruction) {
  return IsResolvedFieldAccess(instruction) || IsUnresolvedFieldAccess(instruction);
}

static const FieldInfo* GetFieldInfo(const HInstruction* instruction) {
  if (instruction->IsInstanceFieldGet()) {
    return &instruction->AsInstanceFieldGet()->GetFieldInfo();
  } else if (instruction->IsInstanceFieldSet()) {
    return &instruction->AsInstanceFieldSet()->GetFieldInfo();
  } else if (instruction->IsStaticFieldGet()) {
    return &instruction->AsStaticFieldGet()->GetFieldInfo();
  } else if (instruction->IsStaticFieldSet()) {
    return &instruction->AsStaticFieldSet()->GetFieldInfo();
  } else {
    LOG(FATAL) << "Unexpected field access type";
    UNREACHABLE();
  }
}

size_t SchedulingGraph::FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const {
  DCHECK(obj != nullptr);
  DCHECK(field != nullptr);
  DCHECK(heap_location_collector_ != nullptr);

  size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(obj, field);
  // This field access should have been analyzed and added to the
  // HeapLocationCollector earlier.
  DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);

  return heap_loc;
}

bool SchedulingGraph::FieldAccessMayAlias(const HInstruction* node,
                                          const HInstruction* other) const {
  DCHECK(heap_location_collector_ != nullptr);

  // Static and instance field accesses should not alias.
  if ((IsInstanceFieldAccess(node) && IsStaticFieldAccess(other)) ||
      (IsStaticFieldAccess(node) && IsInstanceFieldAccess(other))) {
    return false;
  }

  // If either of the field accesses is unresolved.
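  // Unresolved accesses are not tracked by the HeapLocationCollector, so no
  // further disambiguation is possible for them.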
  if (IsUnresolvedFieldAccess(node) || IsUnresolvedFieldAccess(other)) {
    // Conservatively treat the two accesses as aliasing.
    return true;
  }

  // Both field accesses are resolved at this point.
  const FieldInfo* node_field = GetFieldInfo(node);
  const FieldInfo* other_field = GetFieldInfo(other);

  size_t node_loc = FieldAccessHeapLocation(node->InputAt(0), node_field);
  size_t other_loc = FieldAccessHeapLocation(other->InputAt(0), other_field);

  if (node_loc == other_loc) {
    return true;
  }

  return heap_location_collector_->MayAlias(node_loc, other_loc);
}

bool SchedulingGraph::HasMemoryDependency(HInstruction* node,
                                          HInstruction* other) const {
  if (!MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) {
    return false;
  }

  if (heap_location_collector_ == nullptr ||
      heap_location_collector_->GetNumberOfHeapLocations() == 0) {
    // Without HeapLocation information from load-store analysis we cannot
    // further disambiguate the two instructions, so conservatively report a
    // memory dependency between them.
    return true;
  }

  if (IsArrayAccess(node) && IsArrayAccess(other)) {
    return ArrayAccessMayAlias(node, other);
  }
  if (IsFieldAccess(node) && IsFieldAccess(other)) {
    return FieldAccessMayAlias(node, other);
  }

  // TODO(xueliang): Support alias analysis in LSA among HVecLoad, HVecStore and ArrayAccess.
  if (node->IsVecMemoryOperation() && other->IsVecMemoryOperation()) {
    return true;
  }
  if (node->IsVecMemoryOperation() && IsArrayAccess(other)) {
    return true;
  }
  if (IsArrayAccess(node) && other->IsVecMemoryOperation()) {
    return true;
  }

  // Heap accesses of different kinds should not alias.
  if (IsArrayAccess(node) && IsFieldAccess(other)) {
    return false;
  }
  if (IsFieldAccess(node) && IsArrayAccess(other)) {
    return false;
  }
  if (node->IsVecMemoryOperation() && IsFieldAccess(other)) {
    return false;
  }
  if (IsFieldAccess(node) && other->IsVecMemoryOperation()) {
    return false;
  }

  // We conservatively treat all other cases as having a dependency, for
  // example an Invoke and an ArrayGet.
  return true;
}

bool SchedulingGraph::HasExceptionDependency(const HInstruction* node,
                                             const HInstruction* other) const {
  if (other->CanThrow() && node->GetSideEffects().DoesAnyWrite()) {
    return true;
  }
  if (other->GetSideEffects().DoesAnyWrite() && node->CanThrow()) {
    return true;
  }
  if (other->CanThrow() && node->CanThrow()) {
    return true;
  }

  // The checks above should cover all cases where we cannot reorder two
  // instructions that may throw exceptions.
  return false;
}

// Check whether `node` depends on `other`, taking into account `SideEffect`
// information and `CanThrow` information.
bool SchedulingGraph::HasSideEffectDependency(HInstruction* node,
                                              HInstruction* other) const {
  if (HasMemoryDependency(node, other)) {
    return true;
  }

  // Even if the memory dependency check above has passed, it is still
  // necessary to check dependencies between instructions that can throw and
  // instructions that write to memory.
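  // For example, a memory write must not be hoisted above an instruction that
  // may throw: the write must not happen if the exception is raised first.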
  if (HasExceptionDependency(node, other)) {
    return true;
  }

  return false;
}

// Check if the specified instruction is a better candidate, i.e. one that is
// more likely to have other instructions depending on it.
static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate,
                                                        HInstruction* old_candidate) {
  if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) {
    // Weaker side effects.
    return false;
  }
  if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) {
    // Same side effects, check if `new_candidate` has stronger `CanThrow()`.
    return new_candidate->CanThrow() && !old_candidate->CanThrow();
  } else {
    // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`.
    return new_candidate->CanThrow() || !old_candidate->CanThrow();
  }
}

void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_scheduling_barrier) {
  SchedulingNode* instruction_node = GetNode(instruction);

  // Define-use dependencies.
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    AddDataDependency(GetNode(use.GetUser()), instruction_node);
  }

  // Scheduling barrier dependencies.
  DCHECK(!is_scheduling_barrier || contains_scheduling_barrier_);
  if (contains_scheduling_barrier_) {
    // Instructions after a barrier depend on it, and a barrier depends on
    // instructions before it.
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      CHECK(other_node != nullptr)
          << other->DebugName()
          << " is in block " << other->GetBlock()->GetBlockId()
          << ", and expected in block " << instruction->GetBlock()->GetBlockId();
      bool other_is_barrier = other_node->IsSchedulingBarrier();
      if (is_scheduling_barrier || other_is_barrier) {
        AddOtherDependency(other_node, instruction_node);
      }
      if (other_is_barrier) {
        // This other scheduling barrier guarantees ordering of instructions after
        // it, so avoid creating additional useless dependencies in the graph.
        // For example if we have
        //   instr_1
        //   barrier_2
        //   instr_3
        //   barrier_4
        //   instr_5
        // we only create the following non-data dependencies
        //   1 -> 2
        //   2 -> 3
        //   2 -> 4
        //   3 -> 4
        //   4 -> 5
        // and do not create
        //   1 -> 4
        //   2 -> 5
        // Note that in this example we could also avoid creating the dependency
        // `2 -> 4`. But if we remove `instr_3` that dependency is required to
        // order the barriers. So we generate it to avoid a special case.
        break;
      }
    }
  }

  // Side effect dependencies.
  if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
    HInstruction* dep_chain_candidate = nullptr;
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      if (other_node->IsSchedulingBarrier()) {
        // We have reached a scheduling barrier, so we can stop further
        // processing.
        DCHECK(HasImmediateOtherDependency(other_node, instruction_node));
        break;
      }
      if (HasSideEffectDependency(other, instruction)) {
        if (dep_chain_candidate != nullptr &&
            HasSideEffectDependency(other, dep_chain_candidate)) {
          // Skip an explicit dependency to reduce memory usage, and rely on
          // the transitive dependency instead.
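          // (A chain `other` -> `dep_chain_candidate` -> `instruction` already
          // exists in the graph, directly or through earlier skips.)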
        } else {
          AddOtherDependency(other_node, instruction_node);
        }
        // Check if `other` is a better candidate, i.e. one that is more likely
        // to have other instructions depending on it.
        if (dep_chain_candidate == nullptr ||
            IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) {
          dep_chain_candidate = other;
        }
      }
    }
  }

  // Environment dependencies.
  // We do not need to process those if the instruction is a scheduling barrier,
  // since the barrier already has non-data dependencies on all following
  // instructions.
  if (!is_scheduling_barrier) {
    for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
      // Note that here we could stop processing if the environment holder is
      // across a scheduling barrier. But checking this would likely require
      // more work than simply iterating through environment uses.
      AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
    }
  }
}

bool SchedulingGraph::HasImmediateDataDependency(const SchedulingNode* node,
                                                 const SchedulingNode* other) const {
  return ContainsElement(node->GetDataPredecessors(), other);
}

bool SchedulingGraph::HasImmediateDataDependency(const HInstruction* instruction,
                                                 const HInstruction* other_instruction) const {
  const SchedulingNode* node = GetNode(instruction);
  const SchedulingNode* other = GetNode(other_instruction);
  if (node == nullptr || other == nullptr) {
    // Both instructions must be in the current basic block, i.e. the SchedulingGraph
    // can see their corresponding SchedulingNode in the graph and tell whether
    // there is a dependency. Otherwise there is no dependency from the
    // SchedulingGraph's perspective, for example if `instruction` and
    // `other_instruction` are in different basic blocks.
    return false;
  }
  return HasImmediateDataDependency(node, other);
}

bool SchedulingGraph::HasImmediateOtherDependency(const SchedulingNode* node,
                                                  const SchedulingNode* other) const {
  return ContainsElement(node->GetOtherPredecessors(), other);
}

bool SchedulingGraph::HasImmediateOtherDependency(const HInstruction* instruction,
                                                  const HInstruction* other_instruction) const {
  const SchedulingNode* node = GetNode(instruction);
  const SchedulingNode* other = GetNode(other_instruction);
  if (node == nullptr || other == nullptr) {
    // Both instructions must be in the current basic block, i.e. the SchedulingGraph
    // can see their corresponding SchedulingNode in the graph and tell whether
    // there is a dependency. Otherwise there is no dependency from the
    // SchedulingGraph's perspective, for example if `instruction` and
    // `other_instruction` are in different basic blocks.
    return false;
  }
  return HasImmediateOtherDependency(node, other);
}

static const std::string InstructionTypeId(const HInstruction* instruction) {
  return DataType::TypeId(instruction->GetType()) + std::to_string(instruction->GetId());
}

// Ideally we would reuse the graph visualizer code, but it is not available
// from here and it is not worth moving all that code only for our use.
static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
  const HInstruction* instruction = node->GetInstruction();
  // Use the instruction's typed id as the node identifier.
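  // (A type prefix followed by the instruction id, e.g. "i20" for an Int32
  // instruction with id 20.)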
  std::string instruction_id = InstructionTypeId(instruction);
  output << instruction_id << "[shape=record, label=\""
         << instruction_id << ' ' << instruction->DebugName() << " [";
  // List the instruction's inputs in its description. When visualizing the
  // graph this helps differentiate data inputs from other dependencies.
  const char* separator = "";
  for (const HInstruction* input : instruction->GetInputs()) {
    output << separator << InstructionTypeId(input);
    separator = ",";
  }
  output << "]";
  // Other properties of the node.
  output << "\\ninternal_latency: " << node->GetInternalLatency();
  output << "\\ncritical_path: " << node->GetCriticalPath();
  if (node->IsSchedulingBarrier()) {
    output << "\\n(barrier)";
  }
  output << "\"];\n";
  // We want program order to go from top to bottom in the graph output, so we
  // reverse the edges and specify `dir=back`.
  for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
           << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
  }
  for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
           << "[dir=back,color=blue]\n";
  }
}

void SchedulingGraph::DumpAsDotGraph(const std::string& description,
                                     const ScopedArenaVector<SchedulingNode*>& initial_candidates) {
  // TODO(xueliang): Ideally we should move the scheduling information into HInstruction;
  // after that we should move this dot graph dump feature to the visualizer, and add a
  // compiler option for it.
  std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
  // Description of this graph, as a comment.
  output << "// " << description << "\n";
  // Start the dot graph.
  output << "digraph G {\n";
  for (const auto& entry : nodes_map_) {
    SchedulingNode* node = entry.second.get();
    DumpAsDotNode(output, node);
  }
  // Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
  for (SchedulingNode* node : initial_candidates) {
    const HInstruction* instruction = node->GetInstruction();
    output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
           << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
  }
  // End of the dot graph.
  output << "}\n";
  output.close();
}

SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
    ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
  // Schedule condition inputs that can be materialized immediately before their use.
  // In the following example, after we have scheduled HSelect, we want HLessThan to be
  // scheduled immediately, because it is a materialized condition and will be emitted
  // right before HSelect in the codegen phase.
  //
  // i20 HLessThan [...]                  HLessThan    HAdd      HAdd
  // i21 HAdd [...]               ===>        |          |         |
  // i22 HAdd [...]                           +----------+---------+
  // i23 HSelect [i21, i22, i20]                      HSelect

  if (prev_select_ == nullptr) {
    return nullptr;
  }

  const HInstruction* instruction = prev_select_->GetInstruction();
  const HCondition* condition = nullptr;
  DCHECK(instruction != nullptr);

  if (instruction->IsIf()) {
    condition = instruction->AsIf()->InputAt(0)->AsCondition();
  } else if (instruction->IsSelect()) {
    condition = instruction->AsSelect()->GetCondition()->AsCondition();
  }

  SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;

  if ((condition_node != nullptr) &&
      condition->HasOnlyOneNonEnvironmentUse() &&
      ContainsElement(*nodes, condition_node)) {
    DCHECK(!condition_node->HasUnscheduledSuccessors());
    // Remove the condition from the list of candidates and schedule it.
    RemoveElement(*nodes, condition_node);
    return condition_node;
  }

  return nullptr;
}

SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
    ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
  DCHECK(!nodes->empty());
  SchedulingNode* select_node = nullptr;

  // Optimize for a materialized condition that is emitted right before its use.
  select_node = SelectMaterializedCondition(nodes, graph);

  if (select_node == nullptr) {
    // Get the highest priority node based on critical path information.
    select_node = (*nodes)[0];
    size_t select = 0;
    for (size_t i = 1, e = nodes->size(); i < e; i++) {
      SchedulingNode* check = (*nodes)[i];
      SchedulingNode* candidate = (*nodes)[select];
      select_node = GetHigherPrioritySchedulingNode(candidate, check);
      if (select_node == check) {
        select = i;
      }
    }
    DeleteNodeAtIndex(nodes, select);
  }

  prev_select_ = select_node;
  return select_node;
}

SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
    SchedulingNode* candidate, SchedulingNode* check) const {
  uint32_t candidate_path = candidate->GetCriticalPath();
  uint32_t check_path = check->GetCriticalPath();
  // First look at the critical_path.
  if (check_path != candidate_path) {
    return check_path < candidate_path ? check : candidate;
  }
  // If both critical paths are equal, schedule instructions with a higher latency
  // first in program order.
  return check->GetLatency() < candidate->GetLatency() ? check : candidate;
}

void HScheduler::Schedule(HGraph* graph) {
  // We run load-store analysis (LSA) here instead of in a separate pass, to
  // better control whether the analysis should run at all.
  const HeapLocationCollector* heap_location_collector = nullptr;
  LoadStoreAnalysis lsa(graph);
  if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
    lsa.Run();
    heap_location_collector = &lsa.GetHeapLocationCollector();
  }

  for (HBasicBlock* block : graph->GetReversePostOrder()) {
    if (IsSchedulable(block)) {
      Schedule(block, heap_location_collector);
    }
  }
}

void HScheduler::Schedule(HBasicBlock* block,
                          const HeapLocationCollector* heap_location_collector) {
  ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack());
  ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler));

  // Build the scheduling graph.
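  // Instructions are visited in reverse order, so when a node is created for an
  // instruction, its same-block users and side-effect successors already have
  // nodes and the dependency edges can be wired up immediately.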
  SchedulingGraph scheduling_graph(this, &allocator, heap_location_collector);
  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    HInstruction* instruction = it.Current();
    CHECK_EQ(instruction->GetBlock(), block)
        << instruction->DebugName()
        << " is in block " << instruction->GetBlock()->GetBlockId()
        << ", and expected in block " << block->GetBlockId();
    SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction));
    CalculateLatency(node);
    scheduling_nodes.push_back(node);
  }

  if (scheduling_graph.Size() <= 1) {
    return;
  }

  cursor_ = block->GetLastInstruction();

  // The list of candidates for scheduling. A node becomes a candidate when all
  // its successors have been scheduled.
  ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler));

  // Find the initial candidates for scheduling.
  for (SchedulingNode* node : scheduling_nodes) {
    if (!node->HasUnscheduledSuccessors()) {
      node->MaybeUpdateCriticalPath(node->GetLatency());
      candidates.push_back(node);
    }
  }

  ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler));
  if (kDumpDotSchedulingGraphs) {
    // Remember the list of initial candidates for debug output purposes.
    initial_candidates.assign(candidates.begin(), candidates.end());
  }

  // Schedule all nodes.
  selector_->Reset();
  while (!candidates.empty()) {
    SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph);
    Schedule(node, &candidates);
  }

  if (kDumpDotSchedulingGraphs) {
    // Dump the graph in `dot` format.
    HGraph* graph = block->GetGraph();
    std::stringstream description;
    description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
                << " B" << block->GetBlockId();
    scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates);
  }
}

void HScheduler::Schedule(SchedulingNode* scheduling_node,
                          /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) {
  // Check whether any of the node's predecessors will be valid candidates after
  // this node is scheduled.
  uint32_t path_to_node = scheduling_node->GetCriticalPath();
  for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
    predecessor->MaybeUpdateCriticalPath(
        path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates->push_back(predecessor);
    }
  }
  for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
    // Do not update the critical path.
    // The 'other' (non-data) dependencies usually do not represent a 'material'
    // dependency of nodes on others. They exist for program correctness, so we
    // do not use them to compute the critical path.
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates->push_back(predecessor);
    }
  }

  Schedule(scheduling_node->GetInstruction());
}

// Move an instruction after the cursor instruction inside one basic block.
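// Scheduling is bottom-up: the cursor starts at the last instruction of the
// block, each selected node is placed right after the cursor, and the cursor
// retreats when the cursor instruction itself is scheduled.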
static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
  DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
  DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
  DCHECK(!instruction->IsControlFlow());
  DCHECK(!cursor->IsControlFlow());
  instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false);
}

void HScheduler::Schedule(HInstruction* instruction) {
  if (instruction == cursor_) {
    cursor_ = cursor_->GetPrevious();
  } else {
    MoveAfterInBlock(instruction, cursor_);
  }
}

bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
  // We want to avoid exhaustively listing all instructions, so we first check
  // for instruction categories that we know are safe.
  if (instruction->IsControlFlow() ||
      instruction->IsConstant()) {
    return true;
  }
  // Currently all unary and binary operations are safe to schedule, so avoid
  // checking for each of them individually.
  // Since nothing prevents a new scheduling-unsafe HInstruction from subclassing
  // HUnaryOperation (or HBinaryOperation), check in debug mode that we have
  // the exhaustive lists here.
  if (instruction->IsUnaryOperation()) {
    DCHECK(instruction->IsAbs() ||
           instruction->IsBooleanNot() ||
           instruction->IsNot() ||
           instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  if (instruction->IsBinaryOperation()) {
    DCHECK(instruction->IsAdd() ||
           instruction->IsAnd() ||
           instruction->IsCompare() ||
           instruction->IsCondition() ||
           instruction->IsDiv() ||
           instruction->IsMin() ||
           instruction->IsMax() ||
           instruction->IsMul() ||
           instruction->IsOr() ||
           instruction->IsRem() ||
           instruction->IsRor() ||
           instruction->IsShl() ||
           instruction->IsShr() ||
           instruction->IsSub() ||
           instruction->IsUShr() ||
           instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  // The scheduler should not see any of these.
  DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
  // List of instructions explicitly excluded:
  //   HClearException
  //   HClinitCheck
  //   HDeoptimize
  //   HLoadClass
  //   HLoadException
  //   HMemoryBarrier
  //   HMonitorOperation
  //   HNativeDebugInfo
  //   HThrow
  //   HTryBoundary
  // TODO: Some of the instructions above may be safe to schedule (maybe as
  // scheduling barriers).
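  // Everything else must appear in the explicit allow-list below. Returning
  // `false` here merely disables scheduling for the enclosing block, which is
  // always safe.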
  return instruction->IsArrayGet() ||
         instruction->IsArraySet() ||
         instruction->IsArrayLength() ||
         instruction->IsBoundType() ||
         instruction->IsBoundsCheck() ||
         instruction->IsCheckCast() ||
         instruction->IsClassTableGet() ||
         instruction->IsCurrentMethod() ||
         instruction->IsDivZeroCheck() ||
         (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
         (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
         instruction->IsInstanceOf() ||
         instruction->IsInvokeInterface() ||
         instruction->IsInvokeStaticOrDirect() ||
         instruction->IsInvokeUnresolved() ||
         instruction->IsInvokeVirtual() ||
         instruction->IsLoadString() ||
         instruction->IsNewArray() ||
         instruction->IsNewInstance() ||
         instruction->IsNullCheck() ||
         instruction->IsPackedSwitch() ||
         instruction->IsParameterValue() ||
         instruction->IsPhi() ||
         instruction->IsReturn() ||
         instruction->IsReturnVoid() ||
         instruction->IsSelect() ||
         (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
         (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
         instruction->IsSuspendCheck() ||
         instruction->IsTypeConversion();
}

bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
  // We may be only interested in loop blocks.
  if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
    return false;
  }
  if (block->GetTryCatchInformation() != nullptr) {
    // Do not schedule blocks that are part of try-catch, because the scheduler
    // cannot see whether the catch block makes assumptions about the instruction
    // order in the try block. In the following example, if we enabled the
    // scheduler for the try block, the MultiplyAccumulate could be scheduled
    // before the DivZeroCheck, which could result in an incorrect value being
    // printed in the catch block.
    //   try {
    //     a = a / b;      // DivZeroCheck
    //                     // Div
    //     c = c * d + e;  // MultiplyAccumulate
    //   } catch { System.out.print(c); }
    return false;
  }
  // Check whether all instructions in this block are schedulable.
  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    if (!IsSchedulable(it.Current())) {
      return false;
    }
  }
  return true;
}

bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
  return instr->IsControlFlow() ||
         // Don't break the calling convention.
         instr->IsParameterValue() ||
         // Code generation of goto relies on SuspendCheck's position.
         instr->IsSuspendCheck();
}

bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
                                 bool schedule_randomly) {
#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
  // Pick the scheduling node selector: random selection (useful for testing)
  // or selection based on critical path information.
  CriticalPathSchedulingNodeSelector critical_path_selector;
  RandomSchedulingNodeSelector random_selector;
  SchedulingNodeSelector* selector = schedule_randomly
      ? static_cast<SchedulingNodeSelector*>(&random_selector)
      : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
#else
  // Avoid compilation errors when compiling for an unsupported instruction set.
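  // (`codegen_` is only consumed by the ARM latency visitor, which is compiled
  // out here.)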
  UNUSED(only_optimize_loop_blocks);
  UNUSED(schedule_randomly);
  UNUSED(codegen_);
#endif

  switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
    case InstructionSet::kArm64: {
      arm64::HSchedulerARM64 scheduler(selector);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
#if defined(ART_ENABLE_CODEGEN_arm)
    case InstructionSet::kThumb2:
    case InstructionSet::kArm: {
      arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
      arm::HSchedulerARM scheduler(selector, &arm_latency_visitor);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
    default:
      break;
  }
  return true;
}

}  // namespace art