// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"
#include "src/isolate.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}


InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}


InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}


InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}


void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}


void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_or_trap_ = nullptr;
  operands_map_.clear();
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them as a
  // successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with respect to side-effecting
      // instructions, but independent loads can be reordered with respect to
      // each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effecting instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}


template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
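  // These nodes have no unscheduled predecessors and form the roots of the
  // dependency graph; every other node is added to the ready list in the
  // loop below, once all of its predecessors have been scheduled.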
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}


int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name, this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchLookupSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The kArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with instructions that have side
      // effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
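      // Classifying them as kHasSideEffect keeps them ordered with respect to
      // other side-effecting instructions and pending loads (see
      // AddInstruction above).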
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchDebugAbort:
    case kArchDebugBreak:
      return kHasSideEffect;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8