// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"
#include "src/isolate.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by decreasing total latency so that we can
  // quickly find the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}


InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}


InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
      static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}


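// The per-instruction latency is queried from the target backend when the
// node is created; the total latency and start cycle are only computed later,
// during scheduling, so they start out as -1 ("not yet computed").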
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
    Zone* zone,
    Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {
}


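// Records a dependency of {node} on this node: {node} cannot be scheduled
// until this node has been, which is tracked through the successor's count of
// unscheduled predecessors.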
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}


void InstructionScheduler::EndBlock(RpoNumber rpo) {
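  // Under --turbo-stress-instruction-scheduling the ready list is drained in
  // random order rather than critical-path order, which helps expose missing
  // dependencies in the scheduling graph.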
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_or_trap_ = nullptr;
  operands_map_.clear();
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every other instruction in the block.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

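  // Fixed-register parameters act as live-in markers: they are chained
  // together so they keep their relative order at the start of the block, and
  // every other instruction depends on the last such marker.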
  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with instructions that have side
      // effects, but independent loads can be reordered with respect to each
      // other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // instructions with side effects.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies: {operands_map_} maps each virtual
    // register to the node which last defined it.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}


template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(),
                     cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

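    // Advance the cycle count even when nothing was issued: the ready list
    // may only contain instructions whose operands are not available yet.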
    cycle++;
  }
}


int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchLookupSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The kArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with instructions that have side
      // effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effects, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchDebugAbort:
    case kArchDebugBreak:
      return kHasSideEffect;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

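    // All remaining opcodes are architecture-specific; let the target backend
    // classify them.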
#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

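// Computes, for every node, the length of the longest latency path from that
// node to the end of the block. The graph is visited in reverse order, so
// each node's successors already have their total latency by the time the
// node itself is processed.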
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8