// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}


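// Returns the first node in the ready list whose operands are available at
// the given cycle. Since AddNode keeps the list sorted by decreasing total
// latency, this is the ready node on the longest remaining critical path.
// Returns nullptr if no node is ready yet.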
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}


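// Stress-testing variant used when FLAG_turbo_stress_instruction_scheduling
// is enabled: picks an arbitrary ready node instead of the best one, so that
// alternative valid orderings get exercised.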
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
      static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}


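// A ScheduleGraphNode wraps a single instruction in the dependency graph.
// total_latency_ and start_cycle_ are initialized to -1 and are filled in
// later by ComputeTotalLatencies() and ScheduleBlock() respectively.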
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
    Zone* zone,
    Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {
}


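// Records a scheduling dependency: |node| must be scheduled after this node.
// The successor's unscheduled-predecessor count is incremented so that
// ScheduleBlock() knows when the successor becomes ready.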
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}


InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_(nullptr),
      operands_map_(zone) {}


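// Begins a new basic block. All per-block scheduling state must have been
// reset by the previous EndBlock() call.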
void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK(last_side_effect_instr_ == nullptr);
  DCHECK(pending_loads_.empty());
  DCHECK(last_live_in_reg_marker_ == nullptr);
  DCHECK(last_deopt_ == nullptr);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}


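// Schedules the instructions accumulated for the current block, emits them
// into the instruction sequence and resets the per-block state.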
void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_ = nullptr;
  operands_map_.clear();
}


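// Adds |instr| to the dependency graph. A block terminator is added as a
// successor of every existing node so that it stays at the end of the block;
// any other instruction gets dependency edges from the nodes it must not be
// reordered across (live-in register markers, the last deoptimization point,
// side-effecting instructions, pending loads) and from the definitions of its
// input operands.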
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  if (IsBlockTerminator(instr)) {
    // Make sure that basic block terminators are not moved by adding them
    // as a successor of every instruction.
    for (ScheduleGraphNode* node : graph_) {
      node->AddSuccessor(new_node);
    }
  } else if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization point when they depend on it.
    if ((last_deopt_ != nullptr) && DependsOnDeoptimization(instr)) {
      last_deopt_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with side-effect instructions, but
      // independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall()) {
      // Ensure that deopts are not reordered with respect to side-effect
      // instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}


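// List-schedules the dependency graph for the current block: nodes with no
// unscheduled predecessors go on the ready list, and on each cycle the queue
// picks the next candidate to emit (critical-path-first by default, random
// under stress scheduling).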
template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(),
                     cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}


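// Returns the scheduling flags for architecture-independent opcodes; opcodes
// from TARGET_ARCH_OPCODE_LIST are delegated to GetTargetInstructionFlags().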
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchTruncateDoubleToI:
    case kArchStackSlot:
    case kArchDebugBreak:
    case kArchComment:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The kArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with instructions that have side
      // effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
      return kHasSideEffect;

    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallJSFunctionFromJSFunction:
    case kArchTailCallAddress:
      return kHasSideEffect | kIsBlockTerminator;

    case kArchDeoptimize:
    case kArchJmp:
    case kArchLookupSwitch:
    case kArchTableSwitch:
    case kArchRet:
    case kArchThrowTerminator:
      return kIsBlockTerminator;

    case kCheckedLoadInt8:
    case kCheckedLoadUint8:
    case kCheckedLoadInt16:
    case kCheckedLoadUint16:
    case kCheckedLoadWord32:
    case kCheckedLoadWord64:
    case kCheckedLoadFloat32:
    case kCheckedLoadFloat64:
      return kIsLoadOperation;

    case kCheckedStoreWord8:
    case kCheckedStoreWord16:
    case kCheckedStoreWord32:
    case kCheckedStoreWord64:
    case kCheckedStoreFloat32:
    case kCheckedStoreFloat64:
    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
  return kNoOpcodeFlags;
}


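// An instruction terminates its basic block if it is flagged as a block
// terminator or if it is a branch (kFlags_branch).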
bool InstructionScheduler::IsBlockTerminator(const Instruction* instr) const {
  return ((GetInstructionFlags(instr) & kIsBlockTerminator) ||
          (instr->flags_mode() == kFlags_branch));
}


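// Walks the graph in reverse order (successors before predecessors) and sets
// each node's total latency to its own latency plus the maximum total latency
// of its successors, i.e. the length of the critical path starting at the
// node.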
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK(successor->total_latency() != -1);
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8