Home | History | Annotate | Download | only in llvm-mca
      1 //===--------------------- Scheduler.h ------------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 /// \file
     10 ///
     11 /// A scheduler for Processor Resource Units and Processor Resource Groups.
     12 ///
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
     16 #define LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
     17 
     18 #include "HWEventListener.h"
     19 #include "HardwareUnit.h"
     20 #include "Instruction.h"
     21 #include "LSUnit.h"
     22 #include "RetireControlUnit.h"
     23 #include "llvm/ADT/ArrayRef.h"
     24 #include "llvm/ADT/DenseMap.h"
     25 #include "llvm/ADT/SmallVector.h"
     26 #include "llvm/MC/MCSubtargetInfo.h"
     27 #include <map>
     28 
     29 namespace mca {
     30 
     31 /// Used to notify the internal state of a processor resource.
     32 ///
     33 /// A processor resource is available if it is not reserved, and there are
     34 /// available slots in the buffer.  A processor resource is unavailable if it
     35 /// is either reserved, or the associated buffer is full. A processor resource
     36 /// with a buffer size of -1 is always available if it is not reserved.
     37 ///
     38 /// Values of type ResourceStateEvent are returned by method
     39 /// ResourceState::isBufferAvailable(), which is used to query the internal
     40 /// state of a resource.
     41 ///
     42 /// The naming convention for resource state events is:
     43 ///  * Event names start with prefix RS_
     44 ///  * Prefix RS_ is followed by a string describing the actual resource state.
     45 enum ResourceStateEvent {
     46   RS_BUFFER_AVAILABLE,
     47   RS_BUFFER_UNAVAILABLE,
     48   RS_RESERVED
     49 };
     50 
     51 /// A descriptor for processor resources.
     52 ///
     53 /// Each object of class ResourceState is associated to a specific processor
     54 /// resource. There is an instance of this class for every processor resource
     55 /// defined by the scheduling model.
     56 /// A ResourceState dynamically tracks the availability of units of a processor
     57 /// resource. For example, the ResourceState of a ProcResGroup tracks the
     58 /// availability of resource units which are part of the group.
     59 ///
     60 /// Internally, ResourceState uses a round-robin selector to identify
     61 /// which unit of the group shall be used next.
     62 class ResourceState {
     63   // Index to the MCProcResourceDesc in the processor Model.
     64   unsigned ProcResourceDescIndex;
     65   // A resource mask. This is generated by the tool with the help of
     66   // function `mca::createProcResourceMasks' (see Support.h).
     67   uint64_t ResourceMask;
     68 
     69   // A ProcResource can specify a number of units. For the purpose of dynamic
     70   // scheduling, a processor resource with more than one unit behaves like a
     71   // group. This field has one bit set for every unit/resource that is part of
     72   // the group.
     73   // For groups, this field defaults to 'ResourceMask'. For non-group
     74   // resources, the number of bits set in this mask is equivalent to the
     75   // number of units (i.e. field 'NumUnits' in 'ProcResourceUnits').
     76   uint64_t ResourceSizeMask;
     77 
     78   // A simple round-robin selector for processor resources.
     79   // Each bit of the mask identifies a sub resource within this group.
     80   //
     81   // As an example, lets assume that this ResourceState describes a
     82   // processor resource group composed of the following three units:
     83   //   ResourceA -- 0b001
     84   //   ResourceB -- 0b010
     85   //   ResourceC -- 0b100
     86   //
     87   // Each unit is identified by a ResourceMask which always contains a
     88   // single bit set. Field NextInSequenceMask is initially set to value
     89   // 0xb111. That value is obtained by OR'ing the resource masks of
     90   // processor resource that are part of the group.
     91   //
     92   //   NextInSequenceMask  -- 0b111
     93   //
     94   // Field NextInSequenceMask is used by the resource manager (i.e.
     95   // an object of class ResourceManager) to select the "next available resource"
     96   // from the set. The algorithm would prioritize resources with a bigger
     97   // ResourceMask value.
     98   //
     99   // In this example, there are three resources in the set, and 'ResourceC'
    100   // has the highest mask value. The round-robin selector would firstly select
    101   //  'ResourceC', then 'ResourceB', and eventually 'ResourceA'.
    102   //
    103   // When a resource R is used, its corresponding bit is cleared from the set.
    104   //
    105   // Back to the example:
    106   // If 'ResourceC' is selected, then the new value of NextInSequenceMask
    107   // becomes 0xb011.
    108   //
    109   // When NextInSequenceMask becomes zero, it is reset to its original value
    110   // (in this example, that value would be 0b111).
    111   uint64_t NextInSequenceMask;
    112 
    113   // Some instructions can only be issued on very specific pipeline resources.
    114   // For those instructions, we know exactly which resource would be consumed
    115   // without having to dynamically select it using field 'NextInSequenceMask'.
    116   //
    117   // The resource mask bit associated to the (statically) selected
    118   // processor resource is still cleared from the 'NextInSequenceMask'.
    119   // If that bit was already zero in NextInSequenceMask, then we update
    120   // mask 'RemovedFromNextInSequence'.
    121   //
    122   // When NextInSequenceMask is reset back to its initial value, the algorithm
    123   // removes any bits which are set in RemoveFromNextInSequence.
    124   uint64_t RemovedFromNextInSequence;
    125 
    126   // A mask of ready units.
    127   uint64_t ReadyMask;
    128 
    129   // Buffered resources will have this field set to a positive number bigger
    130   // than 0. A buffered resource behaves like a separate reservation station
    131   // implementing its own buffer for out-of-order execution.
    132   // A buffer of 1 is for units that force in-order execution.
    133   // A value of 0 is treated specially. In particular, a resource with
    134   // A BufferSize = 0 is for an in-order issue/dispatch resource.
    135   // That means, this resource is reserved starting from the dispatch event,
    136   // until all the "resource cycles" are consumed after the issue event.
    137   // While this resource is reserved, no other instruction may be dispatched.
    138   int BufferSize;
    139 
    140   // Available slots in the buffer (zero, if this is not a buffered resource).
    141   unsigned AvailableSlots;
    142 
    143   // True if this is resource is currently unavailable.
    144   // An instruction may "reserve" a resource for a number of cycles.
    145   // During those cycles, the reserved resource cannot be used for other
    146   // instructions, even if the ReadyMask is set.
    147   bool Unavailable;
    148 
    149   bool isSubResourceReady(uint64_t ID) const { return ReadyMask & ID; }
    150 
    151   /// Returns the mask identifier of the next available resource in the set.
    152   uint64_t getNextInSequence() const {
    153     assert(NextInSequenceMask);
    154     return llvm::PowerOf2Floor(NextInSequenceMask);
    155   }
    156 
    157   /// Returns the mask of the next available resource within the set,
    158   /// and updates the resource selector.
    159   void updateNextInSequence() {
    160     NextInSequenceMask ^= getNextInSequence();
    161     if (!NextInSequenceMask)
    162       NextInSequenceMask = ResourceSizeMask;
    163   }
    164 
    165   uint64_t computeResourceSizeMaskForGroup(uint64_t ResourceMask) {
    166     assert(llvm::countPopulation(ResourceMask) > 1);
    167     return ResourceMask ^ llvm::PowerOf2Floor(ResourceMask);
    168   }
    169 
    170 public:
    171   ResourceState(const llvm::MCProcResourceDesc &Desc, unsigned Index,
    172                 uint64_t Mask)
    173       : ProcResourceDescIndex(Index), ResourceMask(Mask) {
    174     bool IsAGroup = llvm::countPopulation(ResourceMask) > 1;
    175     ResourceSizeMask = IsAGroup ? computeResourceSizeMaskForGroup(ResourceMask)
    176                                 : ((1ULL << Desc.NumUnits) - 1);
    177     NextInSequenceMask = ResourceSizeMask;
    178     RemovedFromNextInSequence = 0;
    179     ReadyMask = ResourceSizeMask;
    180     BufferSize = Desc.BufferSize;
    181     AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize);
    182     Unavailable = false;
    183   }
    184 
    185   unsigned getProcResourceID() const { return ProcResourceDescIndex; }
    186   uint64_t getResourceMask() const { return ResourceMask; }
    187   int getBufferSize() const { return BufferSize; }
    188 
    189   bool isBuffered() const { return BufferSize > 0; }
    190   bool isInOrder() const { return BufferSize == 1; }
    191   bool isADispatchHazard() const { return BufferSize == 0; }
    192   bool isReserved() const { return Unavailable; }
    193 
    194   void setReserved() { Unavailable = true; }
    195   void clearReserved() { Unavailable = false; }
    196 
    197   // A resource is ready if it is not reserved, and if there are enough
    198   // available units.
    199   // If a resource is also a dispatch hazard, then we don't check if
    200   // it is reserved because that check would always return true.
    201   // A resource marked as "dispatch hazard" is always reserved at
    202   // dispatch time. When this method is called, the assumption is that
    203   // the user of this resource has been already dispatched.
    204   bool isReady(unsigned NumUnits = 1) const {
    205     return (!isReserved() || isADispatchHazard()) &&
    206            llvm::countPopulation(ReadyMask) >= NumUnits;
    207   }
    208   bool isAResourceGroup() const {
    209     return llvm::countPopulation(ResourceMask) > 1;
    210   }
    211 
    212   bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
    213 
    214   void markSubResourceAsUsed(uint64_t ID) {
    215     assert(isSubResourceReady(ID));
    216     ReadyMask ^= ID;
    217   }
    218 
    219   void releaseSubResource(uint64_t ID) {
    220     assert(!isSubResourceReady(ID));
    221     ReadyMask ^= ID;
    222   }
    223 
    224   unsigned getNumUnits() const {
    225     return isAResourceGroup() ? 1U : llvm::countPopulation(ResourceSizeMask);
    226   }
    227 
    228   uint64_t selectNextInSequence();
    229   void removeFromNextInSequence(uint64_t ID);
    230 
    231   ResourceStateEvent isBufferAvailable() const {
    232     if (isADispatchHazard() && isReserved())
    233       return RS_RESERVED;
    234     if (!isBuffered() || AvailableSlots)
    235       return RS_BUFFER_AVAILABLE;
    236     return RS_BUFFER_UNAVAILABLE;
    237   }
    238 
    239   void reserveBuffer() {
    240     if (AvailableSlots)
    241       AvailableSlots--;
    242   }
    243 
    244   void releaseBuffer() {
    245     if (BufferSize > 0)
    246       AvailableSlots++;
    247     assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
    248   }
    249 
    250 #ifndef NDEBUG
    251   void dump() const;
    252 #endif
    253 };
    254 
    255 /// A resource unit identifier.
    256 ///
    257 /// This is used to identify a specific processor resource unit using a pair
    258 /// of indices where the 'first' index is a processor resource mask, and the
    259 /// 'second' index is an index for a "sub-resource" (i.e. unit).
    260 typedef std::pair<uint64_t, uint64_t> ResourceRef;
    261 
    262 // First: a MCProcResourceDesc index identifying a buffered resource.
    263 // Second: max number of buffer entries used in this resource.
    264 typedef std::pair<unsigned, unsigned> BufferUsageEntry;
    265 
    266 /// A resource manager for processor resource units and groups.
    267 ///
    268 /// This class owns all the ResourceState objects, and it is responsible for
    269 /// acting on requests from a Scheduler by updating the internal state of
    270 /// ResourceState objects.
    271 /// This class doesn't know about instruction itineraries and functional units.
    272 /// In future, it can be extended to support itineraries too through the same
    273 /// public interface.
    274 class ResourceManager {
    275   // The resource manager owns all the ResourceState.
    276   using UniqueResourceState = std::unique_ptr<ResourceState>;
    277   llvm::SmallDenseMap<uint64_t, UniqueResourceState> Resources;
    278 
    279   // Keeps track of which resources are busy, and how many cycles are left
    280   // before those become usable again.
    281   llvm::SmallDenseMap<ResourceRef, unsigned> BusyResources;
    282 
    283   // A table to map processor resource IDs to processor resource masks.
    284   llvm::SmallVector<uint64_t, 8> ProcResID2Mask;
    285 
    286   // Adds a new resource state in Resources, as well as a new descriptor in
    287   // ResourceDescriptor.
    288   void addResource(const llvm::MCProcResourceDesc &Desc, unsigned Index,
    289                    uint64_t Mask);
    290 
    291   // Populate resource descriptors.
    292   void initialize(const llvm::MCSchedModel &SM);
    293 
    294   // Returns the actual resource unit that will be used.
    295   ResourceRef selectPipe(uint64_t ResourceID);
    296 
    297   void use(ResourceRef RR);
    298   void release(ResourceRef RR);
    299 
    300   unsigned getNumUnits(uint64_t ResourceID) const {
    301     assert(Resources.find(ResourceID) != Resources.end());
    302     return Resources.find(ResourceID)->getSecond()->getNumUnits();
    303   }
    304 
    305   // Reserve a specific Resource kind.
    306   void reserveBuffer(uint64_t ResourceID) {
    307     assert(isBufferAvailable(ResourceID) ==
    308            ResourceStateEvent::RS_BUFFER_AVAILABLE);
    309     ResourceState &Resource = *Resources[ResourceID];
    310     Resource.reserveBuffer();
    311   }
    312 
    313   void releaseBuffer(uint64_t ResourceID) {
    314     Resources[ResourceID]->releaseBuffer();
    315   }
    316 
    317   ResourceStateEvent isBufferAvailable(uint64_t ResourceID) const {
    318     const ResourceState &Resource = *Resources.find(ResourceID)->second;
    319     return Resource.isBufferAvailable();
    320   }
    321 
    322   bool isReady(uint64_t ResourceID, unsigned NumUnits) const {
    323     const ResourceState &Resource = *Resources.find(ResourceID)->second;
    324     return Resource.isReady(NumUnits);
    325   }
    326 
    327 public:
    328   ResourceManager(const llvm::MCSchedModel &SM)
    329       : ProcResID2Mask(SM.getNumProcResourceKinds()) {
    330     initialize(SM);
    331   }
    332 
    333   // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
    334   // there are enough available slots in the buffers.
    335   ResourceStateEvent canBeDispatched(llvm::ArrayRef<uint64_t> Buffers) const;
    336 
    337   // Return the processor resource identifier associated to this Mask.
    338   unsigned resolveResourceMask(uint64_t Mask) const {
    339     return Resources.find(Mask)->second->getProcResourceID();
    340   }
    341 
    342   // Consume a slot in every buffered resource from array 'Buffers'. Resource
    343   // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
    344   void reserveBuffers(llvm::ArrayRef<uint64_t> Buffers);
    345 
    346   // Release buffer entries previously allocated by method reserveBuffers.
    347   void releaseBuffers(llvm::ArrayRef<uint64_t> Buffers);
    348 
    349   void reserveResource(uint64_t ResourceID) {
    350     ResourceState &Resource = *Resources[ResourceID];
    351     assert(!Resource.isReserved());
    352     Resource.setReserved();
    353   }
    354 
    355   void releaseResource(uint64_t ResourceID) {
    356     ResourceState &Resource = *Resources[ResourceID];
    357     Resource.clearReserved();
    358   }
    359 
    360   // Returns true if all resources are in-order, and there is at least one
    361   // resource which is a dispatch hazard (BufferSize = 0).
    362   bool mustIssueImmediately(const InstrDesc &Desc);
    363 
    364   bool canBeIssued(const InstrDesc &Desc) const;
    365 
    366   void issueInstruction(
    367       const InstrDesc &Desc,
    368       llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
    369 
    370   void cycleEvent(llvm::SmallVectorImpl<ResourceRef> &ResourcesFreed);
    371 
    372 #ifndef NDEBUG
    373   void dump() const {
    374     for (const std::pair<uint64_t, UniqueResourceState> &Resource : Resources)
    375       Resource.second->dump();
    376   }
    377 #endif
    378 }; // namespace mca
    379 
    380 /// Class Scheduler is responsible for issuing instructions to pipeline
    381 /// resources. Internally, it delegates to a ResourceManager the management of
    382 /// processor resources.
    383 /// This class is also responsible for tracking the progress of instructions
    384 /// from the dispatch stage, until the write-back stage.
    385 ///
    386 /// An nstruction dispatched to the Scheduler is initially placed into either
    387 /// the 'WaitQueue' or the 'ReadyQueue' depending on the availability of the
    388 /// input operands. Instructions in the WaitQueue are ordered by instruction
    389 /// index. An instruction is moved from the WaitQueue to the ReadyQueue when
    390 /// register operands become available, and all memory dependencies are met.
    391 /// Instructions that are moved from the WaitQueue to the ReadyQueue transition
    392 /// from state 'IS_AVAILABLE' to state 'IS_READY'.
    393 ///
    394 /// At the beginning of each cycle, the Scheduler checks if there are
    395 /// instructions in the WaitQueue that can be moved to the ReadyQueue.  If the
    396 /// ReadyQueue is not empty, then older instructions from the queue are issued
    397 /// to the processor pipelines, and the underlying ResourceManager is updated
    398 /// accordingly.  The ReadyQueue is ordered by instruction index to guarantee
    399 /// that the first instructions in the set are also the oldest.
    400 ///
    401 /// An Instruction is moved from the ReadyQueue the `IssuedQueue` when it is
    402 /// issued to a (one or more) pipeline(s). This event also causes an instruction
    403 /// state transition (i.e. from state IS_READY, to state IS_EXECUTING).
    404 /// An Instruction leaves the IssuedQueue when it reaches the write-back stage.
    405 class Scheduler : public HardwareUnit {
    406   const llvm::MCSchedModel &SM;
    407 
    408   // Hardware resources that are managed by this scheduler.
    409   std::unique_ptr<ResourceManager> Resources;
    410   std::unique_ptr<LSUnit> LSU;
    411 
    412   using QueueEntryTy = std::pair<unsigned, Instruction *>;
    413   std::map<unsigned, Instruction *> WaitQueue;
    414   std::map<unsigned, Instruction *> ReadyQueue;
    415   std::map<unsigned, Instruction *> IssuedQueue;
    416 
    417   /// Issue an instruction without updating the ready queue.
    418   void issueInstructionImpl(
    419       InstRef &IR,
    420       llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
    421 
    422 public:
    423   Scheduler(const llvm::MCSchedModel &Model, unsigned LoadQueueSize,
    424             unsigned StoreQueueSize, bool AssumeNoAlias)
    425       : SM(Model), Resources(llvm::make_unique<ResourceManager>(SM)),
    426         LSU(llvm::make_unique<LSUnit>(LoadQueueSize, StoreQueueSize,
    427                                       AssumeNoAlias)) {}
    428 
    429   /// Check if the instruction in 'IR' can be dispatched.
    430   ///
    431   /// The DispatchStage is responsible for querying the Scheduler before
    432   /// dispatching new instructions. This routine is used for performing such
    433   /// a query.  If the instruction 'IR' can be dispatched, then true is
    434   /// returned, otherwise false is returned with Event set to the stall type.
    435   bool canBeDispatched(const InstRef &IR,
    436                        HWStallEvent::GenericEventType &Event) const;
    437 
    438   /// Returns true if there is availibility for IR in the LSU.
    439   bool isReady(const InstRef &IR) const { return LSU->isReady(IR); }
    440 
    441   /// Issue an instruction.  The Used container is populated with
    442   /// the resource objects consumed on behalf of issuing this instruction.
    443   void
    444   issueInstruction(InstRef &IR,
    445                    llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Used);
    446 
    447   /// This routine will attempt to issue an instruction immediately (for
    448   /// zero-latency instructions).
    449   ///
    450   /// Returns true if the instruction is issued immediately.  If this does not
    451   /// occur, then the instruction will be added to the Scheduler's ReadyQueue.
    452   bool issueImmediately(InstRef &IR);
    453 
    454   /// Reserve one entry in each buffered resource.
    455   void reserveBuffers(llvm::ArrayRef<uint64_t> Buffers) {
    456     Resources->reserveBuffers(Buffers);
    457   }
    458 
    459   /// Release buffer entries previously allocated by method reserveBuffers.
    460   void releaseBuffers(llvm::ArrayRef<uint64_t> Buffers) {
    461     Resources->releaseBuffers(Buffers);
    462   }
    463 
    464   /// Update the resources managed by the scheduler.
    465   /// This routine is to be called at the start of a new cycle, and is
    466   /// responsible for updating scheduler resources.  Resources are released
    467   /// once they have been fully consumed.
    468   void reclaimSimulatedResources(llvm::SmallVectorImpl<ResourceRef> &Freed);
    469 
    470   /// Move instructions from the WaitQueue to the ReadyQueue if input operands
    471   /// are all available.
    472   void promoteToReadyQueue(llvm::SmallVectorImpl<InstRef> &Ready);
    473 
    474   /// Update the ready queue.
    475   void updatePendingQueue(llvm::SmallVectorImpl<InstRef> &Ready);
    476 
    477   /// Update the issued queue.
    478   void updateIssuedQueue(llvm::SmallVectorImpl<InstRef> &Executed);
    479 
    480   /// Updates the Scheduler's resources to reflect that an instruction has just
    481   /// been executed.
    482   void onInstructionExecuted(const InstRef &IR);
    483 
    484   /// Obtain the processor's resource identifier for the given
    485   /// resource mask.
    486   unsigned getResourceID(uint64_t Mask) {
    487     return Resources->resolveResourceMask(Mask);
    488   }
    489 
    490   /// Reserve resources necessary to issue the instruction.
    491   /// Returns true if the resources are ready and the (LSU) can
    492   /// execute the given instruction immediately.
    493   bool reserveResources(InstRef &IR);
    494 
    495   /// Select the next instruction to issue from the ReadyQueue.
    496   /// This method gives priority to older instructions.
    497   InstRef select();
    498 
    499 #ifndef NDEBUG
    500   // Update the ready queues.
    501   void dump() const;
    502 
    503   // This routine performs a sanity check.  This routine should only be called
    504   // when we know that 'IR' is not in the scheduler's instruction queues.
    505   void sanityCheck(const InstRef &IR) const {
    506     const unsigned Idx = IR.getSourceIndex();
    507     assert(WaitQueue.find(Idx) == WaitQueue.end());
    508     assert(ReadyQueue.find(Idx) == ReadyQueue.end());
    509     assert(IssuedQueue.find(Idx) == IssuedQueue.end());
    510   }
    511 #endif // !NDEBUG
    512 };
    513 } // namespace mca
    514 
    515 #endif // LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
    516