Home | History | Annotate | Download | only in CodeGen
      1 //===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the interface for the MachineTraceMetrics analysis pass
     11 // that estimates CPU resource usage and critical data dependency paths through
     12 // preferred traces. This is useful for super-scalar CPUs where execution speed
     13 // can be limited both by data dependencies and by limited execution resources.
     14 //
     15 // Out-of-order CPUs will often be executing instructions from multiple basic
     16 // blocks at the same time. This makes it difficult to estimate the resource
     17 // usage accurately in a single basic block. Resources can be estimated better
     18 // by looking at a trace through the current basic block.
     19 //
     20 // For every block, the MachineTraceMetrics pass will pick a preferred trace
     21 // that passes through the block. The trace is chosen based on loop structure,
     22 // branch probabilities, and resource usage. The intention is to pick likely
     23 // traces that would be the most affected by code transformations.
     24 //
     25 // It is expensive to compute a full arbitrary trace for every block, so to
     26 // save some computations, traces are chosen to be convergent. This means that
     27 // if the traces through basic blocks A and B ever cross when moving away from
     28 // A and B, they never diverge again. This applies in both directions - If the
     29 // traces meet above A and B, they won't diverge when going further back.
     30 //
     31 // Traces tend to align with loops. The trace through a block in an inner loop
     32 // will begin at the loop entry block and end at a back edge. If there are
     33 // nested loops, the trace may begin and end at those instead.
     34 //
     35 // For each trace, we compute the critical path length, which is the number of
     36 // cycles required to execute the trace when execution is limited by data
     37 // dependencies only. We also compute the resource height, which is the number
     38 // of cycles required to execute all instructions in the trace when ignoring
     39 // data dependencies.
     40 //
     41 // Every instruction in the current block has a slack - the number of cycles
     42 // execution of the instruction can be delayed without extending the critical
     43 // path.
     44 //
     45 //===----------------------------------------------------------------------===//
     46 
     47 #ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H
     48 #define LLVM_CODEGEN_MACHINETRACEMETRICS_H
     49 
     50 #include "llvm/ADT/ArrayRef.h"
     51 #include "llvm/ADT/DenseMap.h"
     52 #include "llvm/ADT/None.h"
     53 #include "llvm/ADT/SmallVector.h"
     54 #include "llvm/CodeGen/MachineFunctionPass.h"
     55 #include "llvm/CodeGen/TargetSchedule.h"
     56 
     57 namespace llvm {
     58 
     59 class AnalysisUsage;
     60 class MachineBasicBlock;
     61 class MachineFunction;
     62 class MachineInstr;
     63 class MachineLoop;
     64 class MachineLoopInfo;
     65 class MachineRegisterInfo;
     66 struct MCSchedClassDesc;
     67 class raw_ostream;
     68 class TargetInstrInfo;
     69 class TargetRegisterInfo;
     70 
     71 class MachineTraceMetrics : public MachineFunctionPass {
     72   const MachineFunction *MF = nullptr;
     73   const TargetInstrInfo *TII = nullptr;
     74   const TargetRegisterInfo *TRI = nullptr;
     75   const MachineRegisterInfo *MRI = nullptr;
     76   const MachineLoopInfo *Loops = nullptr;
     77   TargetSchedModel SchedModel;
     78 
     79 public:
     80   friend class Ensemble;
     81   friend class Trace;
     82 
     83   class Ensemble;
     84 
     85   static char ID;
     86 
     87   MachineTraceMetrics();
     88 
     89   void getAnalysisUsage(AnalysisUsage&) const override;
     90   bool runOnMachineFunction(MachineFunction&) override;
     91   void releaseMemory() override;
     92   void verifyAnalysis() const override;
     93 
     94   /// Per-basic block information that doesn't depend on the trace through the
     95   /// block.
     96   struct FixedBlockInfo {
     97     /// The number of non-trivial instructions in the block.
     98     /// Doesn't count PHI and COPY instructions that are likely to be removed.
     99     unsigned InstrCount = ~0u;
    100 
    101     /// True when the block contains calls.
    102     bool HasCalls = false;
    103 
    104     FixedBlockInfo() = default;
    105 
    106     /// Returns true when resource information for this block has been computed.
    107     bool hasResources() const { return InstrCount != ~0u; }
    108 
    109     /// Invalidate resource information.
    110     void invalidate() { InstrCount = ~0u; }
    111   };
    112 
    113   /// Get the fixed resource information about MBB. Compute it on demand.
    114   const FixedBlockInfo *getResources(const MachineBasicBlock*);
    115 
    116   /// Get the scaled number of cycles used per processor resource in MBB.
    117   /// This is an array with SchedModel.getNumProcResourceKinds() entries.
    118   /// The getResources() function above must have been called first.
    119   ///
    120   /// These numbers have already been scaled by SchedModel.getResourceFactor().
    121   ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
    122 
    123   /// A virtual register or regunit required by a basic block or its trace
    124   /// successors.
    125   struct LiveInReg {
    126     /// The virtual register required, or a register unit.
    127     unsigned Reg;
    128 
    129     /// For virtual registers: Minimum height of the defining instruction.
    130     /// For regunits: Height of the highest user in the trace.
    131     unsigned Height;
    132 
    133     LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
    134   };
    135 
    136   /// Per-basic block information that relates to a specific trace through the
    137   /// block. Convergent traces means that only one of these is required per
    138   /// block in a trace ensemble.
    139   struct TraceBlockInfo {
    140     /// Trace predecessor, or NULL for the first block in the trace.
    141     /// Valid when hasValidDepth().
    142     const MachineBasicBlock *Pred = nullptr;
    143 
    144     /// Trace successor, or NULL for the last block in the trace.
    145     /// Valid when hasValidHeight().
    146     const MachineBasicBlock *Succ = nullptr;
    147 
    148     /// The block number of the head of the trace. (When hasValidDepth()).
    149     unsigned Head;
    150 
    151     /// The block number of the tail of the trace. (When hasValidHeight()).
    152     unsigned Tail;
    153 
    154     /// Accumulated number of instructions in the trace above this block.
    155     /// Does not include instructions in this block.
    156     unsigned InstrDepth = ~0u;
    157 
    158     /// Accumulated number of instructions in the trace below this block.
    159     /// Includes instructions in this block.
    160     unsigned InstrHeight = ~0u;
    161 
    162     TraceBlockInfo() = default;
    163 
    164     /// Returns true if the depth resources have been computed from the trace
    165     /// above this block.
    166     bool hasValidDepth() const { return InstrDepth != ~0u; }
    167 
    168     /// Returns true if the height resources have been computed from the trace
    169     /// below this block.
    170     bool hasValidHeight() const { return InstrHeight != ~0u; }
    171 
    172     /// Invalidate depth resources when some block above this one has changed.
    173     void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
    174 
    175     /// Invalidate height resources when a block below this one has changed.
    176     void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
    177 
    178     /// Assuming that this is a dominator of TBI, determine if it contains
    179     /// useful instruction depths. A dominating block can be above the current
    180     /// trace head, and any dependencies from such a far away dominator are not
    181     /// expected to affect the critical path.
    182     ///
    183     /// Also returns true when TBI == this.
    184     bool isUsefulDominator(const TraceBlockInfo &TBI) const {
    185       // The trace for TBI may not even be calculated yet.
    186       if (!hasValidDepth() || !TBI.hasValidDepth())
    187         return false;
    188       // Instruction depths are only comparable if the traces share a head.
    189       if (Head != TBI.Head)
    190         return false;
    191       // It is almost always the case that TBI belongs to the same trace as
    192       // this block, but rare convoluted cases involving irreducible control
    193       // flow, a dominator may share a trace head without actually being on the
    194       // same trace as TBI. This is not a big problem as long as it doesn't
    195       // increase the instruction depth.
    196       return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth;
    197     }
    198 
    199     // Data-dependency-related information. Per-instruction depth and height
    200     // are computed from data dependencies in the current trace, using
    201     // itinerary data.
    202 
    203     /// Instruction depths have been computed. This implies hasValidDepth().
    204     bool HasValidInstrDepths = false;
    205 
    206     /// Instruction heights have been computed. This implies hasValidHeight().
    207     bool HasValidInstrHeights = false;
    208 
    209     /// Critical path length. This is the number of cycles in the longest data
    210     /// dependency chain through the trace. This is only valid when both
    211     /// HasValidInstrDepths and HasValidInstrHeights are set.
    212     unsigned CriticalPath;
    213 
    214     /// Live-in registers. These registers are defined above the current block
    215     /// and used by this block or a block below it.
    216     /// This does not include PHI uses in the current block, but it does
    217     /// include PHI uses in deeper blocks.
    218     SmallVector<LiveInReg, 4> LiveIns;
    219 
    220     void print(raw_ostream&) const;
    221   };
    222 
    223   /// InstrCycles represents the cycle height and depth of an instruction in a
    224   /// trace.
    225   struct InstrCycles {
    226     /// Earliest issue cycle as determined by data dependencies and instruction
    227     /// latencies from the beginning of the trace. Data dependencies from
    228     /// before the trace are not included.
    229     unsigned Depth;
    230 
    231     /// Minimum number of cycles from this instruction is issued to the of the
    232     /// trace, as determined by data dependencies and instruction latencies.
    233     unsigned Height;
    234   };
    235 
    236   /// A trace represents a plausible sequence of executed basic blocks that
    237   /// passes through the current basic block one. The Trace class serves as a
    238   /// handle to internal cached data structures.
    239   class Trace {
    240     Ensemble &TE;
    241     TraceBlockInfo &TBI;
    242 
    243     unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
    244 
    245   public:
    246     explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
    247 
    248     void print(raw_ostream&) const;
    249 
    250     /// Compute the total number of instructions in the trace.
    251     unsigned getInstrCount() const {
    252       return TBI.InstrDepth + TBI.InstrHeight;
    253     }
    254 
    255     /// Return the resource depth of the top/bottom of the trace center block.
    256     /// This is the number of cycles required to execute all instructions from
    257     /// the trace head to the trace center block. The resource depth only
    258     /// considers execution resources, it ignores data dependencies.
    259     /// When Bottom is set, instructions in the trace center block are included.
    260     unsigned getResourceDepth(bool Bottom) const;
    261 
    262     /// Return the resource length of the trace. This is the number of cycles
    263     /// required to execute the instructions in the trace if they were all
    264     /// independent, exposing the maximum instruction-level parallelism.
    265     ///
    266     /// Any blocks in Extrablocks are included as if they were part of the
    267     /// trace. Likewise, extra resources required by the specified scheduling
    268     /// classes are included. For the caller to account for extra machine
    269     /// instructions, it must first resolve each instruction's scheduling class.
    270     unsigned getResourceLength(
    271         ArrayRef<const MachineBasicBlock *> Extrablocks = None,
    272         ArrayRef<const MCSchedClassDesc *> ExtraInstrs = None,
    273         ArrayRef<const MCSchedClassDesc *> RemoveInstrs = None) const;
    274 
    275     /// Return the length of the (data dependency) critical path through the
    276     /// trace.
    277     unsigned getCriticalPath() const { return TBI.CriticalPath; }
    278 
    279     /// Return the depth and height of MI. The depth is only valid for
    280     /// instructions in or above the trace center block. The height is only
    281     /// valid for instructions in or below the trace center block.
    282     InstrCycles getInstrCycles(const MachineInstr &MI) const {
    283       return TE.Cycles.lookup(&MI);
    284     }
    285 
    286     /// Return the slack of MI. This is the number of cycles MI can be delayed
    287     /// before the critical path becomes longer.
    288     /// MI must be an instruction in the trace center block.
    289     unsigned getInstrSlack(const MachineInstr &MI) const;
    290 
    291     /// Return the Depth of a PHI instruction in a trace center block successor.
    292     /// The PHI does not have to be part of the trace.
    293     unsigned getPHIDepth(const MachineInstr &PHI) const;
    294 
    295     /// A dependence is useful if the basic block of the defining instruction
    296     /// is part of the trace of the user instruction. It is assumed that DefMI
    297     /// dominates UseMI (see also isUsefulDominator).
    298     bool isDepInTrace(const MachineInstr &DefMI,
    299                       const MachineInstr &UseMI) const;
    300   };
    301 
    302   /// A trace ensemble is a collection of traces selected using the same
    303   /// strategy, for example 'minimum resource height'. There is one trace for
    304   /// every block in the function.
    305   class Ensemble {
    306     friend class Trace;
    307 
    308     SmallVector<TraceBlockInfo, 4> BlockInfo;
    309     DenseMap<const MachineInstr*, InstrCycles> Cycles;
    310     SmallVector<unsigned, 0> ProcResourceDepths;
    311     SmallVector<unsigned, 0> ProcResourceHeights;
    312 
    313     void computeTrace(const MachineBasicBlock*);
    314     void computeDepthResources(const MachineBasicBlock*);
    315     void computeHeightResources(const MachineBasicBlock*);
    316     unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
    317     void computeInstrDepths(const MachineBasicBlock*);
    318     void computeInstrHeights(const MachineBasicBlock*);
    319     void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
    320                     ArrayRef<const MachineBasicBlock*> Trace);
    321 
    322   protected:
    323     MachineTraceMetrics &MTM;
    324 
    325     explicit Ensemble(MachineTraceMetrics*);
    326 
    327     virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
    328     virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
    329     const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
    330     const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
    331     const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
    332     ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
    333     ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
    334 
    335   public:
    336     virtual ~Ensemble();
    337 
    338     virtual const char *getName() const = 0;
    339     void print(raw_ostream&) const;
    340     void invalidate(const MachineBasicBlock *MBB);
    341     void verify() const;
    342 
    343     /// Get the trace that passes through MBB.
    344     /// The trace is computed on demand.
    345     Trace getTrace(const MachineBasicBlock *MBB);
    346   };
    347 
    348   /// Strategies for selecting traces.
    349   enum Strategy {
    350     /// Select the trace through a block that has the fewest instructions.
    351     TS_MinInstrCount,
    352 
    353     TS_NumStrategies
    354   };
    355 
    356   /// Get the trace ensemble representing the given trace selection strategy.
    357   /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
    358   /// and valid for the lifetime of the analysis pass.
    359   Ensemble *getEnsemble(Strategy);
    360 
    361   /// Invalidate cached information about MBB. This must be called *before* MBB
    362   /// is erased, or the CFG is otherwise changed.
    363   ///
    364   /// This invalidates per-block information about resource usage for MBB only,
    365   /// and it invalidates per-trace information for any trace that passes
    366   /// through MBB.
    367   ///
    368   /// Call Ensemble::getTrace() again to update any trace handles.
    369   void invalidate(const MachineBasicBlock *MBB);
    370 
    371 private:
    372   // One entry per basic block, indexed by block number.
    373   SmallVector<FixedBlockInfo, 4> BlockInfo;
    374 
    375   // Cycles consumed on each processor resource per block.
    376   // The number of processor resource kinds is constant for a given subtarget,
    377   // but it is not known at compile time. The number of cycles consumed by
    378   // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
    379   // where Kinds = SchedModel.getNumProcResourceKinds().
    380   SmallVector<unsigned, 0> ProcResourceCycles;
    381 
    382   // One ensemble per strategy.
    383   Ensemble* Ensembles[TS_NumStrategies];
    384 
    385   // Convert scaled resource usage to a cycle count that can be compared with
    386   // latencies.
    387   unsigned getCycles(unsigned Scaled) {
    388     unsigned Factor = SchedModel.getLatencyFactor();
    389     return (Scaled + Factor - 1) / Factor;
    390   }
    391 };
    392 
    393 inline raw_ostream &operator<<(raw_ostream &OS,
    394                                const MachineTraceMetrics::Trace &Tr) {
    395   Tr.print(OS);
    396   return OS;
    397 }
    398 
    399 inline raw_ostream &operator<<(raw_ostream &OS,
    400                                const MachineTraceMetrics::Ensemble &En) {
    401   En.print(OS);
    402   return OS;
    403 }
    404 
    405 } // end namespace llvm
    406 
    407 #endif // LLVM_CODEGEN_MACHINETRACEMETRICS_H
    408