      1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// Interface definition for SIInstrInfo.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
     16 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
     17 
     18 #include "AMDGPUInstrInfo.h"
     19 #include "SIDefines.h"
     20 #include "SIRegisterInfo.h"
     21 #include "Utils/AMDGPUBaseInfo.h"
     22 #include "llvm/ADT/ArrayRef.h"
     23 #include "llvm/ADT/SetVector.h"
     24 #include "llvm/CodeGen/MachineBasicBlock.h"
     25 #include "llvm/CodeGen/MachineFunction.h"
     26 #include "llvm/CodeGen/MachineInstr.h"
     27 #include "llvm/CodeGen/MachineInstrBuilder.h"
     28 #include "llvm/CodeGen/MachineOperand.h"
     29 #include "llvm/MC/MCInstrDesc.h"
     30 #include "llvm/Support/Compiler.h"
     31 #include <cassert>
     32 #include <cstdint>
     33 
     34 #define GET_INSTRINFO_HEADER
     35 #include "AMDGPUGenInstrInfo.inc"
     36 
     37 namespace llvm {
     38 
     39 class APInt;
     40 class MachineRegisterInfo;
     41 class RegScavenger;
     42 class GCNSubtarget;
     43 class TargetRegisterClass;
     44 
     45 class SIInstrInfo final : public AMDGPUGenInstrInfo {
     46 private:
     47   const SIRegisterInfo RI;
     48   const GCNSubtarget &ST;
     49 
      50   // The inverse predicate must have the negated value.
     51   enum BranchPredicate {
     52     INVALID_BR = 0,
     53     SCC_TRUE = 1,
     54     SCC_FALSE = -1,
     55     VCCNZ = 2,
     56     VCCZ = -2,
     57     EXECNZ = -3,
     58     EXECZ = 3
     59   };
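           // Illustrative sketch only (not a member declared in this header): the
           // invariant above lets a predicate be inverted by numeric negation, e.g.
           //   BranchPredicate invert(BranchPredicate P) {
           //     return static_cast<BranchPredicate>(-P);
           //   }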
     60 
     61   using SetVectorType = SmallSetVector<MachineInstr *, 32>;
     62 
     63   static unsigned getBranchOpcode(BranchPredicate Cond);
     64   static BranchPredicate getBranchPredicate(unsigned Opcode);
     65 
     66 public:
     67   unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
     68                               MachineRegisterInfo &MRI,
     69                               MachineOperand &SuperReg,
     70                               const TargetRegisterClass *SuperRC,
     71                               unsigned SubIdx,
     72                               const TargetRegisterClass *SubRC) const;
     73   MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
     74                                          MachineRegisterInfo &MRI,
     75                                          MachineOperand &SuperReg,
     76                                          const TargetRegisterClass *SuperRC,
     77                                          unsigned SubIdx,
     78                                          const TargetRegisterClass *SubRC) const;
     79 private:
     80   void swapOperands(MachineInstr &Inst) const;
     81 
     82   bool moveScalarAddSub(SetVectorType &Worklist,
     83                         MachineInstr &Inst) const;
     84 
     85   void lowerScalarAbs(SetVectorType &Worklist,
     86                       MachineInstr &Inst) const;
     87 
     88   void lowerScalarXnor(SetVectorType &Worklist,
     89                        MachineInstr &Inst) const;
     90 
     91   void splitScalar64BitUnaryOp(SetVectorType &Worklist,
     92                                MachineInstr &Inst, unsigned Opcode) const;
     93 
     94   void splitScalar64BitAddSub(SetVectorType &Worklist,
     95                               MachineInstr &Inst) const;
     96 
     97   void splitScalar64BitBinaryOp(SetVectorType &Worklist,
     98                                 MachineInstr &Inst, unsigned Opcode) const;
     99 
    100   void splitScalar64BitBCNT(SetVectorType &Worklist,
    101                             MachineInstr &Inst) const;
    102   void splitScalar64BitBFE(SetVectorType &Worklist,
    103                            MachineInstr &Inst) const;
    104   void movePackToVALU(SetVectorType &Worklist,
    105                       MachineRegisterInfo &MRI,
    106                       MachineInstr &Inst) const;
    107 
    108   void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
    109                                     SetVectorType &Worklist) const;
    110 
    111   void
    112   addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
    113                                SetVectorType &Worklist) const;
    114 
    115   const TargetRegisterClass *
    116   getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
    117 
    118   bool checkInstOffsetsDoNotOverlap(MachineInstr &MIa, MachineInstr &MIb) const;
    119 
    120   unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
    121 
    122 protected:
    123   bool swapSourceModifiers(MachineInstr &MI,
    124                            MachineOperand &Src0, unsigned Src0OpName,
    125                            MachineOperand &Src1, unsigned Src1OpName) const;
    126 
    127   MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
    128                                        unsigned OpIdx0,
    129                                        unsigned OpIdx1) const override;
    130 
    131 public:
    132   enum TargetOperandFlags {
    133     MO_MASK = 0x7,
    134 
    135     MO_NONE = 0,
    136     // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    137     MO_GOTPCREL = 1,
    138     // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    139     MO_GOTPCREL32 = 2,
    140     MO_GOTPCREL32_LO = 2,
    141     // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    142     MO_GOTPCREL32_HI = 3,
    143     // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    144     MO_REL32 = 4,
    145     MO_REL32_LO = 4,
    146     // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    147     MO_REL32_HI = 5
    148   };
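           // Illustrative only: a flag like this is typically attached to a global
           // address operand when the address computation is emitted; the opcode and
           // register names below are placeholders, not declarations from this header.
           //   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), DstLoReg)
           //       .addReg(PCLoReg)
           //       .addGlobalAddress(GV, /*Offset=*/0, SIInstrInfo::MO_GOTPCREL32_LO);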
    149 
    150   explicit SIInstrInfo(const GCNSubtarget &ST);
    151 
    152   const SIRegisterInfo &getRegisterInfo() const {
    153     return RI;
    154   }
    155 
    156   bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
    157                                          AliasAnalysis *AA) const override;
    158 
    159   bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
    160                                int64_t &Offset1,
    161                                int64_t &Offset2) const override;
    162 
    163   bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
    164                              int64_t &Offset,
    165                              const TargetRegisterInfo *TRI) const final;
    166 
    167   bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
    168                            MachineInstr &SecondLdSt, unsigned BaseReg2,
    169                            unsigned NumLoads) const override;
    170 
    171   bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
    172                                int64_t Offset1, unsigned NumLoads) const override;
    173 
    174   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    175                    const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
    176                    bool KillSrc) const override;
    177 
    178   unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI,
    179                                     RegScavenger *RS, unsigned TmpReg,
    180                                     unsigned Offset, unsigned Size) const;
    181 
    182   void materializeImmediate(MachineBasicBlock &MBB,
    183                             MachineBasicBlock::iterator MI,
    184                             const DebugLoc &DL,
    185                             unsigned DestReg,
    186                             int64_t Value) const;
    187 
    188   const TargetRegisterClass *getPreferredSelectRegClass(
    189                                unsigned Size) const;
    190 
    191   unsigned insertNE(MachineBasicBlock *MBB,
    192                     MachineBasicBlock::iterator I, const DebugLoc &DL,
    193                     unsigned SrcReg, int Value) const;
    194 
    195   unsigned insertEQ(MachineBasicBlock *MBB,
    196                     MachineBasicBlock::iterator I, const DebugLoc &DL,
    197                     unsigned SrcReg, int Value)  const;
    198 
    199   void storeRegToStackSlot(MachineBasicBlock &MBB,
    200                            MachineBasicBlock::iterator MI, unsigned SrcReg,
    201                            bool isKill, int FrameIndex,
    202                            const TargetRegisterClass *RC,
    203                            const TargetRegisterInfo *TRI) const override;
    204 
    205   void loadRegFromStackSlot(MachineBasicBlock &MBB,
    206                             MachineBasicBlock::iterator MI, unsigned DestReg,
    207                             int FrameIndex, const TargetRegisterClass *RC,
    208                             const TargetRegisterInfo *TRI) const override;
    209 
    210   bool expandPostRAPseudo(MachineInstr &MI) const override;
    211 
     212   // Returns an opcode that can be used to move a value to a \p DstRC
     213   // register.  If there is no hardware instruction that can write to a
     214   // register of class \p DstRC, then AMDGPU::COPY is returned.
    215   unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
    216 
    217   LLVM_READONLY
    218   int commuteOpcode(unsigned Opc) const;
    219 
    220   LLVM_READONLY
    221   inline int commuteOpcode(const MachineInstr &MI) const {
    222     return commuteOpcode(MI.getOpcode());
    223   }
    224 
    225   bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
    226                              unsigned &SrcOpIdx2) const override;
    227 
    228   bool isBranchOffsetInRange(unsigned BranchOpc,
    229                              int64_t BrOffset) const override;
    230 
    231   MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
    232 
    233   unsigned insertIndirectBranch(MachineBasicBlock &MBB,
    234                                 MachineBasicBlock &NewDestBB,
    235                                 const DebugLoc &DL,
    236                                 int64_t BrOffset,
    237                                 RegScavenger *RS = nullptr) const override;
    238 
    239   bool analyzeBranchImpl(MachineBasicBlock &MBB,
    240                          MachineBasicBlock::iterator I,
    241                          MachineBasicBlock *&TBB,
    242                          MachineBasicBlock *&FBB,
    243                          SmallVectorImpl<MachineOperand> &Cond,
    244                          bool AllowModify) const;
    245 
    246   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
    247                      MachineBasicBlock *&FBB,
    248                      SmallVectorImpl<MachineOperand> &Cond,
    249                      bool AllowModify = false) const override;
    250 
    251   unsigned removeBranch(MachineBasicBlock &MBB,
    252                         int *BytesRemoved = nullptr) const override;
    253 
    254   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
    255                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
    256                         const DebugLoc &DL,
    257                         int *BytesAdded = nullptr) const override;
    258 
    259   bool reverseBranchCondition(
    260     SmallVectorImpl<MachineOperand> &Cond) const override;
    261 
    262   bool canInsertSelect(const MachineBasicBlock &MBB,
    263                        ArrayRef<MachineOperand> Cond,
    264                        unsigned TrueReg, unsigned FalseReg,
    265                        int &CondCycles,
    266                        int &TrueCycles, int &FalseCycles) const override;
    267 
    268   void insertSelect(MachineBasicBlock &MBB,
    269                     MachineBasicBlock::iterator I, const DebugLoc &DL,
    270                     unsigned DstReg, ArrayRef<MachineOperand> Cond,
    271                     unsigned TrueReg, unsigned FalseReg) const override;
    272 
    273   void insertVectorSelect(MachineBasicBlock &MBB,
    274                           MachineBasicBlock::iterator I, const DebugLoc &DL,
    275                           unsigned DstReg, ArrayRef<MachineOperand> Cond,
    276                           unsigned TrueReg, unsigned FalseReg) const;
    277 
    278   unsigned getAddressSpaceForPseudoSourceKind(
    279              PseudoSourceValue::PSVKind Kind) const override;
    280 
    281   bool
    282   areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
    283                                   AliasAnalysis *AA = nullptr) const override;
    284 
    285   bool isFoldableCopy(const MachineInstr &MI) const;
    286 
    287   bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
    288                      MachineRegisterInfo *MRI) const final;
    289 
    290   unsigned getMachineCSELookAheadLimit() const override { return 500; }
    291 
    292   MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
    293                                       MachineInstr &MI,
    294                                       LiveVariables *LV) const override;
    295 
    296   bool isSchedulingBoundary(const MachineInstr &MI,
    297                             const MachineBasicBlock *MBB,
    298                             const MachineFunction &MF) const override;
    299 
    300   static bool isSALU(const MachineInstr &MI) {
    301     return MI.getDesc().TSFlags & SIInstrFlags::SALU;
    302   }
    303 
    304   bool isSALU(uint16_t Opcode) const {
    305     return get(Opcode).TSFlags & SIInstrFlags::SALU;
    306   }
    307 
    308   static bool isVALU(const MachineInstr &MI) {
    309     return MI.getDesc().TSFlags & SIInstrFlags::VALU;
    310   }
    311 
    312   bool isVALU(uint16_t Opcode) const {
    313     return get(Opcode).TSFlags & SIInstrFlags::VALU;
    314   }
    315 
    316   static bool isVMEM(const MachineInstr &MI) {
    317     return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
    318   }
    319 
    320   bool isVMEM(uint16_t Opcode) const {
    321     return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
    322   }
    323 
    324   static bool isSOP1(const MachineInstr &MI) {
    325     return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
    326   }
    327 
    328   bool isSOP1(uint16_t Opcode) const {
    329     return get(Opcode).TSFlags & SIInstrFlags::SOP1;
    330   }
    331 
    332   static bool isSOP2(const MachineInstr &MI) {
    333     return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
    334   }
    335 
    336   bool isSOP2(uint16_t Opcode) const {
    337     return get(Opcode).TSFlags & SIInstrFlags::SOP2;
    338   }
    339 
    340   static bool isSOPC(const MachineInstr &MI) {
    341     return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
    342   }
    343 
    344   bool isSOPC(uint16_t Opcode) const {
    345     return get(Opcode).TSFlags & SIInstrFlags::SOPC;
    346   }
    347 
    348   static bool isSOPK(const MachineInstr &MI) {
    349     return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
    350   }
    351 
    352   bool isSOPK(uint16_t Opcode) const {
    353     return get(Opcode).TSFlags & SIInstrFlags::SOPK;
    354   }
    355 
    356   static bool isSOPP(const MachineInstr &MI) {
    357     return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
    358   }
    359 
    360   bool isSOPP(uint16_t Opcode) const {
    361     return get(Opcode).TSFlags & SIInstrFlags::SOPP;
    362   }
    363 
    364   static bool isVOP1(const MachineInstr &MI) {
    365     return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
    366   }
    367 
    368   bool isVOP1(uint16_t Opcode) const {
    369     return get(Opcode).TSFlags & SIInstrFlags::VOP1;
    370   }
    371 
    372   static bool isVOP2(const MachineInstr &MI) {
    373     return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
    374   }
    375 
    376   bool isVOP2(uint16_t Opcode) const {
    377     return get(Opcode).TSFlags & SIInstrFlags::VOP2;
    378   }
    379 
    380   static bool isVOP3(const MachineInstr &MI) {
    381     return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
    382   }
    383 
    384   bool isVOP3(uint16_t Opcode) const {
    385     return get(Opcode).TSFlags & SIInstrFlags::VOP3;
    386   }
    387 
    388   static bool isSDWA(const MachineInstr &MI) {
    389     return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
    390   }
    391 
    392   bool isSDWA(uint16_t Opcode) const {
    393     return get(Opcode).TSFlags & SIInstrFlags::SDWA;
    394   }
    395 
    396   static bool isVOPC(const MachineInstr &MI) {
    397     return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
    398   }
    399 
    400   bool isVOPC(uint16_t Opcode) const {
    401     return get(Opcode).TSFlags & SIInstrFlags::VOPC;
    402   }
    403 
    404   static bool isMUBUF(const MachineInstr &MI) {
    405     return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
    406   }
    407 
    408   bool isMUBUF(uint16_t Opcode) const {
    409     return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
    410   }
    411 
    412   static bool isMTBUF(const MachineInstr &MI) {
    413     return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
    414   }
    415 
    416   bool isMTBUF(uint16_t Opcode) const {
    417     return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
    418   }
    419 
    420   static bool isSMRD(const MachineInstr &MI) {
    421     return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
    422   }
    423 
    424   bool isSMRD(uint16_t Opcode) const {
    425     return get(Opcode).TSFlags & SIInstrFlags::SMRD;
    426   }
    427 
    428   bool isBufferSMRD(const MachineInstr &MI) const;
    429 
    430   static bool isDS(const MachineInstr &MI) {
    431     return MI.getDesc().TSFlags & SIInstrFlags::DS;
    432   }
    433 
    434   bool isDS(uint16_t Opcode) const {
    435     return get(Opcode).TSFlags & SIInstrFlags::DS;
    436   }
    437 
    438   static bool isMIMG(const MachineInstr &MI) {
    439     return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
    440   }
    441 
    442   bool isMIMG(uint16_t Opcode) const {
    443     return get(Opcode).TSFlags & SIInstrFlags::MIMG;
    444   }
    445 
    446   static bool isGather4(const MachineInstr &MI) {
    447     return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
    448   }
    449 
    450   bool isGather4(uint16_t Opcode) const {
    451     return get(Opcode).TSFlags & SIInstrFlags::Gather4;
    452   }
    453 
    454   static bool isFLAT(const MachineInstr &MI) {
    455     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
    456   }
    457 
     458   // Returns true if this is a FLAT encoded instruction which accesses a
     459   // specific segment, i.e. global_* or scratch_*.
    460   static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    461     auto Flags = MI.getDesc().TSFlags;
    462     return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT);
    463   }
    464 
    465   // Any FLAT encoded instruction, including global_* and scratch_*.
    466   bool isFLAT(uint16_t Opcode) const {
    467     return get(Opcode).TSFlags & SIInstrFlags::FLAT;
    468   }
    469 
    470   static bool isEXP(const MachineInstr &MI) {
    471     return MI.getDesc().TSFlags & SIInstrFlags::EXP;
    472   }
    473 
    474   bool isEXP(uint16_t Opcode) const {
    475     return get(Opcode).TSFlags & SIInstrFlags::EXP;
    476   }
    477 
    478   static bool isWQM(const MachineInstr &MI) {
    479     return MI.getDesc().TSFlags & SIInstrFlags::WQM;
    480   }
    481 
    482   bool isWQM(uint16_t Opcode) const {
    483     return get(Opcode).TSFlags & SIInstrFlags::WQM;
    484   }
    485 
    486   static bool isDisableWQM(const MachineInstr &MI) {
    487     return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
    488   }
    489 
    490   bool isDisableWQM(uint16_t Opcode) const {
    491     return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
    492   }
    493 
    494   static bool isVGPRSpill(const MachineInstr &MI) {
    495     return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
    496   }
    497 
    498   bool isVGPRSpill(uint16_t Opcode) const {
    499     return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
    500   }
    501 
    502   static bool isSGPRSpill(const MachineInstr &MI) {
    503     return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
    504   }
    505 
    506   bool isSGPRSpill(uint16_t Opcode) const {
    507     return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
    508   }
    509 
    510   static bool isDPP(const MachineInstr &MI) {
    511     return MI.getDesc().TSFlags & SIInstrFlags::DPP;
    512   }
    513 
    514   bool isDPP(uint16_t Opcode) const {
    515     return get(Opcode).TSFlags & SIInstrFlags::DPP;
    516   }
    517 
    518   static bool isVOP3P(const MachineInstr &MI) {
    519     return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
    520   }
    521 
    522   bool isVOP3P(uint16_t Opcode) const {
    523     return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
    524   }
    525 
    526   static bool isVINTRP(const MachineInstr &MI) {
    527     return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
    528   }
    529 
    530   bool isVINTRP(uint16_t Opcode) const {
    531     return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
    532   }
    533 
    534   static bool isScalarUnit(const MachineInstr &MI) {
    535     return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
    536   }
    537 
    538   static bool usesVM_CNT(const MachineInstr &MI) {
    539     return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
    540   }
    541 
    542   static bool usesLGKM_CNT(const MachineInstr &MI) {
    543     return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
    544   }
    545 
    546   static bool sopkIsZext(const MachineInstr &MI) {
    547     return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
    548   }
    549 
    550   bool sopkIsZext(uint16_t Opcode) const {
    551     return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
    552   }
    553 
     554   /// \returns true if this is an s_store_dword* instruction. This is more
     555   /// specific than isSMEM && mayStore.
    556   static bool isScalarStore(const MachineInstr &MI) {
    557     return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
    558   }
    559 
    560   bool isScalarStore(uint16_t Opcode) const {
    561     return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
    562   }
    563 
    564   static bool isFixedSize(const MachineInstr &MI) {
    565     return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
    566   }
    567 
    568   bool isFixedSize(uint16_t Opcode) const {
    569     return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
    570   }
    571 
    572   static bool hasFPClamp(const MachineInstr &MI) {
    573     return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
    574   }
    575 
    576   bool hasFPClamp(uint16_t Opcode) const {
    577     return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
    578   }
    579 
    580   static bool hasIntClamp(const MachineInstr &MI) {
    581     return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
    582   }
    583 
    584   uint64_t getClampMask(const MachineInstr &MI) const {
    585     const uint64_t ClampFlags = SIInstrFlags::FPClamp |
    586                                 SIInstrFlags::IntClamp |
    587                                 SIInstrFlags::ClampLo |
    588                                 SIInstrFlags::ClampHi;
     589     return MI.getDesc().TSFlags & ClampFlags;
    590   }
    591 
    592   bool isVGPRCopy(const MachineInstr &MI) const {
    593     assert(MI.isCopy());
    594     unsigned Dest = MI.getOperand(0).getReg();
    595     const MachineFunction &MF = *MI.getParent()->getParent();
    596     const MachineRegisterInfo &MRI = MF.getRegInfo();
    597     return !RI.isSGPRReg(MRI, Dest);
    598   }
    599 
    600   /// Whether we must prevent this instruction from executing with EXEC = 0.
    601   bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
    602 
    603   bool isInlineConstant(const APInt &Imm) const;
    604 
    605   bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
    606 
    607   bool isInlineConstant(const MachineOperand &MO,
    608                         const MCOperandInfo &OpInfo) const {
    609     return isInlineConstant(MO, OpInfo.OperandType);
    610   }
    611 
     612   /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, the
     613   /// result would be an inline immediate.
    614   bool isInlineConstant(const MachineInstr &MI,
    615                         const MachineOperand &UseMO,
    616                         const MachineOperand &DefMO) const {
    617     assert(UseMO.getParent() == &MI);
    618     int OpIdx = MI.getOperandNo(&UseMO);
    619     if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) {
    620       return false;
    621     }
    622 
    623     return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
    624   }
    625 
     626   /// \returns true if the operand \p OpIdx in \p MI is a valid inline
     627   /// immediate.
    628   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    629     const MachineOperand &MO = MI.getOperand(OpIdx);
    630     return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
    631   }
    632 
    633   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
    634                         const MachineOperand &MO) const {
    635     if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands)
    636       return false;
    637 
    638     if (MI.isCopy()) {
    639       unsigned Size = getOpSize(MI, OpIdx);
    640       assert(Size == 8 || Size == 4);
    641 
    642       uint8_t OpType = (Size == 8) ?
    643         AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
    644       return isInlineConstant(MO, OpType);
    645     }
    646 
    647     return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
    648   }
    649 
    650   bool isInlineConstant(const MachineOperand &MO) const {
    651     const MachineInstr *Parent = MO.getParent();
    652     return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
    653   }
    654 
    655   bool isLiteralConstant(const MachineOperand &MO,
    656                          const MCOperandInfo &OpInfo) const {
    657     return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType);
    658   }
    659 
    660   bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
    661     const MachineOperand &MO = MI.getOperand(OpIdx);
    662     return MO.isImm() && !isInlineConstant(MI, OpIdx);
    663   }
    664 
     665   // Returns true if this operand could potentially require a 32-bit literal
     666   // operand, but not necessarily. A FrameIndex, for example, could resolve to
     667   // an inline immediate value that would not require an additional 4 bytes;
     668   // this function conservatively assumes that it will.
    669   bool isLiteralConstantLike(const MachineOperand &MO,
    670                              const MCOperandInfo &OpInfo) const;
    671 
    672   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
    673                          const MachineOperand &MO) const;
    674 
    675   /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
    676   /// This function will return false if you pass it a 32-bit instruction.
    677   bool hasVALU32BitEncoding(unsigned Opcode) const;
    678 
    679   /// Returns true if this operand uses the constant bus.
    680   bool usesConstantBus(const MachineRegisterInfo &MRI,
    681                        const MachineOperand &MO,
    682                        const MCOperandInfo &OpInfo) const;
    683 
    684   /// Return true if this instruction has any modifiers.
    685   ///  e.g. src[012]_mod, omod, clamp.
    686   bool hasModifiers(unsigned Opcode) const;
    687 
    688   bool hasModifiersSet(const MachineInstr &MI,
    689                        unsigned OpName) const;
    690   bool hasAnyModifiersSet(const MachineInstr &MI) const;
    691 
    692   bool verifyInstruction(const MachineInstr &MI,
    693                          StringRef &ErrInfo) const override;
    694 
    695   unsigned getVALUOp(const MachineInstr &MI) const;
    696 
     697   /// Return the correct register class for \p OpNo.  For target-specific
     698   /// instructions, this will return the register class that has been defined
     699   /// in tablegen.  For generic instructions, like REG_SEQUENCE, it will return
     700   /// the register class of its machine operand, which may require inferring
     701   /// the correct register class based on the other operands.
    702   const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
    703                                            unsigned OpNo) const;
    704 
     705   /// Return the size in bytes of operand \p OpNo for the given
     706   /// instruction opcode.
    707   unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    708     const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
    709 
    710     if (OpInfo.RegClass == -1) {
    711       // If this is an immediate operand, this must be a 32-bit literal.
    712       assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
    713       return 4;
    714     }
    715 
    716     return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
    717   }
    718 
    719   /// This form should usually be preferred since it handles operands
    720   /// with unknown register classes.
    721   unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    722     return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
    723   }
    724 
    725   /// \returns true if it is legal for the operand at index \p OpNo
    726   /// to read a VGPR.
    727   bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
    728 
     729   /// Legalize the \p OpIdx operand of this instruction by inserting
    730   /// a MOV.  For example:
    731   /// ADD_I32_e32 VGPR0, 15
    732   /// to
    733   /// MOV VGPR1, 15
    734   /// ADD_I32_e32 VGPR0, VGPR1
    735   ///
    736   /// If the operand being legalized is a register, then a COPY will be used
    737   /// instead of MOV.
    738   void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
    739 
     740   /// Check if \p MO would be a legal operand if it were the operand at
     741   /// index \p OpIdx of \p MI.
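           ///
           /// A minimal usage sketch (hypothetical caller; \c NewMO is an immediate
           /// operand the caller wants to substitute):
           /// \code
           ///   if (isOperandLegal(MI, OpIdx, &NewMO))
           ///     MI.getOperand(OpIdx).ChangeToImmediate(NewMO.getImm());
           /// \endcode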
    742   bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
    743                       const MachineOperand *MO = nullptr) const;
    744 
    745   /// Check if \p MO would be a valid operand for the given operand
    746   /// definition \p OpInfo. Note this does not attempt to validate constant bus
    747   /// restrictions (e.g. literal constant usage).
    748   bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
    749                           const MCOperandInfo &OpInfo,
    750                           const MachineOperand &MO) const;
    751 
    752   /// Check if \p MO (a register operand) is a legal register for the
    753   /// given operand description.
    754   bool isLegalRegOperand(const MachineRegisterInfo &MRI,
    755                          const MCOperandInfo &OpInfo,
    756                          const MachineOperand &MO) const;
    757 
    758   /// Legalize operands in \p MI by either commuting it or inserting a
    759   /// copy of src1.
    760   void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
    761 
    762   /// Fix operands in \p MI to satisfy constant bus requirements.
    763   void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
    764 
     765   /// Copy a value from a VGPR (\p SrcReg) to an SGPR.  This function can only
     766   /// be used when it is known that the value in \p SrcReg is the same across
     767   /// all threads in the wave.
    768   /// \returns The SGPR register that \p SrcReg was copied to.
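           ///
           /// A usage sketch (hypothetical; \c SBaseIdx names an operand index that
           /// must become an SGPR, and the value in \p SrcReg is known to be uniform):
           /// \code
           ///   unsigned SGPR = readlaneVGPRToSGPR(SrcReg, UseMI, MRI);
           ///   UseMI.getOperand(SBaseIdx).setReg(SGPR);
           /// \endcode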
    769   unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
    770                               MachineRegisterInfo &MRI) const;
    771 
    772   void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
    773 
    774   void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
    775                               MachineBasicBlock::iterator I,
    776                               const TargetRegisterClass *DstRC,
    777                               MachineOperand &Op, MachineRegisterInfo &MRI,
    778                               const DebugLoc &DL) const;
    779 
     780   /// Legalize all operands in this instruction.  This function may create new
     781   /// instructions and insert them before \p MI.
    782   void legalizeOperands(MachineInstr &MI) const;
    783 
    784   /// Replace this instruction's opcode with the equivalent VALU
    785   /// opcode.  This function will also move the users of \p MI to the
    786   /// VALU if necessary.
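           ///
           /// A typical (sketched) use, after an SALU instruction has acquired a VGPR
           /// operand it cannot encode:
           /// \code
           ///   TII->moveToVALU(MI); // Rewrites MI and, if necessary, its users.
           /// \endcode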
    787   void moveToVALU(MachineInstr &MI) const;
    788 
     789   void insertWaitStates(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    790                         int Count) const;
    791 
    792   void insertNoop(MachineBasicBlock &MBB,
    793                   MachineBasicBlock::iterator MI) const override;
    794 
    795   void insertReturn(MachineBasicBlock &MBB) const;
    796   /// Return the number of wait states that result from executing this
    797   /// instruction.
    798   unsigned getNumWaitStates(const MachineInstr &MI) const;
    799 
     800   /// Returns the operand named \p OperandName.  If \p MI does not have such
     801   /// an operand, this function returns nullptr.
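           ///
           /// For example (a sketch; assumes \p MI may carry an offset operand):
           /// \code
           ///   if (const MachineOperand *Off =
           ///           getNamedOperand(MI, AMDGPU::OpName::offset))
           ///     ImmOffset = Off->getImm();
           /// \endcode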
    802   LLVM_READONLY
    803   MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
    804 
    805   LLVM_READONLY
    806   const MachineOperand *getNamedOperand(const MachineInstr &MI,
    807                                         unsigned OpName) const {
    808     return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
    809   }
    810 
     811   /// Get the value of the required immediate operand named \p OpName.
    812   int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    813     int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    814     return MI.getOperand(Idx).getImm();
    815   }
    816 
    817   uint64_t getDefaultRsrcDataFormat() const;
    818   uint64_t getScratchRsrcWords23() const;
    819 
    820   bool isLowLatencyInstruction(const MachineInstr &MI) const;
    821   bool isHighLatencyInstruction(const MachineInstr &MI) const;
    822 
     823   /// Return the descriptor of the target-specific machine instruction
     824   /// that corresponds to the specified pseudo or native opcode. The opcode
           /// must have a valid MC mapping (see pseudoToMCOpcode).
    825   const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    826     return get(pseudoToMCOpcode(Opcode));
    827   }
    828 
    829   unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
    830   unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
    831 
    832   unsigned isLoadFromStackSlot(const MachineInstr &MI,
    833                                int &FrameIndex) const override;
    834   unsigned isStoreToStackSlot(const MachineInstr &MI,
    835                               int &FrameIndex) const override;
    836 
    837   unsigned getInstBundleSize(const MachineInstr &MI) const;
    838   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
    839 
    840   bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
    841 
    842   bool isNonUniformBranchInstr(MachineInstr &Instr) const;
    843 
    844   void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
    845                                  MachineBasicBlock *IfEnd) const;
    846 
    847   void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
    848                                    MachineBasicBlock *LoopEnd) const;
    849 
    850   std::pair<unsigned, unsigned>
    851   decomposeMachineOperandsTargetFlags(unsigned TF) const override;
    852 
    853   ArrayRef<std::pair<int, const char *>>
    854   getSerializableTargetIndices() const override;
    855 
    856   ArrayRef<std::pair<unsigned, const char *>>
    857   getSerializableDirectMachineOperandTargetFlags() const override;
    858 
    859   ScheduleHazardRecognizer *
    860   CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
    861                                  const ScheduleDAG *DAG) const override;
    862 
    863   ScheduleHazardRecognizer *
    864   CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
    865 
    866   bool isBasicBlockPrologue(const MachineInstr &MI) const override;
    867 
    868   /// Return a partially built integer add instruction without carry.
    869   /// Caller must add source operands.
     870   /// For pre-GFX9 it will generate an unused carry destination operand.
    871   /// TODO: After GFX9 it should return a no-carry operation.
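           ///
           /// A usage sketch (illustrative; the source registers are assumed, and the
           /// chosen opcode determines whether any further operands are needed):
           /// \code
           ///   getAddNoCarry(MBB, I, DL, DestReg)
           ///       .addReg(Src0Reg)
           ///       .addReg(Src1Reg);
           /// \endcode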
    872   MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
    873                                     MachineBasicBlock::iterator I,
    874                                     const DebugLoc &DL,
    875                                     unsigned DestReg) const;
    876 
    877   static bool isKillTerminator(unsigned Opcode);
    878   const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
    879 
    880   static bool isLegalMUBUFImmOffset(unsigned Imm) {
    881     return isUInt<12>(Imm);
    882   }
    883 
    884   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
    885   /// Return -1 if the target-specific opcode for the pseudo instruction does
     886   /// not exist. If Opcode is not a pseudo instruction, this is the identity.
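           ///
           /// A usage sketch (illustrative):
           /// \code
           ///   int MCOp = pseudoToMCOpcode(MI.getOpcode());
           ///   if (MCOp == -1)
           ///     return; // This pseudo has no real encoding on this subtarget.
           /// \endcode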
    887   int pseudoToMCOpcode(int Opcode) const;
    888 
    889 };
    890 
    891 namespace AMDGPU {
    892 
    893   LLVM_READONLY
    894   int getVOPe64(uint16_t Opcode);
    895 
    896   LLVM_READONLY
    897   int getVOPe32(uint16_t Opcode);
    898 
    899   LLVM_READONLY
    900   int getSDWAOp(uint16_t Opcode);
    901 
    902   LLVM_READONLY
    903   int getBasicFromSDWAOp(uint16_t Opcode);
    904 
    905   LLVM_READONLY
    906   int getCommuteRev(uint16_t Opcode);
    907 
    908   LLVM_READONLY
    909   int getCommuteOrig(uint16_t Opcode);
    910 
    911   LLVM_READONLY
    912   int getAddr64Inst(uint16_t Opcode);
    913 
    914   LLVM_READONLY
    915   int getMUBUFNoLdsInst(uint16_t Opcode);
    916 
    917   LLVM_READONLY
    918   int getAtomicRetOp(uint16_t Opcode);
    919 
    920   LLVM_READONLY
    921   int getAtomicNoRetOp(uint16_t Opcode);
    922 
    923   LLVM_READONLY
    924   int getSOPKOp(uint16_t Opcode);
    925 
    926   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
    927   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
    928   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
    929   const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
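           // Illustrative only: these fields compose into the high words of a buffer
           // resource descriptor, along the lines of (ElementSizeEnc is a placeholder
           // for the subtarget-dependent encoded element size):
           //   uint64_t Rsrc23 = RSRC_DATA_FORMAT | RSRC_TID_ENABLE |
           //                     (ElementSizeEnc << RSRC_ELEMENT_SIZE_SHIFT);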
    930 
    931   // For MachineOperands.
    932   enum TargetFlags {
    933     TF_LONG_BRANCH_FORWARD = 1 << 0,
    934     TF_LONG_BRANCH_BACKWARD = 1 << 1
    935   };
    936 
    937 } // end namespace AMDGPU
    938 
    939 namespace SI {
    940 namespace KernelInputOffsets {
    941 
    942 /// Offsets in bytes from the start of the input buffer
    943 enum Offsets {
    944   NGROUPS_X = 0,
    945   NGROUPS_Y = 4,
    946   NGROUPS_Z = 8,
    947   GLOBAL_SIZE_X = 12,
    948   GLOBAL_SIZE_Y = 16,
    949   GLOBAL_SIZE_Z = 20,
    950   LOCAL_SIZE_X = 24,
    951   LOCAL_SIZE_Y = 28,
    952   LOCAL_SIZE_Z = 32
    953 };
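         // Illustrative only: a value is fetched with a scalar load from the input
         // buffer pointer plus the given byte offset, e.g. LOCAL_SIZE_X sits at byte
         // offset 24, i.e. dword index 6.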
    954 
    955 } // end namespace KernelInputOffsets
    956 } // end namespace SI
    957 
    958 } // end namespace llvm
    959 
    960 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
    961