Home | History | Annotate | Download | only in AMDGPU
      1 //===- R600ExpandSpecialInstrs.cpp - Expand special instructions ----------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// Vector, Reduction, and Cube instructions need to fill the entire instruction
     12 /// group to work correctly.  This pass expands these individual instructions
     13 /// into several instructions that will completely fill the instruction group.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "AMDGPU.h"
     18 #include "AMDGPUSubtarget.h"
     19 #include "R600Defines.h"
     20 #include "R600InstrInfo.h"
     21 #include "R600RegisterInfo.h"
     22 #include "llvm/CodeGen/MachineBasicBlock.h"
     23 #include "llvm/CodeGen/MachineFunction.h"
     24 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     25 #include "llvm/CodeGen/MachineFunctionPass.h"
     26 #include "llvm/CodeGen/MachineInstr.h"
     27 #include "llvm/CodeGen/MachineInstrBuilder.h"
     28 #include "llvm/CodeGen/MachineOperand.h"
     29 #include "llvm/Pass.h"
     30 #include <cassert>
     31 #include <cstdint>
     32 #include <iterator>
     33 
     34 using namespace llvm;
     35 
     36 #define DEBUG_TYPE "r600-expand-special-instrs"
     37 
     38 namespace {
     39 
     40 class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
     41 private:
     42   const R600InstrInfo *TII = nullptr;
     43 
     44   void SetFlagInNewMI(MachineInstr *NewMI, const MachineInstr *OldMI,
     45       unsigned Op);
     46 
     47 public:
     48   static char ID;
     49 
     50   R600ExpandSpecialInstrsPass() : MachineFunctionPass(ID) {}
     51 
     52   bool runOnMachineFunction(MachineFunction &MF) override;
     53 
     54   StringRef getPassName() const override {
     55     return "R600 Expand special instructions pass";
     56   }
     57 };
     58 
     59 } // end anonymous namespace
     60 
     61 INITIALIZE_PASS_BEGIN(R600ExpandSpecialInstrsPass, DEBUG_TYPE,
     62                      "R600 Expand Special Instrs", false, false)
     63 INITIALIZE_PASS_END(R600ExpandSpecialInstrsPass, DEBUG_TYPE,
     64                     "R600ExpandSpecialInstrs", false, false)
     65 
     66 char R600ExpandSpecialInstrsPass::ID = 0;
     67 
     68 char &llvm::R600ExpandSpecialInstrsPassID = R600ExpandSpecialInstrsPass::ID;
     69 
     70 FunctionPass *llvm::createR600ExpandSpecialInstrsPass() {
     71   return new R600ExpandSpecialInstrsPass();
     72 }
     73 
     74 void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI,
     75     const MachineInstr *OldMI, unsigned Op) {
     76   int OpIdx = TII->getOperandIdx(*OldMI, Op);
     77   if (OpIdx > -1) {
     78     uint64_t Val = OldMI->getOperand(OpIdx).getImm();
     79     TII->setImmOperand(*NewMI, Op, Val);
     80   }
     81 }
     82 
     83 bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
     84   const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
     85   TII = ST.getInstrInfo();
     86 
     87   const R600RegisterInfo &TRI = TII->getRegisterInfo();
     88 
     89   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
     90                                                   BB != BB_E; ++BB) {
     91     MachineBasicBlock &MBB = *BB;
     92     MachineBasicBlock::iterator I = MBB.begin();
     93     while (I != MBB.end()) {
     94       MachineInstr &MI = *I;
     95       I = std::next(I);
     96 
     97       // Expand LDS_*_RET instructions
     98       if (TII->isLDSRetInstr(MI.getOpcode())) {
     99         int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
    100         assert(DstIdx != -1);
    101         MachineOperand &DstOp = MI.getOperand(DstIdx);
    102         MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
    103                                                DstOp.getReg(), R600::OQAP);
    104         DstOp.setReg(R600::OQAP);
    105         int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
    106                                            R600::OpName::pred_sel);
    107         int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
    108                                            R600::OpName::pred_sel);
    109         // Copy the pred_sel bit
    110         Mov->getOperand(MovPredSelIdx).setReg(
    111             MI.getOperand(LDSPredSelIdx).getReg());
    112       }
    113 
    114       switch (MI.getOpcode()) {
    115       default: break;
    116       // Expand PRED_X to one of the PRED_SET instructions.
    117       case R600::PRED_X: {
    118         uint64_t Flags = MI.getOperand(3).getImm();
    119         // The native opcode used by PRED_X is stored as an immediate in the
    120         // third operand.
    121         MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
    122                                             MI.getOperand(2).getImm(), // opcode
    123                                             MI.getOperand(0).getReg(), // dst
    124                                             MI.getOperand(1).getReg(), // src0
    125                                             R600::ZERO);             // src1
    126         TII->addFlag(*PredSet, 0, MO_FLAG_MASK);
    127         if (Flags & MO_FLAG_PUSH) {
    128           TII->setImmOperand(*PredSet, R600::OpName::update_exec_mask, 1);
    129         } else {
    130           TII->setImmOperand(*PredSet, R600::OpName::update_pred, 1);
    131         }
    132         MI.eraseFromParent();
    133         continue;
    134         }
    135       case R600::DOT_4: {
    136 
    137         const R600RegisterInfo &TRI = TII->getRegisterInfo();
    138 
    139         unsigned DstReg = MI.getOperand(0).getReg();
    140         unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
    141 
    142         for (unsigned Chan = 0; Chan < 4; ++Chan) {
    143           bool Mask = (Chan != TRI.getHWRegChan(DstReg));
    144           unsigned SubDstReg =
    145               R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
    146           MachineInstr *BMI =
    147               TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);
    148           if (Chan > 0) {
    149             BMI->bundleWithPred();
    150           }
    151           if (Mask) {
    152             TII->addFlag(*BMI, 0, MO_FLAG_MASK);
    153           }
    154           if (Chan != 3)
    155             TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
    156           unsigned Opcode = BMI->getOpcode();
    157           // While not strictly necessary from hw point of view, we force
    158           // all src operands of a dot4 inst to belong to the same slot.
    159           unsigned Src0 = BMI->getOperand(
    160               TII->getOperandIdx(Opcode, R600::OpName::src0))
    161               .getReg();
    162           unsigned Src1 = BMI->getOperand(
    163               TII->getOperandIdx(Opcode, R600::OpName::src1))
    164               .getReg();
    165           (void) Src0;
    166           (void) Src1;
    167           if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
    168               (TRI.getEncodingValue(Src1) & 0xff) < 127)
    169             assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
    170         }
    171         MI.eraseFromParent();
    172         continue;
    173       }
    174       }
    175 
    176       bool IsReduction = TII->isReductionOp(MI.getOpcode());
    177       bool IsVector = TII->isVector(MI);
    178       bool IsCube = TII->isCubeOp(MI.getOpcode());
    179       if (!IsReduction && !IsVector && !IsCube) {
    180         continue;
    181       }
    182 
    183       // Expand the instruction
    184       //
    185       // Reduction instructions:
    186       // T0_X = DP4 T1_XYZW, T2_XYZW
    187       // becomes:
    188       // TO_X = DP4 T1_X, T2_X
    189       // TO_Y (write masked) = DP4 T1_Y, T2_Y
    190       // TO_Z (write masked) = DP4 T1_Z, T2_Z
    191       // TO_W (write masked) = DP4 T1_W, T2_W
    192       //
    193       // Vector instructions:
    194       // T0_X = MULLO_INT T1_X, T2_X
    195       // becomes:
    196       // T0_X = MULLO_INT T1_X, T2_X
    197       // T0_Y (write masked) = MULLO_INT T1_X, T2_X
    198       // T0_Z (write masked) = MULLO_INT T1_X, T2_X
    199       // T0_W (write masked) = MULLO_INT T1_X, T2_X
    200       //
    201       // Cube instructions:
    202       // T0_XYZW = CUBE T1_XYZW
    203       // becomes:
    204       // TO_X = CUBE T1_Z, T1_Y
    205       // T0_Y = CUBE T1_Z, T1_X
    206       // T0_Z = CUBE T1_X, T1_Z
    207       // T0_W = CUBE T1_Y, T1_Z
    208       for (unsigned Chan = 0; Chan < 4; Chan++) {
    209         unsigned DstReg = MI.getOperand(
    210                             TII->getOperandIdx(MI, R600::OpName::dst)).getReg();
    211         unsigned Src0 = MI.getOperand(
    212                            TII->getOperandIdx(MI, R600::OpName::src0)).getReg();
    213         unsigned Src1 = 0;
    214 
    215         // Determine the correct source registers
    216         if (!IsCube) {
    217           int Src1Idx = TII->getOperandIdx(MI, R600::OpName::src1);
    218           if (Src1Idx != -1) {
    219             Src1 = MI.getOperand(Src1Idx).getReg();
    220           }
    221         }
    222         if (IsReduction) {
    223           unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(Chan);
    224           Src0 = TRI.getSubReg(Src0, SubRegIndex);
    225           Src1 = TRI.getSubReg(Src1, SubRegIndex);
    226         } else if (IsCube) {
    227           static const int CubeSrcSwz[] = {2, 2, 0, 1};
    228           unsigned SubRegIndex0 = AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz[Chan]);
    229           unsigned SubRegIndex1 = AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
    230           Src1 = TRI.getSubReg(Src0, SubRegIndex1);
    231           Src0 = TRI.getSubReg(Src0, SubRegIndex0);
    232         }
    233 
    234         // Determine the correct destination registers;
    235         bool Mask = false;
    236         bool NotLast = true;
    237         if (IsCube) {
    238           unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(Chan);
    239           DstReg = TRI.getSubReg(DstReg, SubRegIndex);
    240         } else {
    241           // Mask the write if the original instruction does not write to
    242           // the current Channel.
    243           Mask = (Chan != TRI.getHWRegChan(DstReg));
    244           unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
    245           DstReg = R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
    246         }
    247 
    248         // Set the IsLast bit
    249         NotLast = (Chan != 3 );
    250 
    251         // Add the new instruction
    252         unsigned Opcode = MI.getOpcode();
    253         switch (Opcode) {
    254         case R600::CUBE_r600_pseudo:
    255           Opcode = R600::CUBE_r600_real;
    256           break;
    257         case R600::CUBE_eg_pseudo:
    258           Opcode = R600::CUBE_eg_real;
    259           break;
    260         default:
    261           break;
    262         }
    263 
    264         MachineInstr *NewMI =
    265           TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
    266 
    267         if (Chan != 0)
    268           NewMI->bundleWithPred();
    269         if (Mask) {
    270           TII->addFlag(*NewMI, 0, MO_FLAG_MASK);
    271         }
    272         if (NotLast) {
    273           TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST);
    274         }
    275         SetFlagInNewMI(NewMI, &MI, R600::OpName::clamp);
    276         SetFlagInNewMI(NewMI, &MI, R600::OpName::literal);
    277         SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_abs);
    278         SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_abs);
    279         SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_neg);
    280         SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_neg);
    281       }
    282       MI.eraseFromParent();
    283     }
    284   }
    285   return false;
    286 }
    287