Home | History | Annotate | Download | only in radeon
      1 //===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 // Vector, Reduction, and Cube instructions need to fill the entire instruction
     10 // group to work correctly.  This pass expands these individual instructions
     11 // into several instructions that will completely fill the instruction group.
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AMDGPU.h"
     15 #include "R600Defines.h"
     16 #include "R600InstrInfo.h"
     17 #include "R600RegisterInfo.h"
     18 #include "llvm/CodeGen/MachineFunctionPass.h"
     19 #include "llvm/CodeGen/MachineInstrBuilder.h"
     20 #include "llvm/CodeGen/MachineRegisterInfo.h"
     21 
     22 using namespace llvm;
     23 
     24 namespace {
     25 
     26 class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
     27 
     28 private:
     29   static char ID;
     30   const R600InstrInfo *TII;
     31 
     32 public:
     33   R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
     34     TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
     35 
     36   virtual bool runOnMachineFunction(MachineFunction &MF);
     37 
     38   const char *getPassName() const {
     39     return "R600 Expand special instructions pass";
     40   }
     41 };
     42 
     43 } // End anonymous namespace
     44 
// Out-of-class definition of the static pass identifier that the constructor
// forwards to MachineFunctionPass.
// NOTE(review): LLVM conventionally identifies a pass by the address of this
// variable rather than by its value -- confirm against the Pass documentation.
char R600ExpandSpecialInstrsPass::ID = 0;
     46 
     47 FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
     48   return new R600ExpandSpecialInstrsPass(TM);
     49 }
     50 
     51 bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
     52 
     53   const R600RegisterInfo &TRI = TII->getRegisterInfo();
     54 
     55   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
     56                                                   BB != BB_E; ++BB) {
     57     MachineBasicBlock &MBB = *BB;
     58     MachineBasicBlock::iterator I = MBB.begin();
     59     while (I != MBB.end()) {
     60       MachineInstr &MI = *I;
     61       I = llvm::next(I);
     62 
     63       bool IsReduction = TII->isReductionOp(MI.getOpcode());
     64       bool IsVector = TII->isVector(MI);
     65 	    bool IsCube = TII->isCubeOp(MI.getOpcode());
     66       if (!IsReduction && !IsVector && !IsCube) {
     67         continue;
     68       }
     69 
     70       // Expand the instruction
     71       //
     72       // Reduction instructions:
     73       // T0_X = DP4 T1_XYZW, T2_XYZW
     74       // becomes:
     75       // TO_X = DP4 T1_X, T2_X
     76       // TO_Y (write masked) = DP4 T1_Y, T2_Y
     77       // TO_Z (write masked) = DP4 T1_Z, T2_Z
     78       // TO_W (write masked) = DP4 T1_W, T2_W
     79       //
     80       // Vector instructions:
     81       // T0_X = MULLO_INT T1_X, T2_X
     82       // becomes:
     83       // T0_X = MULLO_INT T1_X, T2_X
     84       // T0_Y (write masked) = MULLO_INT T1_X, T2_X
     85       // T0_Z (write masked) = MULLO_INT T1_X, T2_X
     86       // T0_W (write masked) = MULLO_INT T1_X, T2_X
     87       //
     88       // Cube instructions:
     89       // T0_XYZW = CUBE T1_XYZW
     90       // becomes:
     91       // TO_X = CUBE T1_Z, T1_Y
     92       // T0_Y = CUBE T1_Z, T1_X
     93       // T0_Z = CUBE T1_X, T1_Z
     94       // T0_W = CUBE T1_Y, T1_Z
     95       for (unsigned Chan = 0; Chan < 4; Chan++) {
     96         unsigned DstReg = MI.getOperand(0).getReg();
     97         unsigned Src0 = MI.getOperand(1).getReg();
     98         unsigned Src1 = 0;
     99 
    100         // Determine the correct source registers
    101         if (!IsCube) {
    102           Src1 = MI.getOperand(2).getReg();
    103         }
    104         if (IsReduction) {
    105           unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
    106           Src0 = TRI.getSubReg(Src0, SubRegIndex);
    107           Src1 = TRI.getSubReg(Src1, SubRegIndex);
    108         } else if (IsCube) {
    109           static const int CubeSrcSwz[] = {2, 2, 0, 1};
    110           unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
    111           unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
    112           Src1 = TRI.getSubReg(Src0, SubRegIndex1);
    113           Src0 = TRI.getSubReg(Src0, SubRegIndex0);
    114         }
    115 
    116         // Determine the correct destination registers;
    117         unsigned Flags = 0;
    118         if (IsCube) {
    119           unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
    120           DstReg = TRI.getSubReg(DstReg, SubRegIndex);
    121         } else {
    122           // Mask the write if the original instruction does not write to
    123           // the current Channel.
    124           Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
    125           unsigned DstBase = TRI.getHWRegIndex(DstReg);
    126           DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
    127         }
    128 
    129         // Set the IsLast bit
    130         Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
    131 
    132         // Add the new instruction
    133         unsigned Opcode;
    134         if (IsCube) {
    135           switch (MI.getOpcode()) {
    136           case AMDGPU::CUBE_r600_pseudo:
    137             Opcode = AMDGPU::CUBE_r600_real;
    138             break;
    139           case AMDGPU::CUBE_eg_pseudo:
    140             Opcode = AMDGPU::CUBE_eg_real;
    141             break;
    142           default:
    143             assert(!"Unknown CUBE instruction");
    144             Opcode = 0;
    145             break;
    146           }
    147         } else {
    148           Opcode = MI.getOpcode();
    149         }
    150         MachineInstr *NewMI =
    151           BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
    152                   .addReg(Src0)
    153                   .addReg(Src1)
    154                   .addImm(0); // Flag
    155 
    156         NewMI->setIsInsideBundle(Chan != 0);
    157         TII->addFlag(NewMI, 0, Flags);
    158       }
    159       MI.eraseFromParent();
    160     }
    161   }
    162   return false;
    163 }
    164