Home | History | Annotate | Download | only in R600
      1 //===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets the STACK_SIZE info.
     13 //===----------------------------------------------------------------------===//
     14 
     15 #define DEBUG_TYPE "r600cf"
     16 #include "llvm/Support/Debug.h"
     17 #include "AMDGPU.h"
     18 #include "R600Defines.h"
     19 #include "R600InstrInfo.h"
     20 #include "R600MachineFunctionInfo.h"
     21 #include "R600RegisterInfo.h"
     22 #include "llvm/CodeGen/MachineFunctionPass.h"
     23 #include "llvm/CodeGen/MachineInstrBuilder.h"
     24 #include "llvm/CodeGen/MachineRegisterInfo.h"
     25 #include "llvm/Support/raw_ostream.h"
     26 
     27 using namespace llvm;
     28 
     29 namespace {
     30 
class R600ControlFlowFinalizer : public MachineFunctionPass {

private:
  // A clause: the instruction that opens it (CF_TC/CF_VC/CF_ALU head)
  // paired with the instructions forming its body.
  typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;

  // Abstract control flow opcodes, mapped to generation-specific hardware
  // opcodes by getHWInstrDesc().
  enum ControlFlowInstruction {
    CF_TC,            // Texture cache fetch clause
    CF_VC,            // Vertex cache fetch clause
    CF_CALL_FS,       // Call fetch shader
    CF_WHILE_LOOP,
    CF_END_LOOP,
    CF_LOOP_BREAK,
    CF_LOOP_CONTINUE,
    CF_JUMP,
    CF_ELSE,
    CF_POP,
    CF_END            // End of program
  };

  static char ID;                // Pass identification.
  const R600InstrInfo *TII;      // Set in runOnMachineFunction().
  const R600RegisterInfo *TRI;   // Set in runOnMachineFunction().
  unsigned MaxFetchInst;         // Subtarget limit on fetch clause size.
  const AMDGPUSubtarget &ST;
     55 
     56   bool IsTrivialInst(MachineInstr *MI) const {
     57     switch (MI->getOpcode()) {
     58     case AMDGPU::KILL:
     59     case AMDGPU::RETURN:
     60       return true;
     61     default:
     62       return false;
     63     }
     64   }
     65 
     66   const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
     67     unsigned Opcode = 0;
     68     bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
     69     switch (CFI) {
     70     case CF_TC:
     71       Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
     72       break;
     73     case CF_VC:
     74       Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
     75       break;
     76     case CF_CALL_FS:
     77       Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
     78       break;
     79     case CF_WHILE_LOOP:
     80       Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
     81       break;
     82     case CF_END_LOOP:
     83       Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
     84       break;
     85     case CF_LOOP_BREAK:
     86       Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
     87       break;
     88     case CF_LOOP_CONTINUE:
     89       Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
     90       break;
     91     case CF_JUMP:
     92       Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
     93       break;
     94     case CF_ELSE:
     95       Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
     96       break;
     97     case CF_POP:
     98       Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
     99       break;
    100     case CF_END:
    101       if (ST.hasCaymanISA()) {
    102         Opcode = AMDGPU::CF_END_CM;
    103         break;
    104       }
    105       Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
    106       break;
    107     }
    108     assert (Opcode && "No opcode selected");
    109     return TII->get(Opcode);
    110   }
    111 
    112   bool isCompatibleWithClause(const MachineInstr *MI,
    113       std::set<unsigned> &DstRegs) const {
    114     unsigned DstMI, SrcMI;
    115     for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
    116         E = MI->operands_end(); I != E; ++I) {
    117       const MachineOperand &MO = *I;
    118       if (!MO.isReg())
    119         continue;
    120       if (MO.isDef()) {
    121         unsigned Reg = MO.getReg();
    122         if (AMDGPU::R600_Reg128RegClass.contains(Reg))
    123           DstMI = Reg;
    124         else
    125           DstMI = TRI->getMatchingSuperReg(Reg,
    126               TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
    127               &AMDGPU::R600_Reg128RegClass);
    128       }
    129       if (MO.isUse()) {
    130         unsigned Reg = MO.getReg();
    131         if (AMDGPU::R600_Reg128RegClass.contains(Reg))
    132           SrcMI = Reg;
    133         else
    134           SrcMI = TRI->getMatchingSuperReg(Reg,
    135               TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
    136               &AMDGPU::R600_Reg128RegClass);
    137       }
    138     }
    139     if ((DstRegs.find(SrcMI) == DstRegs.end())) {
    140       DstRegs.insert(DstMI);
    141       return true;
    142     } else
    143       return false;
    144   }
    145 
  /// Gather consecutive fetch instructions starting at I into one fetch
  /// clause and emit the matching CF_TC/CF_VC clause head before them.
  /// On return, I points at the first instruction NOT in the clause.
  ClauseFile
  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    unsigned AluInstCount = 0;
    // The clause kind (texture vs vertex cache) is fixed by its first
    // instruction.
    bool IsTex = TII->usesTextureCache(ClauseHead);
    std::set<unsigned> DstRegs;
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
      if (IsTrivialInst(I))
        continue;
      // Respect the subtarget's maximum fetch clause size.
      if (AluInstCount >= MaxFetchInst)
        break;
      // Stop when the cache kind changes or a non-fetch instruction is hit.
      if ((IsTex && !TII->usesTextureCache(I)) ||
          (!IsTex && !TII->usesVertexCache(I)))
        break;
      // Stop on a read-after-write hazard within the clause.
      if (!isCompatibleWithClause(I, DstRegs))
        break;
      AluInstCount ++;
      ClauseContent.push_back(I);
    }
    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
        getHWInstrDesc(IsTex?CF_TC:CF_VC))
        .addImm(0) // ADDR
        .addImm(AluInstCount - 1); // COUNT
    return ClauseFile(MIb, ClauseContent);
  }
    173 
    174   void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
    175     static const unsigned LiteralRegs[] = {
    176       AMDGPU::ALU_LITERAL_X,
    177       AMDGPU::ALU_LITERAL_Y,
    178       AMDGPU::ALU_LITERAL_Z,
    179       AMDGPU::ALU_LITERAL_W
    180     };
    181     const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
    182         TII->getSrcs(MI);
    183     for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
    184       if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
    185         continue;
    186       int64_t Imm = Srcs[i].second;
    187       std::vector<int64_t>::iterator It =
    188           std::find(Lits.begin(), Lits.end(), Imm);
    189       if (It != Lits.end()) {
    190         unsigned Index = It - Lits.begin();
    191         Srcs[i].first->setReg(LiteralRegs[Index]);
    192       } else {
    193         assert(Lits.size() < 4 && "Too many literals in Instruction Group");
    194         Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
    195         Lits.push_back(Imm);
    196       }
    197     }
    198   }
    199 
  /// Emit LITERALS pseudo instructions for the given values, packing two
  /// 32-bit literals per instruction (an odd trailing value is padded with
  /// 0). Returns the last emitted LITERALS instruction.
  /// NOTE(review): no caller is visible in this file — possibly dead code.
  MachineBasicBlock::iterator insertLiterals(
      MachineBasicBlock::iterator InsertPos,
      const std::vector<unsigned> &Literals) const {
    MachineBasicBlock *MBB = InsertPos->getParent();
    for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
      unsigned LiteralPair0 = Literals[i];
      unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
          TII->get(AMDGPU::LITERALS))
          .addImm(LiteralPair0)
          .addImm(LiteralPair1);
    }
    return InsertPos;
  }
    214 
  /// Collect the ALU instructions following the clause head at I into an
  /// ALU clause: bundles are flattened, literal operands are materialized
  /// as LITERALS instructions, and the head's instruction-count operand is
  /// updated. On return, I points past the clause.
  ClauseFile
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    I++;
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
      if (IsTrivialInst(I)) {
        ++I;
        continue;
      }
      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
        break;
      std::vector<int64_t> Literals;
      if (I->isBundle()) {
        // Flatten the bundle: unbundle each member, clear internal-read
        // flags, and collect the members individually; the BUNDLE header
        // itself is deleted afterwards.
        MachineInstr *DeleteMI = I;
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
        while (++BI != E && BI->isBundledWithPred()) {
          BI->unbundleFromPred();
          for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = BI->getOperand(i);
            if (MO.isReg() && MO.isInternalRead())
              MO.setIsInternalRead(false);
          }
          getLiteral(BI, Literals);
          ClauseContent.push_back(BI);
        }
        I = BI;
        DeleteMI->eraseFromParent();
      } else {
        getLiteral(I, Literals);
        ClauseContent.push_back(I);
        I++;
      }
      // Emit the group's literals, two 32-bit values per LITERALS
      // instruction (odd trailing literal padded with 0).
      for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
        unsigned literal0 = Literals[i];
        unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
        MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
            TII->get(AMDGPU::LITERALS))
            .addImm(literal0)
            .addImm(literal2);
        ClauseContent.push_back(MILit);
      }
    }
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
    // Operand 7 of the clause head holds COUNT (size minus one).
    ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
    return ClauseFile(ClauseHead, ClauseContent);
  }
    263 
    264   void
    265   EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
    266       unsigned &CfCount) {
    267     CounterPropagateAddr(Clause.first, CfCount);
    268     MachineBasicBlock *BB = Clause.first->getParent();
    269     BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
    270         .addImm(CfCount);
    271     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
    272       BB->splice(InsertPos, BB, Clause.second[i]);
    273     }
    274     CfCount += 2 * Clause.second.size();
    275   }
    276 
    277   void
    278   EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
    279       unsigned &CfCount) {
    280     Clause.first->getOperand(0).setImm(0);
    281     CounterPropagateAddr(Clause.first, CfCount);
    282     MachineBasicBlock *BB = Clause.first->getParent();
    283     BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
    284         .addImm(CfCount);
    285     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
    286       BB->splice(InsertPos, BB, Clause.second[i]);
    287     }
    288     CfCount += Clause.second.size();
    289   }
    290 
    291   void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
    292     MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
    293   }
    294   void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
    295       const {
    296     for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
    297         It != E; ++It) {
    298       MachineInstr *MI = *It;
    299       CounterPropagateAddr(MI, Addr);
    300     }
    301   }
    302 
  /// Convert a count of stack sub-entries into hardware stack entries
  /// (4 sub-entries per entry, rounded up), applying per-generation
  /// adjustments for clauses that push.
  unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
    switch (ST.getGeneration()) {
    case AMDGPUSubtarget::R600:
    case AMDGPUSubtarget::R700:
      if (hasPush)
        StackSubEntry += 2;
      break;
    case AMDGPUSubtarget::EVERGREEN:
      if (hasPush)
        StackSubEntry ++;
      // FALLTHROUGH: Evergreen also gets the Northern Islands adjustment.
      // The missing break looks intentional — confirm against HW docs.
    case AMDGPUSubtarget::NORTHERN_ISLANDS:
      StackSubEntry += 2;
      break;
    default: llvm_unreachable("Not a VLIW4/VLIW5 GPU");
    }
    return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
  }
    320 
    321 public:
    322   R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
    323     TII (0), TRI(0),
    324     ST(tm.getSubtarget<AMDGPUSubtarget>()) {
    325       const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
    326       MaxFetchInst = ST.getTexVTXClauseSize();
    327   }
    328 
  /// Lower all control flow pseudo instructions to native hardware ones,
  /// gathering fetch/ALU clauses, computing jump addresses on the fly, and
  /// finally recording the required hardware stack size in the function
  /// info.
  virtual bool runOnMachineFunction(MachineFunction &MF) {
    TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
    TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());

    unsigned MaxStack = 0;
    unsigned CurrentStack = 0;
    bool HasPush = false;
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
        ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0;
      // Each entry: (CF address of the loop start, instructions that need
      // the matching ENDLOOP address patched in later).
      std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
      std::vector<MachineInstr * > IfThenElseStack;
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      if (MFI->ShaderType == 1) {
        // NOTE(review): ShaderType 1 presumably denotes a vertex shader,
        // which starts by calling the fetch shader — confirm the mapping.
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
            getHWInstrDesc(CF_CALL_FS));
        CfCount++;
        MaxStack = 1;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      // LastAlu.back() tracks the last CF_ALU seen at the current nesting
      // level so ENDIF can turn it into a CF_ALU_POP_AFTER (see ToPopAfter).
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;

      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E;) {
        if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
          DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != AMDGPU::ENDIF)
          LastAlu.back() = 0;
        if (MI->getOpcode() == AMDGPU::CF_ALU)
          LastAlu.back() = MI;
        I++;
        switch (MI->getOpcode()) {
        case AMDGPU::CF_ALU_PUSH_BEFORE:
          CurrentStack++;
          MaxStack = std::max(MaxStack, CurrentStack);
          HasPush = true;
          // FALLTHROUGH: a push clause is still an ALU clause.
        case AMDGPU::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          // FALLTHROUGH: ALU clauses occupy a CF slot like the cases below.
        case AMDGPU::EG_ExportBuf:
        case AMDGPU::EG_ExportSwz:
        case AMDGPU::R600_ExportBuf:
        case AMDGPU::R600_ExportSwz:
        case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
        case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
        case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
        case AMDGPU::RAT_STORE_DWORD32_cm:
        case AMDGPU::RAT_STORE_DWORD64_cm:
          DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case AMDGPU::WHILELOOP: {
          // A loop consumes 4 stack sub-entries.
          CurrentStack+=4;
          MaxStack = std::max(MaxStack, CurrentStack);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(Pair);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ENDLOOP: {
          CurrentStack-=4;
          std::pair<unsigned, std::set<MachineInstr *> > Pair =
              LoopStack.back();
          LoopStack.pop_back();
          // Patch the loop head and any BREAK/CONTINUE with this address.
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::IF_PREDICATE_SET: {
          LastAlu.push_back(0);
          // Jump target is unknown yet; patched at ELSE/ENDIF.
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_JUMP))
              .addImm(0)
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ELSE: {
          // The pending JUMP now targets the address just past the ELSE.
          MachineInstr * JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(JumpInst, CfCount);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_ELSE))
              .addImm(0)
              .addImm(0);
          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ENDIF: {
          CurrentStack--;
          if (LastAlu.back()) {
            // Fold the pop into the preceding CF_ALU (rewritten below as
            // CF_ALU_POP_AFTER).
            ToPopAfter.push_back(LastAlu.back());
          } else {
            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                getHWInstrDesc(CF_POP))
                .addImm(CfCount + 1)
                .addImm(1);
            (void)MIb;
            DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
            CfCount++;
          }

          MachineInstr *IfOrElseInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(IfOrElseInst, CfCount);
          IfOrElseInst->getOperand(1).setImm(1);
          LastAlu.pop_back();
          MI->eraseFromParent();
          break;
        }
        case AMDGPU::BREAK: {
          CfCount ++;
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_BREAK))
              .addImm(0);
          // Address is patched when the enclosing ENDLOOP is reached.
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          break;
        }
        case AMDGPU::CONTINUE: {
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_CONTINUE))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::RETURN: {
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
          CfCount++;
          MI->eraseFromParent();
          // Pad the CF program to an even instruction count.
          if (CfCount % 2) {
            BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
            CfCount++;
          }
          // Addresses are now final: move the clause bodies after the CF
          // program and patch their heads.
          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
            EmitFetchClause(I, FetchClauses[i], CfCount);
          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
            EmitALUClause(I, AluClauses[i], CfCount);
        }
        // No break above: RETURN falls into default, which is a no-op.
        default:
          break;
        }
      }
      // Replace CF_ALU heads whose pop was folded with CF_ALU_POP_AFTER,
      // copying all nine immediate operands.
      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
        MachineInstr *Alu = ToPopAfter[i];
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
            TII->get(AMDGPU::CF_ALU_POP_AFTER))
            .addImm(Alu->getOperand(0).getImm())
            .addImm(Alu->getOperand(1).getImm())
            .addImm(Alu->getOperand(2).getImm())
            .addImm(Alu->getOperand(3).getImm())
            .addImm(Alu->getOperand(4).getImm())
            .addImm(Alu->getOperand(5).getImm())
            .addImm(Alu->getOperand(6).getImm())
            .addImm(Alu->getOperand(7).getImm())
            .addImm(Alu->getOperand(8).getImm());
        Alu->eraseFromParent();
      }
      MFI->StackSize = getHWStackSize(MaxStack, HasPush);
    }

    return false;
  }
    517 
  /// Human-readable pass name for debug/pass-listing output.
  const char *getPassName() const {
    return "R600 Control Flow Finalizer Pass";
  }
    521 };
    522 
    523 char R600ControlFlowFinalizer::ID = 0;
    524 
    525 } // end anonymous namespace
    526 
    527 
/// Create the R600 control flow finalizer pass for the given target machine.
llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
  return new R600ControlFlowFinalizer(TM);
}
    531