//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

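// Pull in the TableGen-generated parts of AMDGPUGenInstrInfo.inc:
// GET_INSTRINFO_CTOR_DTOR defines the AMDGPUGenInstrInfo constructor and
// destructor, GET_INSTRINFO_NAMED_OPS defines AMDGPU::getNamedOperandIdx for
// looking up operands by name, and GET_INSTRMAP_INFO defines the instruction
// mapping tables (e.g. getMCOpcodeGen used below).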
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

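// The TableGen-generated AMDGPUGenInstrInfo constructor takes the opcodes of
// the call-frame setup and destroy pseudo instructions; -1 means this target
// does not define them.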
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}

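// Opt in to load clustering in the machine scheduler.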
bool AMDGPUInstrInfo::enableClusterLoads() const {
  return true;
}

// FIXME: This behaves strangely. If, for example, you have a mix of 32 loads
// and stores, the first 16 loads will be interleaved with the stores, and
// the next 16 will be clustered as expected. It should really be split into
// two batches of 16 stores.
//
// Loads are clustered until this returns false, rather than trying to
// schedule groups of stores. This also means we have to decide whether loads
// from different address spaces should be clustered, including ones which
// might cause bank conflicts.
//
// This hook may be deprecated, so it may not be worth much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have fewer than 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
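  // For example (illustrative values): loads at offsets 0 and 32 with
  // NumLoads == 4 are clustered; loads at offsets 0 and 96, or a 17th
  // consecutive load, are not.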
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

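// Map a MIMG opcode to the variant that writes the given number of result
// channels; unhandled channel counts return the opcode unchanged. This is
// used, for example, when shrinking an image sample whose extra result
// channels are unused.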
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
  switch (Channels) {
  default: return Opcode;
  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
  }
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,
  VI = 1
};

// Wrapper for a TableGen'd function. The Subtarget enum it takes is not
// defined in any header file, so we need to wrap it in a function that takes
// unsigned instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
} // end namespace AMDGPU
} // end namespace llvm

static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

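// Map a pseudo opcode to the real MC opcode for this subtarget's encoding
// family: opcodes that are already native come back unchanged, pseudos with
// an encoding are remapped, and pseudos with no encoding on the current
// generation yield -1.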
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}