Home | History | Annotate | Download | only in AMDGPU
      1 //===-- CaymanInstructions.td - CM Instruction defs  -------*- tablegen -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // TableGen definitions for instructions which are available only on Cayman
     11 // family GPUs.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 // Predicate that holds when Subtarget->hasCaymanISA() is true; it is the
        // SubtargetPredicate applied to the definitions in the let-block below.
     15 def isCayman : Predicate<"Subtarget->hasCaymanISA()">;
     16 
     17 //===----------------------------------------------------------------------===//
     18 // Cayman Instructions
     19 //===----------------------------------------------------------------------===//
     20 
     21 let SubtargetPredicate = isCayman in {
     22 // 24-bit integer multiply-add and multiply. Both produce an i32 $dst and are
        // selected from the AMDGPUmad_i24 / AMDGPUmul_i24 nodes respectively.
        // NOTE(review): the first template argument (0x08 / 0x5B) is presumably the
        // instruction opcode; R600_3OP / R600_2OP are declared outside this file.
     23 def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24",
     24   [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))], VecALU
     25 >;
     26 def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24",
     27   [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))], VecALU
     28 >;
     29 // Extra selection pattern instantiated for MULADD_INT24_cm; IMad24Pat itself
        // is declared outside this file.
     30 def : IMad24Pat<MULADD_INT24_cm>;
     31 // Instructions defined with isVector = 1. Each def instantiates a *_Common
        // class (declared outside this file) with a single hex argument.
        // NOTE(review): that argument is presumably the Cayman opcode -- confirm
        // against the *_Common class definitions.
     32 let isVector = 1 in {
     33 
     34 def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
     35 
     36 def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
     37 def MULHI_INT_cm : MULHI_INT_Common<0x90>;
     38 def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
     39 def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
        // NOTE(review): the two names below place "24" after the _cm suffix,
        // unlike MUL_INT24_cm / MULADD_INT24_cm earlier in this file. Renaming
        // would affect any reference outside this file, so they are left as is.
     40 def MULHI_INT_cm24 : MULHI_INT24_Common<0x5c>;
     41 def MULHI_UINT_cm24 : MULHI_UINT24_Common<0xb2>;
     42 
     43 def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
     44 def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
     45 def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
     46 def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
     47 def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
     48 def SIN_cm : SIN_Common<0x8D>;
     49 def COS_cm : COS_Common<0x8E>;
     50 } // End isVector = 1
     51 // Patterns instantiated with the Cayman instructions defined above (plus MUL,
        // which is not defined in this file). RsqPat, POW_Common and DIV_Common are
        // declared elsewhere; what they expand to is not visible here.
     52 def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
     53 
     54 def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
     55 
     56 defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
     57 
     58 // RECIP_UINT emulation for Cayman: AMDGPUurecip of an i32 is selected as
        // UINT_TO_FLT_eg -> RECIP_IEEE_cm -> MUL_IEEE by CONST.FP_UINT_MAX_PLUS_1
        // -> FLT_TO_UINT_eg.
     59 // The multiplication scales from [0,1] to the unsigned integer range.
     60 def : R600Pat <
     61   (AMDGPUurecip i32:$src0),
     62   (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
     63                             (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
     64 >;
     65 // CF_END for Cayman: a CF_CLAUSE_EG instance with no input operands and with
        // ADDR, POP_COUNT and COUNT all fixed to 0.
        // NOTE(review): the meaning of the first template argument (32) is defined
        // by CF_CLAUSE_EG, which is declared outside this file.
     66 def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
     67     let ADDR = 0;
     68     let POP_COUNT = 0;
     69     let COUNT = 0;
     70   }
     71 
     72 
     73 // f32 fsqrt(x) is selected as x * RECIPSQRT_CLAMPED_cm(x).
        // NOTE(review): presumably the clamped variant is used so that x == 0 does
        // not yield 0 * inf -- confirm against the ISA documentation.
     74 def : R600Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
     75 // STORE_DWORD built on CF_MEM_RAT_CACHELESS. Parameters:
        //   rc   - register class of the data operand $rw_gpr
        //   vt   - value type matched by the store_global pattern
        //   mask - 4-bit value forwarded to CF_MEM_RAT_CACHELESS
        // The pattern matches a store_global of $rw_gpr to the i32 address held in
        // $index_gpr (always an R600_TReg32_X register).
        // NOTE(review): mask is presumably a per-component write mask -- confirm
        // against the CF_MEM_RAT_CACHELESS definition.
     76 class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
     77   CF_MEM_RAT_CACHELESS <0x14, 0, mask,
     78                         (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
     79                         "STORE_DWORD $rw_gpr, $index_gpr",
     80                         [(store_global vt:$rw_gpr, i32:$index_gpr)]> {
     81   let eop = 0; // This bit is not used on Cayman.
     82 }
     83 // STORE_DWORD instances for i32, v2i32 and v4i32 values (masks 0x1, 0x3, 0xf).
     84 def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>;
     85 def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>;
     86 def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>;
     87 // Cayman instance of CF_MEM_RAT_STORE_TYPED with eop forced to 0.
        // NOTE(review): the meaning of the <0> template argument is defined by
        // CF_MEM_RAT_STORE_TYPED, which is declared outside this file.
     88 def RAT_STORE_TYPED_cm: CF_MEM_RAT_STORE_TYPED<0> {
     89   let eop = 0; // This bit is not used on Cayman.
     90 }
     91 // Common base class for the VTX_READ_*_cm instructions in this file.
        //   name - assembly string forwarded to VTX_READ
        //   outs - output operand dag forwarded to VTX_READ
        // An empty list is passed as VTX_READ's third argument (presumably the
        // selection pattern; matching is done by the R600Pat defs later in the
        // file). The class fixes the fields shared by every read and maps Word0
        // onto bits 31-0 of the encoding.
     92 class VTX_READ_cm <string name, dag outs>
     93     : VTX_WORD0_cm, VTX_READ<name, outs, []> {
     94 
     95   // Static fields
     96   let VC_INST = 0;
     97   let FETCH_TYPE = 2;
     98   let FETCH_WHOLE_QUAD = 0;
     99   let SRC_REL = 0;
    100   // XXX: We can infer this field based on the SRC_GPR.  This would allow us
    101   // to store vertex addresses in any channel, not just X.
    102   let SRC_SEL_X = 0;
    103   let SRC_SEL_Y = 0;
    104   let STRUCTURED_READ = 0;
    105   let LDS_REQ = 0;
    106   let COALESCED_READ = 0;
    107 
    108   let Inst{31-0} = Word0; // Low 32 bits of the encoding come from Word0.
    109 }
    110 // VTX_READ_8: FMT_8 read into an R600_TReg32_X destination. Only DST_SEL_X is
        // selected (0); Y, Z and W are masked (7).
    111 def VTX_READ_8_cm
    112     : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr",
    113                    (outs R600_TReg32_X:$dst_gpr)> {
    114 
    115   let DST_SEL_X = 0;
    116   let DST_SEL_Y = 7;   // Masked
    117   let DST_SEL_Z = 7;   // Masked
    118   let DST_SEL_W = 7;   // Masked
    119   let DATA_FORMAT = 1; // FMT_8
    120 }
    121 // VTX_READ_16: FMT_16 read into an R600_TReg32_X destination. Only DST_SEL_X
        // is selected (0); Y, Z and W are masked (7).
    122 def VTX_READ_16_cm
    123     : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr",
    124                    (outs R600_TReg32_X:$dst_gpr)> {
    125   let DST_SEL_X = 0;
    126   let DST_SEL_Y = 7;   // Masked
    127   let DST_SEL_Z = 7;   // Masked
    128   let DST_SEL_W = 7;   // Masked
    129   let DATA_FORMAT = 5; // FMT_16
    130 
    131 }
    132 // VTX_READ_32: COLOR_32 read into an R600_TReg32_X destination. Only
        // DST_SEL_X is selected (0); Y, Z and W are masked (7). $src_gpr.ptr is
        // tied to $dst_gpr through the Constraints string at the end of the body.
    133 def VTX_READ_32_cm
    134     : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr",
    135                    (outs R600_TReg32_X:$dst_gpr)> {
    136 
    137   let DST_SEL_X        = 0;
    138   let DST_SEL_Y        = 7;   // Masked
    139   let DST_SEL_Z        = 7;   // Masked
    140   let DST_SEL_W        = 7;   // Masked
    141   let DATA_FORMAT      = 0xD; // COLOR_32
    142 
    143   // This is not really necessary, but there were some GPU hangs that appeared
    144   // to be caused by ALU instructions in the next instruction group that wrote
    145   // to the $src_gpr registers of the VTX_READ.
    146   // e.g. (NOTE(review): the example names an _eg instruction, not this _cm one)
    147   // %t3_x = VTX_READ_PARAM_32_eg killed %t2_x, 24
    148   // %t2_x = MOV %zero
    149   // Adding this constraint prevents this from happening.
    150   let Constraints = "$src_gpr.ptr = $dst_gpr";
    151 }
    152 // VTX_READ_64: COLOR_32_32 read into an R600_Reg64 destination. DST_SEL_X and
        // DST_SEL_Y are selected (0, 1); Z and W are masked (7).
    153 def VTX_READ_64_cm
    154     : VTX_READ_cm <"VTX_READ_64 $dst_gpr.XY, $src_gpr",
    155                    (outs R600_Reg64:$dst_gpr)> {
    156 
    157   let DST_SEL_X        = 0;
    158   let DST_SEL_Y        = 1;
    159   let DST_SEL_Z        = 7;    // Masked
    160   let DST_SEL_W        = 7;    // Masked
    161   let DATA_FORMAT      = 0x1D; // COLOR_32_32
    162 }
    163 // VTX_READ_128: COLOR_32_32_32_32 read into an R600_Reg128 destination with
        // all four DST_SEL fields selected (0..3). No register constraint is set.
    164 def VTX_READ_128_cm
    165     : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr",
    166                    (outs R600_Reg128:$dst_gpr)> {
    167 
    168   let DST_SEL_X        =  0;
    169   let DST_SEL_Y        =  1;
    170   let DST_SEL_Z        =  2;
    171   let DST_SEL_W        =  3;
    172   let DATA_FORMAT      =  0x22; // COLOR_32_32_32_32
    173 
    174   // XXX: Need to force VTX_READ_128 instructions to write to the same register
    175   // that holds its buffer address to avoid potential hangs.  We can't use
    176   // the same constraint as VTX_READ_32_cm, because the $src_gpr.ptr and $dst
    177   // registers are different sizes.
    178 }
    179 
    180 //===----------------------------------------------------------------------===//
    181 // VTX Read from parameter memory space
    182 //===----------------------------------------------------------------------===//
        // Maps the vtx_id3_* load fragments onto the VTX_READ_*_cm instruction of
        // matching width: zero/any-extending i8 and i16 loads, then i32, v2i32 and
        // v4i32 loads. NOTE(review): the trailing immediate 3 presumably is the
        // buffer id corresponding to the "id3" fragments -- confirm.
    183 def : R600Pat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)),
    184           (VTX_READ_8_cm MEMxi:$src_gpr, 3)>;
    185 def : R600Pat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)),
    186           (VTX_READ_16_cm MEMxi:$src_gpr, 3)>;
    187 def : R600Pat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)),
    188           (VTX_READ_32_cm MEMxi:$src_gpr, 3)>;
    189 def : R600Pat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)),
    190           (VTX_READ_64_cm MEMxi:$src_gpr, 3)>;
    191 def : R600Pat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)),
    192           (VTX_READ_128_cm MEMxi:$src_gpr, 3)>;
    193 
    194 //===----------------------------------------------------------------------===//
    195 // VTX Read from constant memory space
    196 //===----------------------------------------------------------------------===//
        // Maps the vtx_id2_* load fragments onto the VTX_READ_*_cm instruction of
        // matching width: zero/any-extending i8 and i16 loads, then i32, v2i32 and
        // v4i32 loads. NOTE(review): the trailing immediate 2 presumably is the
        // buffer id corresponding to the "id2" fragments -- confirm.
    197 def : R600Pat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)),
    198           (VTX_READ_8_cm MEMxi:$src_gpr, 2)>;
    199 def : R600Pat<(i32:$dst_gpr (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)),
    200           (VTX_READ_16_cm MEMxi:$src_gpr, 2)>;
    201 def : R600Pat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)),
    202           (VTX_READ_32_cm MEMxi:$src_gpr, 2)>;
    203 def : R600Pat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)),
    204           (VTX_READ_64_cm MEMxi:$src_gpr, 2)>;
    205 def : R600Pat<(v4i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)),
    206           (VTX_READ_128_cm MEMxi:$src_gpr, 2)>;
    207 
    208 //===----------------------------------------------------------------------===//
    209 // VTX Read from global memory space
    210 //===----------------------------------------------------------------------===//
        // Maps the vtx_id1_* load fragments onto the VTX_READ_*_cm instruction of
        // matching width: zero/any-extending i8 and i16 loads, then i32, v2i32 and
        // v4i32 loads. NOTE(review): the trailing immediate 1 presumably is the
        // buffer id corresponding to the "id1" fragments -- confirm.
    211 def : R600Pat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)),
    212           (VTX_READ_8_cm MEMxi:$src_gpr, 1)>;
    213 def : R600Pat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)),
    214           (VTX_READ_16_cm MEMxi:$src_gpr, 1)>;
    215 def : R600Pat<(i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
    216           (VTX_READ_32_cm MEMxi:$src_gpr, 1)>;
    217 def : R600Pat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
    218           (VTX_READ_64_cm MEMxi:$src_gpr, 1)>;
    219 def : R600Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
    220           (VTX_READ_128_cm MEMxi:$src_gpr, 1)>;
    221 
    222 } // End let SubtargetPredicate = isCayman
    223