Home | History | Annotate | Download | only in R600
      1 //===-- R600Instructions.td - R600 Instruction defs  -------*- tablegen -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // R600 Tablegen instruction definitions
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 include "R600Intrinsics.td"
     15 include "R600InstrFormats.td"
     16 
     17 class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
     18     InstR600 <outs, ins, asm, pattern, NullALU> {
        // InstR600 specialisation that always passes NullALU as the fifth
        // (itinerary) argument and places the record in the AMDGPU namespace.
     19 
     20   let Namespace = "AMDGPU";
     21 }
     22 
     23 def MEMxi : Operand<iPTR> {
        // Pointer operand made of an R600_TReg32_X register ($ptr) plus an i32
        // immediate ($index); printed with printMemOperand.
     24   let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
     25   let PrintMethod = "printMemOperand";
     26 }
     27 
     28 def MEMrr : Operand<iPTR> {
        // Pointer operand made of two R600_Reg32 registers ($ptr and $index).
        // No PrintMethod is set here.
     29   let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
     30 }
     31 
     32 // Operands for non-registers
     33 
     34 class InstFlag<string PM = "printOperand", int Default = 0>
     35     : OperandWithDefaultOps <i32, (ops (i32 Default))> {
        // i32 operand whose default-ops list is (i32 Default) and whose print
        // method is the one named by PM.  Both template arguments are optional.
     36   let PrintMethod = PM;
     37 }
     38 
     39 // src_sel for ALU src operands, see also the ALU_CONST and ALU_PARAM
     40 // registers.  Expressed through InstFlag; the default value is -1.
     41 def SEL : InstFlag <"printSel", -1>;
     42 
     43 // Bank swizzle selector.  Expressed through InstFlag with the default
     44 // value left at 0.
     45 def BANK_SWIZZLE : InstFlag <"printBankSwizzle">;
     46 
     47 def LITERAL : InstFlag<"printLiteral">;  // Literal operand; default 0.
     48 
        // Per-instruction modifier flags.  Each is an InstFlag, so its default is 0
        // unless a second template argument says otherwise, and each names its own
        // print method.
     49 def WRITE : InstFlag <"printWrite", 1>;  // Default is 1.
     50 def OMOD : InstFlag <"printOMOD">;
     51 def REL : InstFlag <"printRel">;
     52 def CLAMP : InstFlag <"printClamp">;
     53 def NEG : InstFlag <"printNeg">;
     54 def ABS : InstFlag <"printAbs">;
     55 def UEM : InstFlag <"printUpdateExecMask">;
     56 def UP : InstFlag <"printUpdatePred">;
     57 
     58 // XXX: The r600g finalizer in Mesa expects last to be one in most cases.
     59 // Once we start using the packetizer in this backend we should have this
     60 // default to 0.
     61 def LAST : InstFlag<"printLast", 1>;
        // RSel and CT are plain i32 operands (no default value) that only
        // override the print method.
     62 def RSel : Operand<i32> {
     63   let PrintMethod = "printRSel";
     64 }
     65 def CT: Operand<i32> {
     66   let PrintMethod = "printCT";
     67 }
     68 
     69 def FRAMEri : Operand<iPTR> {
        // Pointer operand: R600_Reg32 base register ($ptr) plus i32 immediate
        // ($index).
     70   let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
     71 }
     72 
        // Addressing-mode complex patterns.  For each one the first argument is
        // the matched value type, the second is the number of operands produced,
        // and the string names the selection routine that does the matching
        // (not defined in this file).
     73 def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
     74 def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
     75 def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
     76 def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
     77 def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
     78 def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
     79 
     80 
        // Predicate operand used as $pred_sel by the ALU instruction classes below.
        // It is built from R600_Predicate, with PRED_SEL_OFF passed as the third
        // argument (presumably the "not predicated" value - confirm in Target.td).
     81 def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
     82                                      (ops PRED_SEL_OFF)>;
     83 
     84 
     85 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
     86 
     87 // Class for instructions with only one source register.
     88 // If you add new ins to this instruction, make sure they are listed before
     89 // $literal, because the backend currently assumes that the last operand is
     90 // a literal.  Also be sure to update the enum R600Op1OperandIndex::ROI in
     91 // R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
     92 // and R600InstrInfo::getOperandIdx().
        // NOTE(review): $bank_swizzle is listed after $literal below, so the
        // "last operand is a literal" statement above looks stale - confirm.
     93 class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
     94                 InstrItinClass itin = AnyALU> :
     95     InstR600 <(outs R600_Reg32:$dst),
     96               (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
     97                    R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
     98                    LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
     99                    BANK_SWIZZLE:$bank_swizzle),
    100               !strconcat("  ", opName,
    101                    "$clamp $last $dst$write$dst_rel$omod, "
    102                    "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
    103                    "$pred_sel $bank_swizzle"),
    104               pattern,
    105               itin>,
    106     R600ALU_Word0,
    107     R600ALU_Word1_OP2 <inst> {
    108 
          // This class reuses the two-operand encoding (R600ALU_Word1_OP2), so the
          // second-source fields and the exec-mask/predicate update fields, which a
          // one-source instruction has no operand for, are fixed at zero.
    109   let src1 = 0;
    110   let src1_rel = 0;
    111   let src1_neg = 0;
    112   let src1_abs = 0;
    113   let update_exec_mask = 0;
    114   let update_pred = 0;
    115   let HasNativeOperands = 1;
    116   let Op1 = 1;
    117   let ALUInst = 1;
          // $literal is an operand of the instruction but is kept out of the
          // generated 64-bit encoding.
    118   let DisableEncoding = "$literal";
    119   let UseNamedOperandTable = 1;
    120 
          // 64-bit encoding: Word0 in the low half, Word1 in the high half.
    121   let Inst{31-0}  = Word0;
    122   let Inst{63-32} = Word1;
    123 }
    124 
    125 class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
    126                     InstrItinClass itin = AnyALU> :
        // One-source ALU instruction selected from the unary DAG operator `node`:
        // $dst = node($src0), both in R600_Reg32.  `itin` is forwarded to R600_1OP
        // so that an itinerary class supplied by the caller is honoured; with the
        // default (AnyALU) the resulting record is unchanged.
    127     R600_1OP <inst, opName,
    128               [(set R600_Reg32:$dst, (node R600_Reg32:$src0))], itin
    129 >;
    130 
    131 // If you add or change the operands for R600_2OP instructions, you must
    132 // also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
    133 // R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
        //
        // Two-source ALU instruction with an 11-bit opcode.  Compared with
        // R600_1OP it adds the UEM/UP operands and a full second source
        // ($src1 with neg/rel/abs/sel modifiers).
    134 class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
    135                 InstrItinClass itin = AnyALU> :
    136   InstR600 <(outs R600_Reg32:$dst),
    137           (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
    138                OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
    139                R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
    140                R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
    141                LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
    142                BANK_SWIZZLE:$bank_swizzle),
    143           !strconcat("  ", opName,
    144                 "$clamp $last $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
    145                 "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
    146                 "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
    147                 "$pred_sel $bank_swizzle"),
    148           pattern,
    149           itin>,
    150     R600ALU_Word0,
    151     R600ALU_Word1_OP2 <inst> {
    152 
    153   let HasNativeOperands = 1;
    154   let Op2 = 1;
    155   let ALUInst = 1;
          // $literal is an operand but is kept out of the generated encoding.
    156   let DisableEncoding = "$literal";
    157   let UseNamedOperandTable = 1;
    158 
          // 64-bit encoding: Word0 in the low half, Word1 in the high half.
    159   let Inst{31-0}  = Word0;
    160   let Inst{63-32} = Word1;
    161 }
    162 
    163 class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
    164                        InstrItinClass itin = AnyALU> :
        // Two-source ALU instruction selected from the binary DAG operator `node`:
        // $dst = node($src0, $src1), all in R600_Reg32.  `itin` (previously
        // misspelled and ignored) is forwarded to R600_2OP; with the default
        // (AnyALU) the resulting record is unchanged.
    165     R600_2OP <inst, opName,
    166               [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
    167                                            R600_Reg32:$src1))], itin
    168 >;
    169 
    170 // If you add or change the operands for R600_3OP instructions, you must
    171 // also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
    172 // R600InstrInfo::buildDefaultInstruction(), and
    173 // R600InstrInfo::getOperandIdx().
    174 class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
    175                 InstrItinClass itin = AnyALU> :
    176   InstR600 <(outs R600_Reg32:$dst),
    177           (ins REL:$dst_rel, CLAMP:$clamp,
    178                R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
    179                R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
    180                R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
    181                LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
    182                BANK_SWIZZLE:$bank_swizzle),
    183           !strconcat("  ", opName, "$clamp $last $dst$dst_rel, "
    184                              "$src0_neg$src0$src0_rel, "
    185                              "$src1_neg$src1$src1_rel, "
    186                              "$src2_neg$src2$src2_rel, "
    187                              "$pred_sel"
    188                              "$bank_swizzle"),
    189           pattern,
    190           itin>,
    191     R600ALU_Word0,
    192     R600ALU_Word1_OP3<inst>{
          // Three-source ALU instruction.  The opcode is 5 bits wide (11 in
          // R600_1OP/R600_2OP) and the operand list has no WRITE, OMOD, ABS, UEM
          // or UP entries.
          // NOTE(review): the asm string joins "$pred_sel" and "$bank_swizzle"
          // with no separating space, whereas R600_1OP and R600_2OP print
          // "$pred_sel $bank_swizzle" - confirm whether that is intentional.
    193 
    194   let HasNativeOperands = 1;
          // $literal is an operand but is kept out of the generated encoding.
    195   let DisableEncoding = "$literal";
    196   let Op3 = 1;
    197   let UseNamedOperandTable = 1;
    198   let ALUInst = 1;
    199 
          // 64-bit encoding: Word0 in the low half, Word1 in the high half.
    200   let Inst{31-0}  = Word0;
    201   let Inst{63-32} = Word1;
    202 }
    203 
    204 class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
    205                       InstrItinClass itin = VecALU> :
        // Instruction base with a single R600_Reg32 result and caller-supplied
        // inputs, asm string and pattern; the itinerary defaults to VecALU.
        // NOTE(review): `inst` is accepted but not referenced anywhere in this
        // class, and no Inst bits are assigned here.
    206   InstR600 <(outs R600_Reg32:$dst),
    207           ins,
    208           asm,
    209           pattern,
    210           itin>;
    211 
    212 
    213 
    214 } // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
    215 
    216 def TEX_SHADOW : PatLeaf<
        // Immediate-matching leaves that classify a type code (presumably the
        // texture target - confirm against the producer of these immediates).
        // Each predicate truncates the immediate to 32 bits and compares it with
        // a fixed set of values.  TEX_SHADOW accepts 6-8 and 11-13.
        // NOTE(review): codes 11 and 12 are accepted by both TEX_SHADOW and
        // TEX_SHADOW_ARRAY below - confirm the overlap is intended.
    217   (imm),
    218   [{uint32_t TType = (uint32_t)N->getZExtValue();
    219     return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
    220   }]
    221 >;
    222 
    223 def TEX_RECT : PatLeaf<
        // Accepts only code 5.
    224   (imm),
    225   [{uint32_t TType = (uint32_t)N->getZExtValue();
    226     return TType == 5;
    227   }]
    228 >;
    229 
    230 def TEX_ARRAY : PatLeaf<
        // Accepts codes 9, 10, 15 and 16.
    231   (imm),
    232   [{uint32_t TType = (uint32_t)N->getZExtValue();
    233     return TType == 9 || TType == 10 || TType == 15 || TType == 16;
    234   }]
    235 >;
    236 
    237 def TEX_SHADOW_ARRAY : PatLeaf<
        // Accepts codes 11, 12 and 17.
    238   (imm),
    239   [{uint32_t TType = (uint32_t)N->getZExtValue();
    240     return TType == 11 || TType == 12 || TType == 17;
    241   }]
    242 >;
    243 
    244 class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs,
    245                  dag ins, string asm, list<dag> pattern> :
    246     InstR600ISA <outs, ins, asm, pattern>,
    247     CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF  {
    248   // Fields driven by the template arguments.
    249   let cf_inst   = cfinst;
    250   let rat_inst  = ratinst;
    251   let comp_mask = mask;
    252 
    253   // XXX: Have a separate instruction for non-indexed writes.
    254   let type    = 1;
    255   let barrier = 1;
    256 
    257   // Every remaining field is fixed at zero.
    258   let rat_id      = 0;
    259   let rim         = 0;
    260   let rw_rel      = 0;
    261   let elem_size   = 0;
    262   let array_size  = 0;
    263   let burst_count = 0;
    264   let vpm         = 0;
    265   let mark        = 0;
    266   let Inst{31-0}  = Word0;  // low half of the 64-bit encoding
    267   let Inst{63-32} = Word1;  // high half of the 64-bit encoding
    268 }
    269 
    270 class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
    271     : InstR600ISA <outs, (ins MEMxi:$src_gpr), name, pattern>,
    272       VTX_WORD1_GPR {
          // Read instruction with a single MEMxi source operand ($src_gpr); `name`
          // is used directly as the asm string.  Only Inst{63-32} (Word1) is
          // assigned in this class.
          // NOTE(review): `buffer_id` is not referenced inside this class body.
    273 
    274   // Static fields
    275   let DST_REL = 0;
    276   // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
    277   // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
    278   // however, based on my testing if USE_CONST_FIELDS is set, then all
    279   // these fields need to be set to 0.
    280   let USE_CONST_FIELDS = 0;
    281   let NUM_FORMAT_ALL = 1;
    282   let FORMAT_COMP_ALL = 0;
    283   let SRF_MODE_ALL = 0;
    284 
    285   let Inst{63-32} = Word1;
    286   // LLVM can only encode 64-bit instructions, so these fields are manually
    287   // encoded in R600CodeEmitter
    288   //
    289   // bits<16> OFFSET;
    290   // bits<2>  ENDIAN_SWAP = 0;
    291   // bits<1>  CONST_BUF_NO_STRIDE = 0;
    292   // bits<1>  MEGA_FETCH = 0;
    293   // bits<1>  ALT_CONST = 0;
    294   // bits<2>  BUFFER_INDEX_MODE = 0;
    295 
    296   // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
    297   // is done in R600CodeEmitter)
    298   //
    299   // Inst{79-64} = OFFSET;
    300   // Inst{81-80} = ENDIAN_SWAP;
    301   // Inst{82}    = CONST_BUF_NO_STRIDE;
    302   // Inst{83}    = MEGA_FETCH;
    303   // Inst{84}    = ALT_CONST;
    304   // Inst{86-85} = BUFFER_INDEX_MODE;
    305   // Inst{95-87} = 0; Reserved
    306 
    307   // VTX_WORD3 (Padding)
    308   //
    309   // Inst{127-96} = 0;
    310 
    311   let VTXInst = 1;
    312 }
    313 
    314 class LoadParamFrag <PatFrag load_type> : PatFrag <
        // Wraps the load fragment `load_type` so that it only matches when
        // isConstantLoad(<the node as a LoadSDNode>, 0) returns true.
    315   (ops node:$ptr), (load_type node:$ptr),
    316   [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
    317 >;
    318 
        // Instances for a plain load and for the az_extloadi8 / az_extloadi16
        // fragments.
    319 def load_param : LoadParamFrag<load>;
    320 def load_param_exti8 : LoadParamFrag<az_extloadi8>;
    321 def load_param_exti16 : LoadParamFrag<az_extloadi16>;
    322 
    323 def isR600 : Predicate<"Subtarget.getGeneration() <= AMDGPUSubtarget::R700">;
        // Note: isR600 is "generation <= R700", so it also holds on R700, while
        // isR700 is the exact match.
    324 def isR700 : Predicate<"Subtarget.getGeneration() == AMDGPUSubtarget::R700">;
        // isEG: generation in [EVERGREEN, SOUTHERN_ISLANDS) and not a Cayman ISA.
    325 def isEG : Predicate<
    326   "Subtarget.getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
    327   "Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
    328   "!Subtarget.hasCaymanISA()">;
    329 
    330 def isCayman : Predicate<"Subtarget.hasCaymanISA()">;
        // isEGorCayman: generation is exactly EVERGREEN or NORTHERN_ISLANDS.  The
        // adjacent string pieces are joined as written, so the generated condition
        // contains "EVERGREEN||" and "==AMDGPUSubtarget" without spaces.
    331 def isEGorCayman : Predicate<"Subtarget.getGeneration() == "
    332                              "AMDGPUSubtarget::EVERGREEN"
    333                             "|| Subtarget.getGeneration() =="
    334                             "AMDGPUSubtarget::NORTHERN_ISLANDS">;
    335 
        // isR600toCayman: every generation up to and including NORTHERN_ISLANDS.
    336 def isR600toCayman : Predicate<
    337                      "Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
    338 
    339 //===----------------------------------------------------------------------===//
    340 // R600 SDNodes
    341 //===----------------------------------------------------------------------===//
    342 
    343 def INTERP_PAIR_XY :  AMDGPUShaderInst <
        // Shader instruction with no selection pattern: one i32 immediate plus a
        // Y-class and an X-class register in, an X/Y register pair out.  The asm
        // string prints both results ("$dst0 $dst1"); it used to print the literal
        // text "dst1" because the '$' was missing.
    344   (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
    345   (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
    346   "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
    347   []>;
    348 
    349 def INTERP_PAIR_ZW :  AMDGPUShaderInst <
        // Same shape as INTERP_PAIR_XY, but the results go to a Z/W register pair.
    350   (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
    351   (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
    352   "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
    353   []>;
    354 
    355 def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
        // Variadic node: one integer result, first operand constrained to the
        // pointer type.
    356   SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
    357   [SDNPVariadic]
    358 >;
    359 
    360 def DOT4 : SDNode<"AMDGPUISD::DOT4",
        // One floating-point result and eight f32 operands.
    361   SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>,
    362       SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>,
    363       SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>,
    364   []
    365 >;
    366 
        // COS_HW / SIN_HW: one floating-point operand, one floating-point result.
    367 def COS_HW : SDNode<"AMDGPUISD::COS_HW",
    368   SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
    369 >;
    370 
    371 def SIN_HW : SDNode<"AMDGPUISD::SIN_HW",
    372   SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
    373 >;
    374 
        // TEXTURE_FETCH: one floating-point result and 19 operands; only the result
        // type is constrained here.
    375 def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
    376 
    377 def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
    378 
    379 multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> {
        // Emits one anonymous pattern: a TEXTURE_FETCH whose first operand is the
        // constant TextureOp is selected to `inst`.  The remaining 18 operands (the
        // source value of type `vt` followed by 17 i32 immediates) are forwarded
        // to `inst` in the same order, with the source taken as R600_Reg128.
    380 def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
    381           (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw),
    382           (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz),
    383           (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z),
    384           (i32 imm:$DST_SEL_W),
    385           (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID),
    386           (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z),
    387           (i32 imm:$COORD_TYPE_W)),
    388           (inst R600_Reg128:$SRC_GPR,
    389           imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw,
    390           imm:$offsetx, imm:$offsety, imm:$offsetz,
    391           imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z,
    392           imm:$DST_SEL_W,
    393           imm:$RESOURCE_ID, imm:$SAMPLER_ID,
    394           imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z,
    395           imm:$COORD_TYPE_W)>;
    396 }
    397 
    398 //===----------------------------------------------------------------------===//
    399 // Interpolation Instructions
    400 //===----------------------------------------------------------------------===//
    401 
    402 def INTERP_VEC_LOAD :  AMDGPUShaderInst <
        // One i32 immediate in, one R600_Reg128 out, no selection pattern.  The
        // printed mnemonic is "INTERP_LOAD", not the record name.
    403   (outs R600_Reg128:$dst),
    404   (ins i32imm:$src0),
    405   "INTERP_LOAD $src0 : $dst",
    406   []>;
    407 
        // INTERP_XY / INTERP_ZW: two-source ALU instructions without a selection
        // pattern; both fix bank_swizzle to 5.
    408 def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
    409   let bank_swizzle = 5;
    410 }
    411 
    412 def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
    413   let bank_swizzle = 5;
    414 }
    415 
        // One-source ALU instruction, opcode 0xE0, no selection pattern.
    416 def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
    417 
    418 //===----------------------------------------------------------------------===//
    419 // Export Instructions
    420 //===----------------------------------------------------------------------===//
    421 
    422 def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
        // EXPORT: no results, seven operands (first floating point, second
        // integer); carries a chain and is marked as having side effects.
    423 
    424 def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
    425   [SDNPHasChain, SDNPSideEffect]>;
    426 
    427 class ExportWord0 {
          // First 32-bit word shared by both export encodings.
    428   field bits<32> Word0;
    429 
    430   bits<13> arraybase;
    431   bits<2> type;
    432   bits<7> gpr;
    433   bits<2> elem_size;
    434 
    435   let Word0{12-0} = arraybase;
    436   let Word0{14-13} = type;
    437   let Word0{21-15} = gpr;
    438   let Word0{22} = 0; // RW_REL
    439   let Word0{29-23} = 0; // INDEX_GPR
    440   let Word0{31-30} = elem_size;
    441 }
    442 
    443 class ExportSwzWord1 {
          // Second word for swizzled exports.  Only bits 11-0 (the four 3-bit
          // swizzles) are assigned here; `eop` and `inst` are declared but not
          // placed into Word1 in this class (presumably done by subclasses -
          // confirm).
    444   field bits<32> Word1;
    445 
    446   bits<3> sw_x;
    447   bits<3> sw_y;
    448   bits<3> sw_z;
    449   bits<3> sw_w;
    450   bits<1> eop;
    451   bits<8> inst;
    452 
    453   let Word1{2-0} = sw_x;
    454   let Word1{5-3} = sw_y;
    455   let Word1{8-6} = sw_z;
    456   let Word1{11-9} = sw_w;
    457 }
    458 
    459 class ExportBufWord1 {
          // Second word for buffer exports.  Only bits 15-0 (array size and
          // component mask) are assigned here; `eop` and `inst` are declared but
          // not placed into Word1 in this class (presumably done by subclasses -
          // confirm).
    460   field bits<32> Word1;
    461 
    462   bits<12> arraySize;
    463   bits<4> compMask;
    464   bits<1> eop;
    465   bits<8> inst;
    466 
    467   let Word1{11-0} = arraySize;
    468   let Word1{15-12} = compMask;
    469 }
    470 
    471 multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
          // Selection patterns that lower the pixel/dummy store intrinsics and the
          // EXPORT node to ExportInst.  Every result passes nine operands, which
          // line up with the ExportSwzInst operand list: gpr, type, arraybase,
          // sw_x, sw_y, sw_z, sw_w, inst, eop.  `cf_inst` is always the `inst`
          // operand and eop is always 0.

          // Depth: scalar inserted at sub0 of an undefined v4f32; type 0,
          // arraybase 61, swizzle (0, 7, 7, 7).
    472   def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
    473     (ExportInst
    474         (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
    475         0, 61, 0, 7, 7, 7, cf_inst, 0)
    476   >;
    477 
          // Stencil: same as depth except for the swizzle (7, 0, 7, 7).
    478   def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
    479     (ExportInst
    480         (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
    481         0, 61, 7, 0, 7, 7, cf_inst, 0)
    482   >;
    483 
          // Dummy store, generic form: undefined source, caller-given type,
          // arraybase 0, swizzle (7, 7, 7, 7).
    484   def : Pat<(int_R600_store_dummy (i32 imm:$type)),
    485     (ExportInst
    486         (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
    487   >;
    488 
          // Dummy store with the constant type 1: uses arraybase 60 instead of 0.
    489   def : Pat<(int_R600_store_dummy 1),
    490     (ExportInst
    491         (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
    492   >;
    493 
          // Generic EXPORT node.  The node carries (base, type) in that order,
          // while the instruction takes (type, base), so the two are swapped.
    494   def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
    495     (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
    496         (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
    497         imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
    498   >;
    499 
    500 }
    501 
    502 multiclass SteamOutputExportPattern<Instruction ExportInst,
    503     bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
        // Lowers int_R600_store_stream_output to ExportInst, one pattern per
        // constant stream index 0-3.  The seven result operands line up with the
        // ExportBufInst operand list (gpr, type, arraybase, arraySize, compMask,
        // inst, eop): type is 0, arraySize is 4095, eop is 0, and the stream index
        // only selects which bufNinst value is passed as `inst`.
        // ("Steam" in the name is a long-standing misspelling of "Stream"; it is
        // the public identifier, so it is left as is.)
    504 // Stream0
    505   def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
    506       (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
    507       (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
    508       4095, imm:$mask, buf0inst, 0)>;
    509 // Stream1
    510   def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
    511       (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
    512       (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
    513       4095, imm:$mask, buf1inst, 0)>;
    514 // Stream2
    515   def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
    516       (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
    517       (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
    518       4095, imm:$mask, buf2inst, 0)>;
    519 // Stream3
    520   def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
    521       (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
    522       (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
    523       4095, imm:$mask, buf3inst, 0)>;
    524 }
    525 
    526 // Export Instructions should not be duplicated by TailDuplication pass
    527 // (which assumes that duplicable instruction are affected by exec mask)
    528 let usesCustomInserter = 1, isNotDuplicable = 1 in {
    529 
    530 class ExportSwzInst : InstR600ISA<(
        // Swizzled export: no results; operands are the 128-bit source register,
        // type, arraybase, four RSel swizzles, the CF opcode and the eop flag.
        // The asm string prints only the register and its swizzle.
    531     outs),
    532     (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    533     RSel:$sw_x, RSel:$sw_y, RSel:$sw_z, RSel:$sw_w, i32imm:$inst,
    534     i32imm:$eop),
    535     !strconcat("EXPORT", " $gpr.$sw_x$sw_y$sw_z$sw_w"),
    536     []>, ExportWord0, ExportSwzWord1 {
    537   let elem_size = 3;
          // 64-bit encoding: Word0 in the low half, Word1 in the high half.
    538   let Inst{31-0} = Word0;
    539   let Inst{63-32} = Word1;
    540 }
    541 
    542 } // End usesCustomInserter = 1, isNotDuplicable = 1
    543 
    544 class ExportBufInst : InstR600ISA<(
        // Buffer export: no results; operands are the 128-bit source register,
        // type, arraybase, array size, component mask, the CF opcode and the eop
        // flag.  Unlike ExportSwzInst it is declared outside the
        // usesCustomInserter / isNotDuplicable block and fixes elem_size to 0.
    545     outs),
    546     (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    547     i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
    548     !strconcat("EXPORT", " $gpr"),
    549     []>, ExportWord0, ExportBufWord1 {
    550   let elem_size = 0;
          // 64-bit encoding: Word0 in the low half, Word1 in the high half.
    551   let Inst{31-0} = Word0;
    552   let Inst{63-32} = Word1;
    553 }
    554 
    555 //===----------------------------------------------------------------------===//
    556 // Control Flow Instructions
    557 //===----------------------------------------------------------------------===//
    558 
    559 
    560 def KCACHE : InstFlag<"printKCache">;  // Operand type of the KCACHE_MODE0/1 operands below.
    561 
    562 class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
        // Control-flow instruction that introduces an ALU clause.  It has no
        // results and no selection pattern; the asm string prints the count, the
        // address and the two kcache modes.
    563 (ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
    564 KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
    565 i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
    566 i32imm:$COUNT, i32imm:$Enabled),
    567 !strconcat(OpName, " $COUNT, @$ADDR, "
    568 "KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
    569 [] >, CF_ALU_WORD0, CF_ALU_WORD1 {
    570   field bits<64> Inst;
    571 
    572   let CF_INST = inst;
    573   let ALT_CONST = 0;
    574   let WHOLE_QUAD_MODE = 0;
    575   let BARRIER = 1;
    576 
          // 64-bit encoding: Word0 in the low half, Word1 in the high half.
    577   let Inst{31-0} = Word0;
    578   let Inst{63-32} = Word1;
    579 }
    580 
    581 class CF_WORD0_R600 {
          // First control-flow word for the R600 encoding: the whole word is ADDR.
    582   field bits<32> Word0;
    583 
    584   bits<32> ADDR;
    585 
    586   let Word0 = ADDR;
    587 }
    588 
    589 class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
        // Control-flow instruction in the R600 encoding (7-bit opcode), with
        // caller-supplied operands and asm string and no selection pattern.
    590 ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
    591   field bits<64> Inst;
    592   bits<4> CNT;
    593 
    594   let CF_INST = inst;
    595   let BARRIER = 1;
    596   let CF_CONST = 0;
    597   let VALID_PIXEL_MODE = 0;
    598   let COND = 0;
          // The 4-bit CNT is split: the low three bits go to COUNT and the top
          // bit goes to COUNT_3.
    599   let COUNT = CNT{2-0};
    600   let CALL_COUNT = 0;
    601   let COUNT_3 = CNT{3};
    602   let END_OF_PROGRAM = 0;
    603   let WHOLE_QUAD_MODE = 0;
    604 
    605   let Inst{31-0} = Word0;
    606   let Inst{63-32} = Word1;
    607 }
    608 
    609 class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
        // Control-flow instruction built on CF_WORD0_EG / CF_WORD1_EG (8-bit
        // opcode), with caller-supplied operands and asm string and no
        // selection pattern.
    610 ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
    611   field bits<64> Inst;
    612 
    613   let CF_INST = inst;
    614   let BARRIER = 1;
    615   let JUMPTABLE_SEL = 0;
    616   let CF_CONST = 0;
    617   let VALID_PIXEL_MODE = 0;
    618   let COND = 0;
    619   let END_OF_PROGRAM = 0;
    620 
    621   let Inst{31-0} = Word0;
    622   let Inst{63-32} = Word1;
    623 }
    624 
    625 def CF_ALU : ALU_CLAUSE<8, "ALU">;  // ALU clause instructions, CF opcodes 8-10.
    626 def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
    627 def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
    628 
        // FETCH_CLAUSE and ALU_CLAUSE (the def; it shares its name with the class
        // above) take one address operand and print a "... clause starting at"
        // line.  Their 8-bit Inst is set from the field `num`, which is not one of
        // the declared operands.
    629 def FETCH_CLAUSE : AMDGPUInst <(outs),
    630 (ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
    631   field bits<8> Inst;
    632   bits<8> num;
    633   let Inst = num;
    634 }
    635 
    636 def ALU_CLAUSE : AMDGPUInst <(outs),
    637 (ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
    638   field bits<8> Inst;
    639   bits<8> num;
    640   let Inst = num;
    641 }
    642 
        // Two 32-bit literals packed into one 64-bit word: literal1 in the low
        // half, literal2 in the high half.
    643 def LITERALS : AMDGPUInst <(outs),
    644 (ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
    645   field bits<64> Inst;
    646   bits<32> literal1;
    647   bits<32> literal2;
    648 
    649   let Inst{31-0} = literal1;
    650   let Inst{63-32} = literal2;
    651 }
    652 
        // Operand-less instruction with a 64-bit Inst field to which no bits are
        // assigned here.
    653 def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
    654   field bits<64> Inst;
    655 }
    656 
    657 let Predicates = [isR600toCayman] in {
    658 
    659 //===----------------------------------------------------------------------===//
    660 // Common Instructions R600, R700, Evergreen, Cayman
    661 //===----------------------------------------------------------------------===//
    662 
    663 def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;  // Float two-source ops, opcodes 0x0-0x4.
    664 // Non-IEEE MUL: 0 * anything = 0
    665 def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
    666 def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
    667 def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
    668 def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
    668 def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
    669 
    670 // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
    671 // so some of the instruction names don't match the asm string.
    672 // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
    673 def SETE : R600_2OP <
    674   0x08, "SETE",
    675   [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
    676 >;
    677 
    678 def SGT : R600_2OP <
    679   0x09, "SETGT",
    680   [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
    681 >;
    682 
    683 def SGE : R600_2OP <
    684   0xA, "SETGE",
    685   [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
    686 >;
    687 
    688 def SNE : R600_2OP <
    689   0xB, "SETNE",
    690   [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
    691 >;
    692 
    693 def SETE_DX10 : R600_2OP <
    694   0xC, "SETE_DX10",
    695   [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
    696 >;
    697 
    698 def SETGT_DX10 : R600_2OP <
    699   0xD, "SETGT_DX10",
    700   [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
    701 >;
    702 
    703 def SETGE_DX10 : R600_2OP <
    704   0xE, "SETGE_DX10",
    705   [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
    706 >;
    707 
    708 def SETNE_DX10 : R600_2OP <
    709   0xF, "SETNE_DX10",
    710   [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
    711 >;
    712 
    712 def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;  // One-source float ops, 0x10-0x14.
    713 def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
    714 def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
    715 def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
    716 def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
    717 
        // Register move: opcode 0x19, no selection pattern attached here.
    718 def MOV : R600_1OP <0x19, "MOV", []>;
    720 
    720 let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
    721 
        // Pseudo instruction (empty asm string, custom inserter) that takes one
        // immediate of operand type `immType` and defines an R600_Reg32.
        // NOTE(review): `vt` is accepted but not referenced in this class.
    722 class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
    723   (outs R600_Reg32:$dst),
    724   (ins immType:$imm),
    725   "",
    726   []
    727 >;
    728 
    729 } // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
    730 
        // Any integer immediate is selected to MOV_IMM_I32.
    731 def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
    732 def : Pat <
    733   (imm:$val),
    734   (MOV_IMM_I32 imm:$val)
    735 >;
    736 
        // Any floating-point immediate is selected to MOV_IMM_F32.
    737 def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
    738 def : Pat <
    739   (fpimm:$val),
    740   (MOV_IMM_F32  fpimm:$val)
    741 >;
    743 
    743 def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;  // Opcodes 0x20-0x23; no selection patterns.
    744 def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
    745 def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
    746 def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
    747 
        // KILLGT is the only instruction in this group marked hasSideEffects.
    748 let hasSideEffects = 1 in {
    749 
    750 def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
    751 
    752 } // end hasSideEffects
    754 
    754 def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;  // Integer ALU ops, opcodes 0x30-0x39.
    755 def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
    756 def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
    757 def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;  // The only one-source entry here.
    758 def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
    759 def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
    760 def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
    761 def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
    762 def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
    763 def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
    765 
    765 def SETE_INT : R600_2OP <
        // Integer comparisons (opcodes 0x3A-0x3F): compare two i32 values and
        // produce -1 or 0.  These patterns name the condition codes directly
        // (SETEQ, SETGT, ...), and the two _UINT forms use the unsigned codes
        // SETUGT / SETUGE.
    766   0x3A, "SETE_INT",
    767   [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
    768 >;
    769 
    770 def SETGT_INT : R600_2OP <
    771   0x3B, "SETGT_INT",
    772   [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
    773 >;
    774 
    775 def SETGE_INT : R600_2OP <
    776   0x3C, "SETGE_INT",
    777   [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
    778 >;
    779 
    780 def SETNE_INT : R600_2OP <
    781   0x3D, "SETNE_INT",
    782   [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
    783 >;
    784 
    785 def SETGT_UINT : R600_2OP <
    786   0x3E, "SETGT_UINT",
    787   [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
    788 >;
    789 
    790 def SETGE_UINT : R600_2OP <
    791   0x3F, "SETGE_UINT",
    792   [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
    793 >;
    795 
    796 def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;  // Integer predicate-set ops, 0x42-0x45.
    797 def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;  // Mnemonic now matches the record (was "PRED_SETGE_INT").
    798 def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
    799 def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
    800 
    801 def CNDE_INT : R600_3OP <
        // Conditional selects: compare $src0 against 0 and yield $src1 when the
        // condition holds, otherwise $src2.  Note that CNDGE_INT (0x1E) is listed
        // before CNDGT_INT (0x1D).
    802   0x1C, "CNDE_INT",
    803   [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
    804 >;
    805 
    806 def CNDGE_INT : R600_3OP <
    807   0x1E, "CNDGE_INT",
    808   [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))]
    809 >;
    810 
    811 def CNDGT_INT : R600_3OP <
    812   0x1D, "CNDGT_INT",
    813   [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))]
    814 >;
    815 
    816 //===----------------------------------------------------------------------===//
    817 // Texture instructions
    818 //===----------------------------------------------------------------------===//
    819 
     820 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
     821 
         // Base class for the TEX_* records defined after this scope.
         //   inst   - declared as 11 bits, but only bits 4-0 are copied into TEX_INST.
         //   opName - mnemonic placed at the front of the assembly string.
         // The single output and the source are 128-bit registers; the remaining
         // inputs are source/destination channel selectors (RSel), three immediate
         // offsets, resource and sampler ids, and four coordinate-type flags (CT).
         // The pattern list is empty: selection patterns are attached separately
         // through the TexPattern instantiations in this file.
     822 class R600_TEX <bits<11> inst, string opName> :
     823   InstR600 <(outs R600_Reg128:$DST_GPR),
     824           (ins R600_Reg128:$SRC_GPR,
     825           RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw,
     826           i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz,
     827           RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W,
     828           i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
     829           CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z,
     830           CT:$COORD_TYPE_W),
     831           !strconcat(opName,
     832           " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, "
     833           "$SRC_GPR.$srcx$srcy$srcz$srcw "
     834           "RID:$RESOURCE_ID SID:$SAMPLER_ID "
     835           "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"),
     836           [],
     837           NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
           // Only Word0 and Word1 are mapped into Inst here; TEX_WORD2 is mixed in
           // but its word is not assigned to Inst in this class.
     838   let Inst{31-0} = Word0;
     839   let Inst{63-32} = Word1;
     840 
           // Encoding fields that are fixed for every texture instruction.
     841   let TEX_INST = inst{4-0};
     842   let SRC_REL = 0;
     843   let DST_REL = 0;
     844   let LOD_BIAS = 0;
     845 
     846   let INST_MOD = 0;
     847   let FETCH_WHOLE_QUAD = 0;
     848   let ALT_CONST = 0;
     849   let SAMPLER_INDEX_MODE = 0;
     850   let RESOURCE_INDEX_MODE = 0;
     851 
     852   let TEXInst = 1;
     853 }
     854 
     855 } // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
    856 
    857 
    858 
     859 def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">;
         // Texture instruction records.  The first argument feeds R600_TEX's `inst`
         // parameter (its low five bits become TEX_INST); the second is the mnemonic.
     860 def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">;
     861 def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">;
     862 def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
     863 def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
     864 def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
     865 def TEX_LD : R600_TEX <0x03, "TEX_LD">;
     866 def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
     867 def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
     868 def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
     869 def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">;
     870 def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">;
     871 def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">;
     872 def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">;
     873 
         // Selection patterns for ten of the records above, keyed by an integer
         // index (0-9).  TexPattern is defined outside this chunk; TEX_LD and
         // TEX_GET_TEXTURE_RESINFO pass v4i32 as an extra type argument.  The
         // SET_GRADIENTS and SAMPLE_G records get no TexPattern here.
     874 defm : TexPattern<0, TEX_SAMPLE>;
     875 defm : TexPattern<1, TEX_SAMPLE_C>;
     876 defm : TexPattern<2, TEX_SAMPLE_L>;
     877 defm : TexPattern<3, TEX_SAMPLE_C_L>;
     878 defm : TexPattern<4, TEX_SAMPLE_LB>;
     879 defm : TexPattern<5, TEX_SAMPLE_C_LB>;
     880 defm : TexPattern<6, TEX_LD, v4i32>;
     881 defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
     882 defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
     883 defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
    884 
    885 //===----------------------------------------------------------------------===//
    886 // Helper classes for common instructions
    887 //===----------------------------------------------------------------------===//
    888 
     889 class MUL_LIT_Common <bits<5> inst> : R600_3OP <
         // Three-operand helper classes.  `inst` is a 5-bit value supplied by each
         // per-generation record, so one class serves chips whose encodings differ.
         // MUL_LIT and MULADD carry no selection pattern.
     890   inst, "MUL_LIT",
     891   []
     892 >;
     893 
     894 class MULADD_Common <bits<5> inst> : R600_3OP <
     895   inst, "MULADD",
     896   []
     897 >;
     898 
         // Selected for an f32 fadd whose first operand is an fmul:
         // dst = (src0 * src1) + src2.
     899 class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
     900   inst, "MULADD_IEEE",
     901   [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
     902 >;
    903 
     904 class CNDE_Common <bits<5> inst> : R600_3OP <
         // Floating-point conditional moves.  Each pattern matches a selectcc that
         // compares f32 $src0 against FP_ZERO and yields $src1 when the condition
         // holds, $src2 otherwise.  FP_ZERO and the COND_* codes are defined
         // outside this chunk.
     905   inst, "CNDE",
     906   [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]
     907 >;
     908 
     909 class CNDGT_Common <bits<5> inst> : R600_3OP <
     910   inst, "CNDGT",
     911   [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]
     912 >;
     913 
     914 class CNDGE_Common <bits<5> inst> : R600_3OP <
     915   inst, "CNDGE",
     916   [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
     917 >;
    918 
    919 
     920 let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in {
         // Pseudo instruction class with one 32-bit result and a full set of
         // two-source ALU operands repeated for each of the four slots X, Y, Z, W.
         // Per slot: update_exec_mask, update_pred, write, omod, dst_rel, clamp,
         // then register/neg/rel/abs/sel for src0 and src1, then pred_sel.  Source
         // registers are restricted to the matching per-channel register class
         // (R600_TReg32_X, _Y, _Z, _W).  Two literal operands follow the slots.
         // The assembly string is empty and UseNamedOperandTable is enabled.
     921 class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
     922 // Slot X
     923    UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
     924    OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X,
     925    R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X,
     926    R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X,
     927    R600_Pred:$pred_sel_X,
     928 // Slot Y
     929    UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y,
     930    OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y,
     931    R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y,
     932    R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y,
     933    R600_Pred:$pred_sel_Y,
     934 // Slot Z
     935    UEM:$update_exec_mask_Z, UP:$update_pred_Z, WRITE:$write_Z,
     936    OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z,
     937    R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z,
     938    R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z,
     939    R600_Pred:$pred_sel_Z,
     940 // Slot W
     941    UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W,
     942    OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W,
     943    R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W,
     944    R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W,
     945    R600_Pred:$pred_sel_W,
     946    LITERAL:$literal0, LITERAL:$literal1),
     947   "",
     948   pattern,
     949   AnyALU> {
     950 
     951   let UseNamedOperandTable = 1;
     952 
     953 }
     954 }
    955 
     956 def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4
         // Pseudo selected for the DOT4 node (defined outside this chunk).  The
         // node's eight operands are taken pairwise as src0/src1 of slots X, Y, Z
         // and W, each constrained to that channel's register class.
     957   R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X,
     958   R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y,
     959   R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z,
     960   R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>;
    961 
    962 
     963 class DOT4_Common <bits<11> inst> : R600_2OP <inst, "DOT4", []>; // two-operand DOT4 with no selection pattern; `inst` is set per generation
    964 
    965 
     966 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
         // Defines two records per instantiation:
         //   <name>_pseudo - a pseudo on 128-bit registers that is selected for the
         //                   int_AMDGPU_cube intrinsic on v4f32;
         //   <name>_real   - a two-operand "CUBE" record carrying the `inst` value
         //                   and no selection pattern.
         // NOTE(review): the step that turns the pseudo into _real records is not
         // in this chunk -- confirm where that expansion happens.
     967 multiclass CUBE_Common <bits<11> inst> {
     968 
     969   def _pseudo : InstR600 <
     970     (outs R600_Reg128:$dst),
     971     (ins R600_Reg128:$src0),
     972     "CUBE $dst $src0",
     973     [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
     974     VecALU
     975   > {
     976     let isPseudo = 1;
     977     let UseNamedOperandTable = 1;
     978   }
     979 
     980   def _real : R600_2OP <inst, "CUBE", []>;
     981 }
     982 } // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
    983 
     984 class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
         // One-operand helper classes built on R600_1OP_Helper with a DAG node:
         // fexp2, fp_to_sint, sint_to_fp, fp_to_uint, uint_to_fp.  Every class in
         // this run sets TransOnly = 1 and uses the TransALU itinerary.
     985   inst, "EXP_IEEE", fexp2
     986 > {
     987   let TransOnly = 1;
     988   let Itinerary = TransALU;
     989 }
     990 
     991 class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
     992   inst, "FLT_TO_INT", fp_to_sint
     993 > {
     994   let TransOnly = 1;
     995   let Itinerary = TransALU;
     996 }
     997 
     998 class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
     999   inst, "INT_TO_FLT", sint_to_fp
    1000 > {
    1001   let TransOnly = 1;
    1002   let Itinerary = TransALU;
    1003 }
    1004 
    1005 class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
    1006   inst, "FLT_TO_UINT", fp_to_uint
    1007 > {
    1008   let TransOnly = 1;
    1009   let Itinerary = TransALU;
    1010 }
    1011 
    1012 class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
    1013   inst, "UINT_TO_FLT", uint_to_fp
    1014 > {
    1015   let TransOnly = 1;
    1016   let Itinerary = TransALU;
    1017 }
   1018 
    1019 class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
         // LOG_CLAMPED has no selection pattern and, unlike LOG_IEEE below, does
         // not set TransOnly or override the itinerary in this class.
    1020   inst, "LOG_CLAMPED", []
    1021 >;
    1022 
         // LOG_IEEE is built from the flog2 node.
    1023 class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
    1024   inst, "LOG_IEEE", flog2
    1025 > {
    1026   let TransOnly = 1;
    1027   let Itinerary = TransALU;
    1028 }
   1029 
    1030 class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>; // shl node
    1031 class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>; // srl node
    1032 class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>; // sra node
         // MULHI_INT is built from mulhs and, unlike the shifts above, is marked
         // TransOnly with the TransALU itinerary.
    1033 class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
    1034   inst, "MULHI_INT", mulhs
    1035 > {
    1036   let TransOnly = 1;
    1037   let Itinerary = TransALU;
    1038 }
   1039 class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
   1040   inst, "MULHI", mulhu
   1041 > {
   1042   let TransOnly = 1;
   1043   let Itinerary = TransALU;
   1044 }
    1045 class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
         // MULLO_INT is built from the generic mul node.  MULLO_UINT below has an
         // empty pattern list, so it is never chosen by a pattern from this class.
         // Both are TransOnly with the TransALU itinerary.
    1046   inst, "MULLO_INT", mul
    1047 > {
    1048   let TransOnly = 1;
    1049   let Itinerary = TransALU;
    1050 }
    1051 class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
    1052   let TransOnly = 1;
    1053   let Itinerary = TransALU;
    1054 }
   1055 
    1056 class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
         // Reciprocal and reciprocal-square-root helper classes.  All of them are
         // TransOnly with the TransALU itinerary.  RECIP_CLAMPED and
         // RECIPSQRT_IEEE carry no selection pattern.
    1057   inst, "RECIP_CLAMPED", []
    1058 > {
    1059   let TransOnly = 1;
    1060   let Itinerary = TransALU;
    1061 }
    1062 
         // Selected for an f32 fdiv whose numerator is FP_ONE, i.e. 1.0 / src0.
    1063 class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
    1064   inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
    1065 > {
    1066   let TransOnly = 1;
    1067   let Itinerary = TransALU;
    1068 }
    1069 
         // Built from the AMDGPUurecip node (defined outside this chunk).
    1070 class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
    1071   inst, "RECIP_UINT", AMDGPUurecip
    1072 > {
    1073   let TransOnly = 1;
    1074   let Itinerary = TransALU;
    1075 }
    1076 
         // Built from the int_AMDGPU_rsq intrinsic.
    1077 class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
    1078   inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
    1079 > {
    1080   let TransOnly = 1;
    1081   let Itinerary = TransALU;
    1082 }
    1083 
    1084 class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
    1085   inst, "RECIPSQRT_IEEE", []
    1086 > {
    1087   let TransOnly = 1;
    1088   let Itinerary = TransALU;
    1089 }
   1090 
    1091 class SIN_Common <bits<11> inst> : R600_1OP <
         // SIN and COS are selected for the SIN_HW / COS_HW nodes (defined outside
         // this chunk), not for generic sine/cosine nodes.  Both set Trig = 1 in
         // addition to TransOnly and the TransALU itinerary.
    1092   inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
    1093   let Trig = 1;
    1094   let TransOnly = 1;
    1095   let Itinerary = TransALU;
    1096 }
    1097 
    1098 class COS_Common <bits<11> inst> : R600_1OP <
    1099   inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
    1100   let Trig = 1;
    1101   let TransOnly = 1;
    1102   let Itinerary = TransALU;
    1103 }
   1104 
   1105 //===----------------------------------------------------------------------===//
   1106 // Helper patterns for complex intrinsics
   1107 //===----------------------------------------------------------------------===//
   1108 
    1109 multiclass DIV_Common <InstR600 recip_ieee> {
         // Emits two patterns that lower f32 division to a multiply by a
         // reciprocal: src0 / src1  ->  MUL_IEEE src0, (recip_ieee src1).
         // The first matches the int_AMDGPU_div intrinsic, the second the generic
         // fdiv node.  `recip_ieee` is the generation-specific reciprocal record.
    1110 def : Pat<
    1111   (int_AMDGPU_div f32:$src0, f32:$src1),
    1112   (MUL_IEEE $src0, (recip_ieee $src1))
    1113 >;
    1114 
    1115 def : Pat<
    1116   (fdiv f32:$src0, f32:$src1),
    1117   (MUL_IEEE $src0, (recip_ieee $src1))
    1118 >;
    1119 }
   1120 
    1121 class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
         // Pattern for the int_TGSI_lit_z intrinsic, parameterised by the
         // generation-specific MUL_LIT, LOG_CLAMPED and EXP_IEEE records:
         //   exp_ieee(mul_lit(log_clamped(MAX(src_y, 0.0)), src_w, src_x))
    1122   : Pat <
    1123   (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
    1124   (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
    1125 >;
   1126 
   1127 //===----------------------------------------------------------------------===//
   1128 // R600 / R700 Instructions
   1129 //===----------------------------------------------------------------------===//
   1130 
   1131 let Predicates = [isR600] in {
   1132 
    1133   def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
           // This run instantiates the *_Common helper classes with the `inst`
           // values used under the isR600 predicate.  CUBE_r600 is a defm and so
           // produces both a _pseudo and a _real record.
    1134   def MULADD_r600 : MULADD_Common<0x10>;
    1135   def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
    1136   def CNDE_r600 : CNDE_Common<0x18>;
    1137   def CNDGT_r600 : CNDGT_Common<0x19>;
    1138   def CNDGE_r600 : CNDGE_Common<0x1A>;
    1139   def DOT4_r600 : DOT4_Common<0x50>;
    1140   defm CUBE_r600 : CUBE_Common<0x52>;
    1141   def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
    1142   def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
    1143   def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
    1144   def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
    1145   def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
    1146   def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
    1147   def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
    1148   def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
    1149   def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
           // FLT_TO_UINT (0x79) is out of numeric sequence with its neighbours.
    1150   def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
    1151   def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
    1152   def SIN_r600 : SIN_Common<0x6E>;
    1153   def COS_r600 : COS_Common<0x6F>;
    1154   def ASHR_r600 : ASHR_Common<0x70>;
    1155   def LSHR_r600 : LSHR_Common<0x71>;
    1156   def LSHL_r600 : LSHL_Common<0x72>;
    1157   def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
    1158   def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
    1159   def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
    1160   def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
    1161   def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
   1162 
    1163   defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
           // Helper patterns bound to the R600 records: division via
           // RECIP_IEEE_r600 (above), POW_Common (defined outside this chunk) and
           // the lit_z expansion.
    1164   def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
    1165   def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
    1166 
           // fsqrt(src) is lowered to src * RECIPSQRT_CLAMPED(src).
    1167   def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
   1168 
    1169   def R600_ExportSwz : ExportSwzInst {
             // Word1 bit layout used under the isR600 predicate.  `eop` and `inst`
             // come from ExportSwzInst (defined outside this chunk); the other
             // fields are fixed constants.
    1170     let Word1{20-17} = 0; // BURST_COUNT
    1171     let Word1{21} = eop;
    1172     let Word1{22} = 1; // VALID_PIXEL_MODE
    1173     let Word1{30-23} = inst;
    1174     let Word1{31} = 1; // BARRIER
    1175   }
           // ExportPattern is defined outside this chunk; 39 is passed as its
           // second argument (presumably the export instruction value -- confirm).
    1176   defm : ExportPattern<R600_ExportSwz, 39>;
   1177 
    1178   def R600_ExportBuf : ExportBufInst {
             // Same Word1 bit layout as R600_ExportSwz, applied to ExportBufInst.
    1179     let Word1{20-17} = 0; // BURST_COUNT
    1180     let Word1{21} = eop;
    1181     let Word1{22} = 1; // VALID_PIXEL_MODE
    1182     let Word1{30-23} = inst;
    1183     let Word1{31} = 1; // BARRIER
    1184   }
           // The multiclass name is spelled "SteamOutputExportPattern" where it is
           // defined (outside this chunk); the four values are passed through to it.
    1185   defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
   1186 
    1187   def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT),
           // Control-flow records for the isR600 predicate.  Each one passes an
           // integer (presumably the CF opcode -- confirm against CF_CLAUSE_R600,
           // defined outside this chunk), its operand list and its assembly
           // string.  Fields that are not operands of a record (POP_COUNT, CNT,
           // ADDR) are pinned to 0 in its body.
    1188   "TEX $CNT @$ADDR"> {
    1189     let POP_COUNT = 0;
    1190   }
    1191   def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT),
    1192   "VTX $CNT @$ADDR"> {
    1193     let POP_COUNT = 0;
    1194   }
    1195   def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
    1196   "LOOP_START_DX10 @$ADDR"> {
    1197     let POP_COUNT = 0;
    1198     let CNT = 0;
    1199   }
    1200   def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
    1201     let POP_COUNT = 0;
    1202     let CNT = 0;
    1203   }
    1204   def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
    1205   "LOOP_BREAK @$ADDR"> {
    1206     let POP_COUNT = 0;
    1207     let CNT = 0;
    1208   }
    1209   def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
    1210   "CONTINUE @$ADDR"> {
    1211     let POP_COUNT = 0;
    1212     let CNT = 0;
    1213   }
           // JUMP, ELSE and POP take POP_COUNT as an operand, so only CNT is pinned.
    1214   def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
    1215   "JUMP @$ADDR POP:$POP_COUNT"> {
    1216     let CNT = 0;
    1217   }
    1218   def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
    1219   "ELSE @$ADDR POP:$POP_COUNT"> {
    1220     let CNT = 0;
    1221   }
           // CALL_FS and CF_END have no operands; every field is a constant.
    1222   def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
    1223     let ADDR = 0;
    1224     let CNT = 0;
    1225     let POP_COUNT = 0;
    1226   }
    1227   def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
    1228   "POP @$ADDR POP:$POP_COUNT"> {
    1229     let CNT = 0;
    1230   }
           // CF_END is the only record here that sets END_OF_PROGRAM.
    1231   def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
    1232     let CNT = 0;
    1233     let POP_COUNT = 0;
    1234     let ADDR = 0;
    1235     let END_OF_PROGRAM = 1;
    1236   }
   1237 
   1238 }
   1239 
   1240 //===----------------------------------------------------------------------===//
   1241 // R700 Only instructions
   1242 //===----------------------------------------------------------------------===//
   1243 
    1244 let Predicates = [isR700] in {
           // Separate SIN/COS records guarded by isR700.  They use the same `inst`
           // values (0x6E, 0x6F) as SIN_r600 / COS_r600.
    1245   def SIN_r700 : SIN_Common<0x6E>;
    1246   def COS_r700 : COS_Common<0x6F>;
    1247 }
   1248 
   1249 //===----------------------------------------------------------------------===//
   1250 // Evergreen Only instructions
   1251 //===----------------------------------------------------------------------===//
   1252 
   1253 let Predicates = [isEG] in {
   1254 
    1255 def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
         // Evergreen-only instantiations of the *_Common helper classes, plus the
         // division, POW_Common and fsqrt patterns bound to the _eg records.
    1256 defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
    1257 
    1258 def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
    1259 def MULHI_INT_eg : MULHI_INT_Common<0x90>;
    1260 def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
    1261 def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
    1262 def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
    1263 def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
    1264 def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
    1265 def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
    1266 def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
    1267 def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
    1268 def SIN_eg : SIN_Common<0x8D>;
    1269 def COS_eg : COS_Common<0x8E>;
    1270 
    1271 def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
         // fsqrt(src) is lowered to src * RECIPSQRT_CLAMPED(src).
    1272 def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
   1273 
   1274 //===----------------------------------------------------------------------===//
   1275 // Memory read/write instructions
   1276 //===----------------------------------------------------------------------===//
    1277 let usesCustomInserter = 1 in {
    1278 
         // Base class for the cacheless RAT store records.  It forwards the input
         // operand list, a 4-bit mask, the assembly string and the pattern to
         // EG_CF_RAT together with the constants 0x57 and 0x2 and an empty output
         // list.  EG_CF_RAT is defined outside this chunk, so the meaning of those
         // two constants is not visible here.  usesCustomInserter is set for every
         // record derived from this class.
    1279 class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> mask, string name,
    1280                               list<dag> pattern>
    1281     : EG_CF_RAT <0x57, 0x2, mask, (outs), ins, name, pattern> {
    1282 }
    1283 
    1284 } // End usesCustomInserter = 1
   1285 
    1286 // 32-bit store
         // Selected for a global_store of an i32 value; mask 0x1.  The value and
         // the index must both live in R600_TReg32_X registers.
    1287 def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
    1288   (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
    1289   0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop",
    1290   [(global_store i32:$rw_gpr, i32:$index_gpr)]
    1291 >;
    1292 
    1293 // 64-bit store
         // Selected for a global_store of a v2i32 value held in R600_Reg64; mask 0x3.
    1294 def RAT_WRITE_CACHELESS_64_eg : RAT_WRITE_CACHELESS_eg <
    1295   (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
    1296   0x3, "RAT_WRITE_CACHELESS_64_eg $rw_gpr.XY, $index_gpr, $eop",
    1297   [(global_store v2i32:$rw_gpr, i32:$index_gpr)]
    1298 >;
    1299 
    1300 // 128-bit store
         // Selected for a global_store of a v4i32 value held in R600_Reg128; mask 0xf.
         // NOTE(review): this name string has no "_eg" suffix, unlike the 32- and
         // 64-bit records -- confirm whether the difference is intentional.
    1301 def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
    1302   (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
    1303   0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop",
    1304   [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
    1305 >;
   1306 
    1307 class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
         // Base class for the Evergreen vertex-fetch read records.
         //   name      - assembly string.
         //   buffer_id - copied into BUFFER_ID and forwarded to VTX_READ.
         //   outs      - destination operand list.
         //   pattern   - selection pattern.
         // Only the first 32-bit word (Word0) is mapped into Inst by this class.
    1308     : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {
    1309 
    1310   // Static fields
    1311   let VC_INST = 0;
    1312   let FETCH_TYPE = 2;
    1313   let FETCH_WHOLE_QUAD = 0;
    1314   let BUFFER_ID = buffer_id;
    1315   let SRC_REL = 0;
    1316   // XXX: We can infer this field based on the SRC_GPR.  This would allow us
    1317   // to store vertex addresses in any channel, not just X.
    1318   let SRC_SEL_X = 0;
    1319 
    1320   let Inst{31-0} = Word0;
    1321 }
   1322 
    1323 class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
         // 8-bit and 16-bit reads.  Both write a single R600_TReg32_X register:
         // DST_SEL_X is 0 and the Y/Z/W selectors are 7 (masked).  They differ in
         // MEGA_FETCH_COUNT (1 vs 2) and DATA_FORMAT (FMT_8 vs FMT_16).
    1324     : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
    1325                    (outs R600_TReg32_X:$dst_gpr), pattern> {
    1326 
    1327   let MEGA_FETCH_COUNT = 1;
    1328   let DST_SEL_X = 0;
    1329   let DST_SEL_Y = 7;   // Masked
    1330   let DST_SEL_Z = 7;   // Masked
    1331   let DST_SEL_W = 7;   // Masked
    1332   let DATA_FORMAT = 1; // FMT_8
    1333 }
    1334 
    1335 class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
    1336     : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
    1337                    (outs R600_TReg32_X:$dst_gpr), pattern> {
    1338   let MEGA_FETCH_COUNT = 2;
    1339   let DST_SEL_X = 0;
    1340   let DST_SEL_Y = 7;   // Masked
    1341   let DST_SEL_Z = 7;   // Masked
    1342   let DST_SEL_W = 7;   // Masked
    1343   let DATA_FORMAT = 5; // FMT_16
    1344 
    1345 }
   1346 
    1347 class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
         // 32-bit read into a single R600_TReg32_X register (Y/Z/W masked), with
         // MEGA_FETCH_COUNT 4 and DATA_FORMAT COLOR_32.  The class also ties the
         // address register to the destination register; see the comment below.
    1348     : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
    1349                    (outs R600_TReg32_X:$dst_gpr), pattern> {
    1350 
    1351   let MEGA_FETCH_COUNT = 4;
    1352   let DST_SEL_X        = 0;
    1353   let DST_SEL_Y        = 7;   // Masked
    1354   let DST_SEL_Z        = 7;   // Masked
    1355   let DST_SEL_W        = 7;   // Masked
    1356   let DATA_FORMAT      = 0xD; // COLOR_32
    1357 
    1358   // This is not really necessary, but there were some GPU hangs that appeared
    1359   // to be caused by ALU instructions in the next instruction group that wrote
    1360   // to the $src_gpr registers of the VTX_READ.
    1361   // e.g.
    1362   // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
    1363   // %T2_X<def> = MOV %ZERO
    1364   // Adding this constraint prevents this from happening.
    1365   let Constraints = "$src_gpr.ptr = $dst_gpr";
    1366 }
   1367 
    1368 class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern>
         // 64-bit read into an R600_Reg64: X and Y are written (selectors 0 and 1),
         // Z and W use selector 7.  MEGA_FETCH_COUNT is 8.
    1369     : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
    1370                    (outs R600_Reg64:$dst_gpr), pattern> {
    1371 
    1372   let MEGA_FETCH_COUNT = 8;
    1373   let DST_SEL_X        = 0;
    1374   let DST_SEL_Y        = 1;
    1375   let DST_SEL_Z        = 7;
    1376   let DST_SEL_W        = 7;
    1377   let DATA_FORMAT      = 0x1D; // COLOR_32_32
    1378 }
    1379 
         // 128-bit read into an R600_Reg128: all four channels are written
         // (selectors 0-3).  MEGA_FETCH_COUNT is 16.  No register constraint is
         // set here; the XXX note below explains why.
    1380 class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
    1381     : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
    1382                    (outs R600_Reg128:$dst_gpr), pattern> {
    1383 
    1384   let MEGA_FETCH_COUNT = 16;
    1385   let DST_SEL_X        =  0;
    1386   let DST_SEL_Y        =  1;
    1387   let DST_SEL_Z        =  2;
    1388   let DST_SEL_W        =  3;
    1389   let DATA_FORMAT      =  0x22; // COLOR_32_32_32_32
    1390 
    1391   // XXX: Need to force VTX_READ_128 instructions to write to the same register
    1392   // that holds its buffer address to avoid potential hangs.  We can't use
    1393   // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
    1394   // registers are different sizes.
    1395 }
   1396 
   1397 //===----------------------------------------------------------------------===//
   1398 // VTX Read from parameter memory space
   1399 //===----------------------------------------------------------------------===//
   1400 
    1401 def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
         // Parameter-space reads all pass buffer_id 0.  The 8- and 16-bit records
         // match load_param_exti8 / load_param_exti16 producing i32; the 32-, 64-
         // and 128-bit records match load_param producing i32, v2i32 and v4i32.
         // The address is matched through ADDRVTX_READ (defined outside this chunk).
    1402   [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
    1403 >;
    1404 
    1405 def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
    1406   [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
    1407 >;
    1408 
    1409 def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
    1410   [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
    1411 >;
    1412 
    1413 def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
    1414   [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
    1415 >;
    1416 
    1417 def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
    1418   [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
    1419 >;
   1420 
   1421 //===----------------------------------------------------------------------===//
   1422 // VTX Read from global memory space
   1423 //===----------------------------------------------------------------------===//
   1424 
    1425 // 8-bit reads
         // Global-space reads all pass buffer_id 1.  The 8- and 16-bit records
         // match az_extloadi8_global / az_extloadi16_global producing i32.
    1426 def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
    1427   [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
    1428 >;
    1429 
         // 16-bit reads
    1430 def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1,
    1431   [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
    1432 >;
    1433 
    1434 // 32-bit reads
    1435 def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
    1436   [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
    1437 >;
    1438 
    1439 // 64-bit reads
    1440 def VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1,
    1441   [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
    1442 >;
    1443 
    1444 // 128-bit reads
    1445 def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
    1446   [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
    1447 >;
   1448 
   1449 } // End Predicates = [isEG]
   1450 
   1451 //===----------------------------------------------------------------------===//
   1452 // Evergreen / Cayman Instructions
   1453 //===----------------------------------------------------------------------===//
   1454 
   1455 let Predicates = [isEGorCayman] in {
   1456 
   1457   // BFE_UINT - bit_extract, an optimization for mask and shift
   1458   // Src0 = Input
   1459   // Src1 = Offset
   1460   // Src2 = Width
   1461   //
   1462   // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
   1463   //
   1464   // Example Usage:
   1465   // (Offset, Width)
   1466   //
   1467   // (0, 8)           = (Input << 24) >> 24  = (Input &  0xff)       >> 0
   1468   // (8, 8)           = (Input << 16) >> 24  = (Input &  0xffff)     >> 8
   1469   // (16,8)           = (Input <<  8) >> 24  = (Input &  0xffffff)   >> 16
   1470   // (24,8)           = (Input <<  0) >> 24  = (Input &  0xffffffff) >> 24
    1471   def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
           // Selected directly for the int_AMDIL_bit_extract_u32 intrinsic with
           // operands (input, offset, width); uses the VecALU itinerary.
    1472     [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1,
    1473                                                i32:$src2))],
    1474     VecALU
    1475   >;
           // BFEPattern is defined outside this chunk; it attaches a further
           // pattern to the same record.
    1476   def : BFEPattern <BFE_UINT_eg>;
   1477 
    1478   def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
           // BFI_INT and BIT_ALIGN_INT have empty pattern lists; they are reached
           // through BFIPatterns and ROTRPattern (both defined outside this chunk).
    1479   defm : BFIPatterns <BFI_INT_eg>;
    1480 
           // dst = (src0 * src1) + src2, where both multiplicands must satisfy the
           // U24 operand predicate (defined outside this chunk).
    1481   def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
    1482     [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU
    1483   >;
    1484   def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
    1485   def : ROTRPattern <BIT_ALIGN_INT_eg>;
   1486 
    1487   def MULADD_eg : MULADD_Common<0x14>;
           // Instantiations of the *_Common helper classes with the `inst` values
           // used under the isEGorCayman predicate.
    1488   def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
    1489   def ASHR_eg : ASHR_Common<0x15>;
    1490   def LSHR_eg : LSHR_Common<0x16>;
    1491   def LSHL_eg : LSHL_Common<0x17>;
    1492   def CNDE_eg : CNDE_Common<0x19>;
    1493   def CNDGT_eg : CNDGT_Common<0x1A>;
    1494   def CNDGE_eg : CNDGE_Common<0x1B>;
    1495   def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
    1496   def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
           // Selected for a mul whose operands both satisfy the U24 predicate.
    1497   def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
    1498     [(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU
    1499   >;
    1500   def DOT4_eg : DOT4_Common<0xBE>;
           // defm: produces CUBE_eg_pseudo and CUBE_eg_real.
    1501   defm CUBE_eg : CUBE_Common<0xC0>;
   1502 
    1503 let hasSideEffects = 1 in {
           // MOVA_INT has no selection pattern and is flagged as having side effects.
    1504   def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
    1505 }
   1506 
    1507   def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
    1508 
           // FLT_TO_INT_eg overrides three things inherited from FLT_TO_INT_Common:
           // the pattern is cleared, TransOnly is reset to 0 and the itinerary
           // becomes AnyALU.  Selection for fp_to_sint is provided by a separate
           // Pat in this file that wraps the operand in TRUNC.
    1509   def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
    1510     let Pattern = [];
    1511     let TransOnly = 0;
    1512     let Itinerary = AnyALU;
    1513   }
    1514 
    1515   def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
    1516 
           // FLT_TO_UINT_eg only clears the inherited pattern; it keeps
           // TransOnly = 1 and the TransALU itinerary from its base class.
    1517   def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
    1518     let Pattern = [];
    1519   }
    1520 
    1521   def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
   1522 
    1523 def GROUP_BARRIER : InstR600 <
         // Operand-less ALU instruction selected for the int_AMDGPU_barrier_local
         // intrinsic.  It is encoded as an OP2-format ALU word pair with
         // R600ALU_Word1_OP2 instantiated with 0x54.  Because the record has no
         // operands, every encoding field is pinned to a constant below; `last`
         // is the only one set to 1.
    1524     (outs), (ins), "  GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>,
    1525     R600ALU_Word0,
    1526     R600ALU_Word1_OP2 <0x54> {
    1527 
    1528   let dst = 0;
    1529   let dst_rel = 0;
    1530   let src0 = 0;
    1531   let src0_rel = 0;
    1532   let src0_neg = 0;
    1533   let src0_abs = 0;
    1534   let src1 = 0;
    1535   let src1_rel = 0;
    1536   let src1_neg = 0;
    1537   let src1_abs = 0;
    1538   let write = 0;
    1539   let omod = 0;
    1540   let clamp = 0;
    1541   let last = 1;
    1542   let bank_swizzle = 0;
    1543   let pred_sel = 0;
    1544   let update_exec_mask = 0;
    1545   let update_pred = 0;
    1546 
    1547   let Inst{31-0}  = Word0;
    1548   let Inst{63-32} = Word1;
    1549 
    1550   let ALUInst = 1;
    1551 }
   1552 
   1553 //===----------------------------------------------------------------------===//
   1554 // LDS Instructions
   1555 //===----------------------------------------------------------------------===//
    1556 class R600_LDS  <bits<6> op, dag outs, dag ins, string asm,
         // Base class for LDS instructions.
         //   op      - 6-bit value stored in lds_op.
         //   outs    - output operand list.
         //   ins     - input operand list.
         //   asm     - assembly string.
         //   pattern - selection pattern (empty by default).
         // Uses the XALU itinerary and the R600_ALU_LDS_Word0 / R600LDS_Word1
         // encoding mix-ins.
    1557                  list<dag> pattern = []> :
    1558 
    1559     InstR600 <outs, ins, asm, pattern, XALU>,
    1560     R600_ALU_LDS_Word0,
    1561     R600LDS_Word1 {
    1562 
           // The 6-bit offset defaults to 0 and is scattered bit by bit across
           // non-contiguous positions of both encoding words.
    1563   bits<6>  offset = 0;
    1564   let lds_op = op;
    1565 
    1566   let Word1{27} = offset{0};
    1567   let Word1{12} = offset{1};
    1568   let Word1{28} = offset{2};
    1569   let Word1{31} = offset{3};
    1570   let Word0{12} = offset{4};
    1571   let Word0{25} = offset{5};
    1572 
    1573 
    1574   let Inst{31-0}  = Word0;
    1575   let Inst{63-32} = Word1;
    1576 
    1577   let ALUInst = 1;
    1578   let HasNativeOperands = 1;
    1579   let UseNamedOperandTable = 1;
    1580 }
   1581 
    1582 class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
         // LDS instruction form with one source operand (src0) and a 32-bit
         // result.  The assembly string prints "OQAP" in the destination position
         // and the record lists OQAP in Defs; $dst is excluded from the encoding
         // through DisableEncoding.  src1 and src2 are pinned to 0.
         // usesCustomInserter is set, so records of this class get custom
         // handling when inserted (that code is outside this chunk).
    1583   lds_op,
    1584   (outs R600_Reg32:$dst),
    1585   (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
    1586        LAST:$last, R600_Pred:$pred_sel,
    1587        BANK_SWIZZLE:$bank_swizzle),
    1588   "  "#name#" $last OQAP, $src0$src0_rel $pred_sel",
    1589   pattern
    1590   > {
    1591 
    1592   let src1 = 0;
    1593   let src1_rel = 0;
    1594   let src2 = 0;
    1595   let src2_rel = 0;
    1596 
    1597   let Defs = [OQAP];
    1598   let usesCustomInserter = 1;
    1599   let LDS_1A = 1;
    1600   let DisableEncoding = "$dst";
    1601 }
   1602 
    1603 class R600_LDS_1A1D <bits<6> lds_op, string name, list<dag> pattern> :
         // LDS instruction form with two source operands (src0 and src1) and no
         // result operand.  src2 is pinned to 0.
    1604     R600_LDS <
    1605   lds_op,
    1606   (outs),
    1607   (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
    1608        R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
    1609        LAST:$last, R600_Pred:$pred_sel,
    1610        BANK_SWIZZLE:$bank_swizzle),
    1611   "  "#name#" $last $src0$src0_rel, $src1$src1_rel, $pred_sel",
    1612   pattern
    1613   > {
    1614 
    1615   let src2 = 0;
    1616   let src2_rel = 0;
    1617   let LDS_1A1D = 1;
    1618 }
   1619 
    1620 def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
         // Selected for an i32 local_load; $src0 is the address operand.
    1621   [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
    1622 >;
    1623 
         // Selected for an i32 local_store.  Note the operand order in the
         // pattern: $src1 is the stored value and $src0 is the address.
    1624 def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE",
    1625   [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
    1626 >;
   1627 
   1628   // TRUNC is used for the FLT_TO_INT instructions to work around a
   1629   // perceived problem where the rounding modes are applied differently
   1630   // depending on the instruction and the slot they are in.
   1631   // See:
   1632   // https://bugs.freedesktop.org/show_bug.cgi?id=50232
   1633   // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
   1634   //
   1635   // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
   1636   // which do not need to be truncated since the fp values are 0.0f or 1.0f.
   1637   // We should look into handling these cases separately.
          //
          // Both patterns below therefore apply TRUNC to $src0 before the
          // float-to-integer conversion instruction.
   1638   def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
   1639 
   1640   def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
   1641 
   1642   // SHA-256 Patterns
          // Instantiates SHA256MaPattern (defined elsewhere) with BFI_INT_eg and
          // XOR_INT as the instructions to emit.
   1643   def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
   1644 
   1645   def EG_ExportSwz : ExportSwzInst {
            // Word1 layout for this export: constant bits are set inline, while
            // eop and inst are taken from fields of the record.
   1646     let Word1{19-16} = 0; // BURST_COUNT
   1647     let Word1{20} = 1; // VALID_PIXEL_MODE
   1648     let Word1{21} = eop;
   1649     let Word1{29-22} = inst;
   1650     let Word1{30} = 0; // MARK
   1651     let Word1{31} = 1; // BARRIER
   1652   }
          // NOTE(review): 83 is presumably the CF export opcode handed to
          // ExportPattern - confirm against the multiclass definition.
   1653   defm : ExportPattern<EG_ExportSwz, 83>;
   1654 
   1655   def EG_ExportBuf : ExportBufInst {
            // Same Word1 layout as EG_ExportSwz: constant bits are set inline,
            // eop and inst come from fields of the record.
   1656     let Word1{19-16} = 0; // BURST_COUNT
   1657     let Word1{20} = 1; // VALID_PIXEL_MODE
   1658     let Word1{21} = eop;
   1659     let Word1{29-22} = inst;
   1660     let Word1{30} = 0; // MARK
   1661     let Word1{31} = 1; // BARRIER
   1662   }
          // NOTE(review): the four values 0x40-0x43 are presumably one opcode per
          // stream-output buffer - confirm against SteamOutputExportPattern.
   1663   defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
   1664 
   1665   def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
   1666   "TEX $COUNT @$ADDR"> {
            // $ADDR and $COUNT are operands; POP_COUNT is not, so it is fixed
            // to zero.
   1667     let POP_COUNT = 0;
   1668   }
   1669   def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
   1670   "VTX $COUNT @$ADDR"> {
            // Same operand set as CF_TC_EG; only the first template argument and
            // the printed mnemonic differ.
   1671     let POP_COUNT = 0;
   1672   }
   1673   def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
   1674   "LOOP_START_DX10 @$ADDR"> {
            // Loop-control CF instructions take only $ADDR as an operand, so
            // POP_COUNT and COUNT are fixed to zero in each of the defs below.
   1675     let POP_COUNT = 0;
   1676     let COUNT = 0;
   1677   }
   1678   def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
   1679     let POP_COUNT = 0;
   1680     let COUNT = 0;
   1681   }
   1682   def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
   1683   "LOOP_BREAK @$ADDR"> {
   1684     let POP_COUNT = 0;
   1685     let COUNT = 0;
   1686   }
   1687   def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
   1688   "CONTINUE @$ADDR"> {
   1689     let POP_COUNT = 0;
   1690     let COUNT = 0;
   1691   }
   1692   def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
   1693   "JUMP @$ADDR POP:$POP_COUNT"> {
            // $ADDR and $POP_COUNT are operands here; only COUNT is fixed.
   1694     let COUNT = 0;
   1695   }
   1696   def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
   1697   "ELSE @$ADDR POP:$POP_COUNT"> {
   1698     let COUNT = 0;
   1699   }
   1700   def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
            // No operands at all, so every field is fixed to zero.
   1701     let ADDR = 0;
   1702     let COUNT = 0;
   1703     let POP_COUNT = 0;
   1704   }
   1705   def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
   1706   "POP @$ADDR POP:$POP_COUNT"> {
   1707     let COUNT = 0;
   1708   }
   1709   def CF_END_EG :  CF_CLAUSE_EG<0, (ins), "CF_END"> {
            // Takes no operands: all fields are zero and the END_OF_PROGRAM bit
            // is set.
   1710     let COUNT = 0;
   1711     let POP_COUNT = 0;
   1712     let ADDR = 0;
   1713     let END_OF_PROGRAM = 1;
   1714   }
   1715 
   1716 } // End Predicates = [isEGorCayman]
   1717 
   1718 //===----------------------------------------------------------------------===//
   1719 // Register loads and stores - for indirect addressing
   1720 //===----------------------------------------------------------------------===//
   1721 
        // Instantiates the RegisterLoadStore multiclass (defined elsewhere) for
        // R600_Reg32; the generated records are named with the "R600_" prefix.
   1722 defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
   1723 
   1724 //===----------------------------------------------------------------------===//
   1725 // Cayman Instructions
   1726 //===----------------------------------------------------------------------===//
   1727 
   1728 let Predicates = [isCayman] in {
   1729 
   1730 def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24",
          // Matches add(mul($src0, $src1), $src2) where both multiply operands
          // satisfy I24.
          // NOTE(review): I24 is presumably a 24-bit signed operand predicate
          // defined elsewhere - confirm.
   1731   [(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))], VecALU
   1732 >;
   1733 def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24",
          // Matches mul($src0, $src1) where both operands satisfy I24.
   1734   [(set i32:$dst, (mul I24:$src0, I24:$src1))], VecALU
   1735 >;
   1736 
   1737 let isVector = 1 in {
        // Each def below instantiates a shared *_Common class with the opcode it
        // uses on Cayman; isVector = 1 is applied to all of them.
   1738 
   1739 def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
   1740 
   1741 def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
   1742 def MULHI_INT_cm : MULHI_INT_Common<0x90>;
   1743 def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
   1744 def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
   1745 def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
   1746 def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
   1747 def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
   1748 def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
   1749 def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
   1750 def SIN_cm : SIN_Common<0x8D>;
   1751 def COS_cm : COS_Common<0x8E>;
   1752 } // End isVector = 1
   1753 
   1754 def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; // pow via Cayman LOG/EXP and MUL
   1755 
        // DIV_Common is instantiated with the Cayman reciprocal instruction; the
        // generated records carry the "DIV_cm" prefix.
   1756 defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
   1757 
   1758 // RECIP_UINT emulation for Cayman:
        //   1. convert $src0 to float (UINT_TO_FLT_eg),
        //   2. take the reciprocal (RECIP_IEEE_cm),
        //   3. multiply by CONST.FP_UINT_MAX_PLUS_1 (MUL_IEEE),
        //   4. convert back to unsigned (FLT_TO_UINT_eg).
   1759 // The multiplication scales from [0,1] to the unsigned integer range
   1760 def : Pat <
   1761   (AMDGPUurecip i32:$src0),
   1762   (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
   1763                             (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
   1764 >;
   1765 
   1766   def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
            // Cayman variant of CF_END: it passes 32 as the first template
            // argument and, unlike CF_END_EG, does not set END_OF_PROGRAM.
            // No operands, so all fields are fixed to zero.
   1767     let ADDR = 0;
   1768     let POP_COUNT = 0;
   1769     let COUNT = 0;
   1770   }
   1771 
   1772 def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; // sqrt(x) as x * rsqrt(x)
   1773 
   1774 
   1775 class  RAT_STORE_DWORD_cm <bits<4> mask, dag ins, list<dag> pat> : EG_CF_RAT <
   1776   0x57, 0x14, mask, (outs), ins,
   1777   "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", pat
   1778 > {
          // Base class for the Cayman RAT dword stores.  It forwards mask, ins
          // and pat to EG_CF_RAT with the first two arguments fixed to 0x57 and
          // 0x14 and with no outputs.
   1779   let eop = 0; // This bit is not used on Cayman.
   1780 }
   1781 
   1782 def RAT_STORE_DWORD32_cm : RAT_STORE_DWORD_cm <0x1,
          // global_store of an i32 value ($rw_gpr) to the address in $index_gpr.
          // NOTE(review): mask 0x1 presumably enables only the first component -
          // confirm against EG_CF_RAT.
   1783   (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr),
   1784   [(global_store i32:$rw_gpr, i32:$index_gpr)]
   1785 >;
   1786 
   1787 def RAT_STORE_DWORD64_cm : RAT_STORE_DWORD_cm <0x3,
          // global_store of a v2i32 value held in a 64-bit register; the mask is
          // 0x3 instead of 0x1.
   1788   (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr),
   1789   [(global_store v2i32:$rw_gpr, i32:$index_gpr)]
   1790 >;
   1791 
   1792 class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
   1793     : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
          // Common base for the Cayman VTX_READ_* classes.  It fixes the fields
          // that are the same for every read width; the subclasses set the
          // DST_SEL_* fields and DATA_FORMAT.
   1794 
   1795   // Static fields
   1796   let VC_INST = 0;
   1797   let FETCH_TYPE = 2;
   1798   let FETCH_WHOLE_QUAD = 0;
   1799   let BUFFER_ID = buffer_id;
   1800   let SRC_REL = 0;
   1801   // XXX: We can infer this field based on the SRC_GPR.  This would allow us
   1802   // to store vertex addresses in any channel, not just X.
   1803   let SRC_SEL_X = 0;
   1804   let SRC_SEL_Y = 0;
   1805   let STRUCTURED_READ = 0;
   1806   let LDS_REQ = 0;
   1807   let COALESCED_READ = 0;
   1808 
          // Only the first 32-bit word of the encoding is assigned here.
   1809   let Inst{31-0} = Word0;
   1810 }
   1811 
   1812 class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern>
   1813     : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
   1814                    (outs R600_TReg32_X:$dst_gpr), pattern> {
          // 8-bit read into a single X-channel register: only DST_SEL_X is
          // enabled, the other three destination channels are masked.
   1815 
   1816   let DST_SEL_X = 0;
   1817   let DST_SEL_Y = 7;   // Masked
   1818   let DST_SEL_Z = 7;   // Masked
   1819   let DST_SEL_W = 7;   // Masked
   1820   let DATA_FORMAT = 1; // FMT_8
   1821 }
   1822 
   1823 class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern>
   1824     : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
   1825                    (outs R600_TReg32_X:$dst_gpr), pattern> {
          // 16-bit read into a single X-channel register: only DST_SEL_X is
          // enabled, the other three destination channels are masked.
   1826   let DST_SEL_X = 0;
   1827   let DST_SEL_Y = 7;   // Masked
   1828   let DST_SEL_Z = 7;   // Masked
   1829   let DST_SEL_W = 7;   // Masked
   1830   let DATA_FORMAT = 5; // FMT_16
   1831 
   1832 }
   1833 
   1834 class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern>
   1835     : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
   1836                    (outs R600_TReg32_X:$dst_gpr), pattern> {
          // 32-bit read into a single X-channel register: only DST_SEL_X is
          // enabled, the other three destination channels are masked.
   1837 
   1838   let DST_SEL_X        = 0;
   1839   let DST_SEL_Y        = 7;   // Masked
   1840   let DST_SEL_Z        = 7;   // Masked
   1841   let DST_SEL_W        = 7;   // Masked
   1842   let DATA_FORMAT      = 0xD; // COLOR_32
   1843 
   1844   // This is not really necessary, but there were some GPU hangs that appeared
   1845   // to be caused by ALU instructions in the next instruction group that wrote
   1846   // to the $src_gpr registers of the VTX_READ.
   1847   // e.g.
   1848   // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
   1849   // %T2_X<def> = MOV %ZERO
   1850   // Tying the address register to the destination register with this
          // constraint prevents that from happening.
   1851   let Constraints = "$src_gpr.ptr = $dst_gpr";
   1852 }
   1853 
   1854 class VTX_READ_64_cm <bits<8> buffer_id, list<dag> pattern>
   1855     : VTX_READ_cm <"VTX_READ_64 $dst_gpr, $src_gpr", buffer_id,
   1856                    (outs R600_Reg64:$dst_gpr), pattern> {
          // 64-bit read into an R600_Reg64: the first two destination channels
          // are enabled (selects 0 and 1); Z and W use 7, the value the narrower
          // read classes label "Masked".
   1857 
   1858   let DST_SEL_X        = 0;
   1859   let DST_SEL_Y        = 1;
   1860   let DST_SEL_Z        = 7;
   1861   let DST_SEL_W        = 7;
   1862   let DATA_FORMAT      = 0x1D; // COLOR_32_32
   1863 }
   1864 
   1865 class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern>
   1866     : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
   1867                    (outs R600_Reg128:$dst_gpr), pattern> {
          // 128-bit read into an R600_Reg128: all four destination channels are
          // enabled (selects 0-3).
   1868 
   1869   let DST_SEL_X        =  0;
   1870   let DST_SEL_Y        =  1;
   1871   let DST_SEL_Z        =  2;
   1872   let DST_SEL_W        =  3;
   1873   let DATA_FORMAT      =  0x22; // COLOR_32_32_32_32
   1874 
   1875   // XXX: Need to force VTX_READ_128 instructions to write to the same register
   1876   // that holds its buffer address to avoid potential hangs.  We can't use
   1877   // the same constraint as VTX_READ_32_cm, because the $src_gpr.ptr and
   1878   // $dst_gpr registers are different sizes.
   1879 }
   1880 
   1881 //===----------------------------------------------------------------------===//
   1882 // VTX Read from parameter memory space
   1883 //===----------------------------------------------------------------------===//
   1884 def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0,
          // All parameter-space reads below pass 0 as buffer_id.  The 8- and
          // 16-bit forms match the load_param_exti8 / load_param_exti16
          // fragments; the wider forms match load_param with the result type
          // selecting the instruction.
   1885   [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
   1886 >;
   1887 
   1888 def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0,
   1889   [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
   1890 >;
   1891 
   1892 def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0,
   1893   [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
   1894 >;
   1895 
   1896 def VTX_READ_PARAM_64_cm : VTX_READ_64_cm <0,
   1897   [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
   1898 >;
   1899 
   1900 def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0,
   1901   [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
   1902 >;
   1903 
   1904 //===----------------------------------------------------------------------===//
   1905 // VTX Read from global memory space
   1906 //===----------------------------------------------------------------------===//
   1907 
   1908 // 8-bit reads (all global-space reads below pass 1 as buffer_id)
   1909 def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1,
   1910   [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
   1911 >;
   1912 
        // 16-bit reads
   1913 def VTX_READ_GLOBAL_16_cm : VTX_READ_16_cm <1,
   1914   [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
   1915 >;
   1916 
   1917 // 32-bit reads
   1918 def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1,
   1919   [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
   1920 >;
   1921 
   1922 // 64-bit reads
   1923 def VTX_READ_GLOBAL_64_cm : VTX_READ_64_cm <1,
   1924   [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
   1925 >;
   1926 
   1927 // 128-bit reads
   1928 def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1,
   1929   [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
   1930 >;
   1931 
   1932 } // End isCayman
   1933 
   1934 //===----------------------------------------------------------------------===//
   1935 // Branch Instructions
   1936 //===----------------------------------------------------------------------===//
   1937 
   1938 
   1939 def IF_PREDICATE_SET  : ILFormat<(outs), (ins GPRI32:$src),
          // Takes one GPRI32 operand and has an empty pattern list, so it is
          // never chosen by pattern-based selection.
   1940   "IF_PREDICATE_SET $src", []>;
   1941 
   1942 //===----------------------------------------------------------------------===//
   1943 // Pseudo instructions
   1944 //===----------------------------------------------------------------------===//
   1945 
   1946 let isPseudo = 1 in {
   1947 
   1948 def PRED_X : InstR600 <
   1949   (outs R600_Predicate_Bit:$dst),
   1950   (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
   1951   "", [], NullALU> {
          // Pseudo that defines a predicate bit.  It has no asm string and no
          // selection pattern.
          // $flags is operand 3 when counting from $dst = 0.
   1952   let FlagOperandIdx = 3;
   1953 }
   1954 
   1955 let isTerminator = 1, isBranch = 1 in {
        // Conditional jump: branches to $target under predicate bit $p.
        // No selection pattern is attached.
   1956 def JUMP_COND : InstR600 <
   1957           (outs),
   1958           (ins brtarget:$target, R600_Predicate_Bit:$p),
   1959           "JUMP $target ($p)",
   1960           [], AnyALU
   1961   >;
   1962 
        // Unconditional jump: marked predicable and as a barrier in addition to
        // the terminator/branch flags from the enclosing let.
   1963 def JUMP : InstR600 <
   1964           (outs),
   1965           (ins brtarget:$target),
   1966           "JUMP $target",
   1967           [], AnyALU
   1968   >
   1969 {
   1970   let isPredicable = 1;
   1971   let isBarrier = 1;
   1972 }
   1973 
   1974 }  // End isTerminator = 1, isBranch = 1
   1975 
   1976 let usesCustomInserter = 1 in {
   1977 
   1978 let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
        // MASK_WRITE takes one register operand and has no selection pattern of
        // its own; it is declared as having side effects but no memory access.
   1979 
   1980 def MASK_WRITE : AMDGPUShaderInst <
   1981     (outs),
   1982     (ins R600_Reg32:$src),
   1983     "MASK_WRITE $src",
   1984     []
   1985 >;
   1986 
   1987 } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
   1988 
   1989 
   1990 def TXD: InstR600 <
   1991   (outs R600_Reg128:$dst),
   1992   (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
   1993        i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
   1994   "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
          // Selected for int_AMDGPU_txd with three v4f32 sources and three
          // immediates (resource id, sampler id, texture target).
   1995   [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
   1996                      imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
   1997   NullALU > {
   1998   let TEXInst = 1;
   1999 }
   2000 
   2001 def TXD_SHADOW: InstR600 <
   2002   (outs R600_Reg128:$dst),
   2003   (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
   2004        i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
   2005   "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
          // Same intrinsic as TXD, but the texture-target operand is matched
          // through TEX_SHADOW (defined elsewhere) instead of a plain imm.
   2006   [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
   2007         imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))],
   2008    NullALU
   2009 > {
   2010   let TEXInst = 1;
   2011 }
   2012 } // End usesCustomInserter = 1
   2013 } // End isPseudo = 1
   2014 
   2015 def CLAMP_R600 :  CLAMP <R600_Reg32>; // CLAMP class instantiated for R600_Reg32
   2016 def FABS_R600 : FABS<R600_Reg32>;     // FABS class instantiated for R600_Reg32
   2017 def FNEG_R600 : FNEG<R600_Reg32>;     // FNEG class instantiated for R600_Reg32
   2018 
   2019 //===---------------------------------------------------------------------===//
   2020 // Return instruction
   2021 //===---------------------------------------------------------------------===//
   2022 let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
   2023     usesCustomInserter = 1 in {
          // Return pseudo selected for IL_retflag.  It accepts a variable number
          // of operands and is marked usesCustomInserter.
   2024   def RETURN          : ILFormat<(outs), (ins variable_ops),
   2025       "RETURN", [(IL_retflag)]>;
   2026 }
   2027 
   2028 
   2029 //===----------------------------------------------------------------------===//
   2030 // Constant Buffer Addressing Support
   2031 //===----------------------------------------------------------------------===//
   2032 
   2033 let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in {
        // CONST_COPY derives directly from Instruction rather than InstR600, so
        // its namespace, operand lists, pattern and itinerary are all set
        // explicitly here or by the enclosing let.
   2034 def CONST_COPY : Instruction {
   2035   let OutOperandList = (outs R600_Reg32:$dst);
   2036   let InOperandList = (ins i32imm:$src);
          // Selected for CONST_ADDRESS when the address matches
          // ADDRGA_CONST_OFFSET.
   2037   let Pattern =
   2038       [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
   2039   let AsmString = "CONST_COPY";
   2040   let neverHasSideEffects = 1;
   2041   let isAsCheapAsAMove = 1;
   2042   let Itinerary = NullALU;
   2043 }
   2044 } // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
   2045 
   2046 def TEX_VTX_CONSTBUF :
   2047   InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
   2048       [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
   2049   VTX_WORD1_GPR, VTX_WORD0_eg {
          // Vertex fetch of a v4i32 from a constant buffer.  Selected for
          // CONST_ADDRESS when the address matches ADDRGA_VAR_OFFSET; the buffer
          // id is an immediate operand.
   2050 
   2051   let VC_INST = 0;
   2052   let FETCH_TYPE = 2;
   2053   let FETCH_WHOLE_QUAD = 0;
   2054   let SRC_REL = 0;
   2055   let SRC_SEL_X = 0;
   2056   let DST_REL = 0;
   2057   let USE_CONST_FIELDS = 0;
   2058   let NUM_FORMAT_ALL = 2;
   2059   let FORMAT_COMP_ALL = 1;
   2060   let SRF_MODE_ALL = 1;
   2061   let MEGA_FETCH_COUNT = 16;
   2062   let DST_SEL_X        = 0;
   2063   let DST_SEL_Y        = 1;
   2064   let DST_SEL_Z        = 2;
   2065   let DST_SEL_W        = 3;
   2066   let DATA_FORMAT      = 35;
   2067 
          // Only the first two 32-bit words are encoded through TableGen.
   2068   let Inst{31-0} = Word0;
   2069   let Inst{63-32} = Word1;
   2070 
   2071 // LLVM can only encode 64-bit instructions, so these fields are manually
   2072 // encoded in R600CodeEmitter
   2073 //
   2074 // bits<16> OFFSET;
   2075 // bits<2>  ENDIAN_SWAP = 0;
   2076 // bits<1>  CONST_BUF_NO_STRIDE = 0;
   2077 // bits<1>  MEGA_FETCH = 0;
   2078 // bits<1>  ALT_CONST = 0;
   2079 // bits<2>  BUFFER_INDEX_MODE = 0;
   2080 
   2081 
   2082 
   2083 // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
   2084 // is done in R600CodeEmitter)
   2085 //
   2086 // Inst{79-64} = OFFSET;
   2087 // Inst{81-80} = ENDIAN_SWAP;
   2088 // Inst{82}    = CONST_BUF_NO_STRIDE;
   2089 // Inst{83}    = MEGA_FETCH;
   2090 // Inst{84}    = ALT_CONST;
   2091 // Inst{86-85} = BUFFER_INDEX_MODE;
   2092 // Inst{95-87} = 0; Reserved
   2093 
   2094 // VTX_WORD3 (Padding)
   2095 //
   2096 // Inst{127-96} = 0;
   2097   let VTXInst = 1;
   2098 }
   2099 
   2100 def TEX_VTX_TEXBUF :
   2101   InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
   2102       [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
   2103   VTX_WORD1_GPR, VTX_WORD0_eg {
          // Vertex fetch of a v4f32 from a texture buffer, selected for the
          // int_R600_load_texbuf intrinsic.  Laid out like TEX_VTX_CONSTBUF; it
          // differs in USE_CONST_FIELDS, NUM_FORMAT_ALL, FORMAT_COMP_ALL and
          // DATA_FORMAT.
   2104 
   2105   let VC_INST = 0;
   2106   let FETCH_TYPE = 2;
   2107   let FETCH_WHOLE_QUAD = 0;
   2108   let SRC_REL = 0;
   2109   let SRC_SEL_X = 0;
   2110   let DST_REL = 0;
   2111   let USE_CONST_FIELDS = 1;
   2112   let NUM_FORMAT_ALL = 0;
   2113   let FORMAT_COMP_ALL = 0;
   2114   let SRF_MODE_ALL = 1;
   2115   let MEGA_FETCH_COUNT = 16;
   2116   let DST_SEL_X        = 0;
   2117   let DST_SEL_Y        = 1;
   2118   let DST_SEL_Z        = 2;
   2119   let DST_SEL_W        = 3;
   2120   let DATA_FORMAT      = 0;
   2121 
          // Only the first two 32-bit words are encoded through TableGen.
   2122   let Inst{31-0} = Word0;
   2123   let Inst{63-32} = Word1;
   2124 
   2125   // LLVM can only encode 64-bit instructions, so these fields are manually
   2126   // encoded in R600CodeEmitter
   2127   //
   2128   // bits<16> OFFSET;
   2129   // bits<2>  ENDIAN_SWAP = 0;
   2130   // bits<1>  CONST_BUF_NO_STRIDE = 0;
   2131   // bits<1>  MEGA_FETCH = 0;
   2132   // bits<1>  ALT_CONST = 0;
   2133   // bits<2>  BUFFER_INDEX_MODE = 0;
   2134 
   2135 
   2136 
   2137   // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
   2138   // is done in R600CodeEmitter)
   2139   //
   2140   // Inst{79-64} = OFFSET;
   2141   // Inst{81-80} = ENDIAN_SWAP;
   2142   // Inst{82}    = CONST_BUF_NO_STRIDE;
   2143   // Inst{83}    = MEGA_FETCH;
   2144   // Inst{84}    = ALT_CONST;
   2145   // Inst{86-85} = BUFFER_INDEX_MODE;
   2146   // Inst{95-87} = 0; Reserved
   2147 
   2148   // VTX_WORD3 (Padding)
   2149   //
   2150   // Inst{127-96} = 0;
   2151   let VTXInst = 1;
   2152 }
   2153 
   2154 
   2155 
   2156 //===--------------------------------------------------------------------===//
   2157 // Instructions support
   2158 //===--------------------------------------------------------------------===//
   2159 //===---------------------------------------------------------------------===//
   2160 // Custom Inserter for Branches and returns, this eventually will be a
   2161 // separate pass
   2162 //===---------------------------------------------------------------------===//
   2163 let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
          // Pseudo selected for an unconditional `br` to a basic block.  The asm
          // string is only a comment, and the instruction is marked
          // usesCustomInserter.
   2164   def BRANCH : ILFormat<(outs), (ins brtarget:$target),
   2165       "; Pseudo unconditional branch instruction",
   2166       [(br bb:$target)]>;
          // Conditional-branch pseudos come from the BranchConditional multiclass
          // (defined elsewhere), instantiated with IL_brcond.
   2167   defm BRANCH_COND : BranchConditional<IL_brcond>;
   2168 }
   2169 
   2170 //===---------------------------------------------------------------------===//
   2171 // Flow and Program control Instructions
   2172 //===---------------------------------------------------------------------===//
   2173 let isTerminator = 1 in {
          // Structured control-flow pseudos.  None of the ILFormat defs carries a
          // selection pattern; SWITCH and CASE take one GPRI32 operand, the rest
          // take no operands.
   2174   def SWITCH : ILFormat <(outs), (ins GPRI32:$src),
   2175       "SWITCH $src", []>;
   2176   def CASE : ILFormat <(outs), (ins GPRI32:$src),
   2177       "CASE $src", []>;
   2178   def BREAK : ILFormat <(outs), (ins),
   2179       "BREAK", []>;
   2180   def CONTINUE : ILFormat <(outs), (ins),
   2181       "CONTINUE", []>;
   2182   def DEFAULT : ILFormat <(outs), (ins),
   2183       "DEFAULT", []>;
   2184   def ELSE : ILFormat <(outs), (ins),
   2185       "ELSE", []>;
   2186   def ENDSWITCH : ILFormat <(outs), (ins),
   2187       "ENDSWITCH", []>;
   2188   def ENDMAIN : ILFormat <(outs), (ins),
   2189       "ENDMAIN", []>;
   2190   def END : ILFormat <(outs), (ins),
   2191       "END", []>;
   2192   def ENDFUNC : ILFormat <(outs), (ins),
   2193       "ENDFUNC", []>;
   2194   def ENDIF : ILFormat <(outs), (ins),
   2195       "ENDIF", []>;
   2196   def WHILELOOP : ILFormat <(outs), (ins),
   2197       "WHILE", []>;
   2198   def ENDLOOP : ILFormat <(outs), (ins),
   2199       "ENDLOOP", []>;
   2200   def FUNC : ILFormat <(outs), (ins),
   2201       "FUNC", []>;
   2202   def RETDYN : ILFormat <(outs), (ins),
   2203       "RET_DYN", []>;
   2204   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   2205   defm IF_LOGICALNZ : BranchInstr <"IF_LOGICALNZ">;
   2206   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   2207   defm IF_LOGICALZ : BranchInstr <"IF_LOGICALZ">;
   2208   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   2209   defm BREAK_LOGICALNZ : BranchInstr <"BREAK_LOGICALNZ">;
   2210   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   2211   defm BREAK_LOGICALZ : BranchInstr <"BREAK_LOGICALZ">;
   2212   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   2213   defm CONTINUE_LOGICALNZ : BranchInstr <"CONTINUE_LOGICALNZ">;
   2214   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   2215   defm CONTINUE_LOGICALZ : BranchInstr <"CONTINUE_LOGICALZ">;
   2216   defm IFC : BranchInstr2 <"IFC">;
   2217   defm BREAKC : BranchInstr2 <"BREAKC">;
   2218   defm CONTINUEC : BranchInstr2 <"CONTINUEC">;
   2219 }
   2220 
   2221 //===----------------------------------------------------------------------===//
   2222 // ISel Patterns
   2223 //===----------------------------------------------------------------------===//
   2224 
   2225 // CND*_INT Patterns for f32 True / False values
   2226 
   2227 class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
          // Matches a selectcc that compares an i32 against 0 under condition cc
          // and chooses between two f32 values; emits cnd with the three
          // operands in the same order.
   2228   (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
   2229   (cnd $src0, $src1, $src2)
   2230 >;
   2231 
   2232 def : CND_INT_f32 <CNDE_INT,  SETEQ>;
   2233 def : CND_INT_f32 <CNDGT_INT, SETGT>;
   2234 def : CND_INT_f32 <CNDGE_INT, SETGE>;
   2235 
   2236 // CNDGE_INT extra pattern: a comparison against -1 under COND_GT is mapped
        // to CNDGE_INT (x > -1 is the same test as x >= 0 for signed i32).
        // NOTE(review): this relies on COND_GT being a signed greater-than -
        // confirm against its definition.
   2237 def : Pat <
   2238   (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT),
   2239   (CNDGE_INT $src0, $src1, $src2)
   2240 >;
   2241 
   2242 // KIL Patterns
        // Both intrinsics are lowered to MASK_WRITE applied to a KILLGT result.
        // NOTE(review): KILLGT presumably kills when its first operand is greater
        // than its second - confirm against the KILLGT definition.
   2243 def KILP : Pat <
   2244   (int_AMDGPU_kilp),
          // Constant operands: KILLGT(ONE, ZERO).
   2245   (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
   2246 >;
   2247 
   2248 def KIL : Pat <
   2249   (int_AMDGPU_kill f32:$src0),
          // KILLGT(ZERO, $src0): the intrinsic's argument is the second operand.
   2250   (MASK_WRITE (KILLGT (f32 ZERO), $src0))
   2251 >;
   2252 
   2253 // SGT Reverse args
        // The four patterns below handle "less than" / "less or equal" float
        // compares by swapping the operands and using the corresponding
        // "greater" instruction.  The first two produce FP_ONE / FP_ZERO, the
        // last two produce -1 / 0.
   2254 def : Pat <
   2255   (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
   2256   (SGT $src1, $src0)
   2257 >;
   2258 
   2259 // SGE Reverse args
   2260 def : Pat <
   2261   (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
   2262   (SGE $src1, $src0)
   2263 >;
   2264 
   2265 // SETGT_DX10 reverse args
   2266 def : Pat <
   2267   (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
   2268   (SETGT_DX10 $src1, $src0)
   2269 >;
   2270 
   2271 // SETGE_DX10 reverse args
   2272 def : Pat <
   2273   (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
   2274   (SETGE_DX10 $src1, $src0)
   2275 >;
   2276 
   2277 // SETGT_INT reverse args
        // The four patterns below handle signed (SETLT / SETLE) and unsigned
        // (SETULT / SETULE) integer compares that yield -1 / 0 by swapping the
        // operands and using the corresponding "greater" instruction.
   2278 def : Pat <
   2279   (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
   2280   (SETGT_INT $src1, $src0)
   2281 >;
   2282 
   2283 // SETGE_INT reverse args
   2284 def : Pat <
   2285   (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
   2286   (SETGE_INT $src1, $src0)
   2287 >;
   2288 
   2289 // SETGT_UINT reverse args
   2290 def : Pat <
   2291   (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
   2292   (SETGT_UINT $src1, $src0)
   2293 >;
   2294 
   2295 // SETGE_UINT reverse args
   2296 def : Pat <
   2297   (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
   2298   (SETGE_UINT $src1, $src0)
   2299 >;
   2300 
   2301 // The next two patterns are special cases for handling 'true if ordered' and
   2302 // 'true if unordered' conditionals.  The assumption here is that the behavior of
   2303 // SETE and SNE conforms to the Direct3D 10 rules for floating point values
   2304 // described here:
   2305 // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
   2306 // We assume that SETE returns false when one of the operands is NAN and
   2307 // SNE returns true when one of the operands is NAN.
   2308 
   2309 // SETE - 'true if ordered'
        // NOTE(review): mapping SETO to an equality test and SETUO to an
        // inequality test is only exact when both operands hold the same value
        // (x == x / x != x) - confirm that this is how these nodes reach isel.
   2310 def : Pat <
   2311   (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
   2312   (SETE $src0, $src1)
   2313 >;
   2314 
   2315 // SETE_DX10 - 'true if ordered'
   2316 def : Pat <
   2317   (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
   2318   (SETE_DX10 $src0, $src1)
   2319 >;
   2320 
   2321 // SNE - 'true if unordered'
   2322 def : Pat <
   2323   (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
   2324   (SNE $src0, $src1)
   2325 >;
   2326 
   2327 // SETNE_DX10 - 'true if unordered'
   2328 def : Pat <
   2329   (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
   2330   (SETNE_DX10 $src0, $src1)
   2331 >;
   2332 
   2333 def : Extract_Element <f32, v4f32, 0, sub0>; // element i pairs with sub-register sub<i>
   2334 def : Extract_Element <f32, v4f32, 1, sub1>;
   2335 def : Extract_Element <f32, v4f32, 2, sub2>;
   2336 def : Extract_Element <f32, v4f32, 3, sub3>;
   2337 
   2338 def : Insert_Element <f32, v4f32, 0, sub0>;
   2339 def : Insert_Element <f32, v4f32, 1, sub1>;
   2340 def : Insert_Element <f32, v4f32, 2, sub2>;
   2341 def : Insert_Element <f32, v4f32, 3, sub3>;
   2342 
        // Same extract / insert patterns for the integer 4-vector type.
   2343 def : Extract_Element <i32, v4i32, 0, sub0>;
   2344 def : Extract_Element <i32, v4i32, 1, sub1>;
   2345 def : Extract_Element <i32, v4i32, 2, sub2>;
   2346 def : Extract_Element <i32, v4i32, 3, sub3>;
   2347 
   2348 def : Insert_Element <i32, v4i32, 0, sub0>;
   2349 def : Insert_Element <i32, v4i32, 1, sub1>;
   2350 def : Insert_Element <i32, v4i32, 2, sub2>;
   2351 def : Insert_Element <i32, v4i32, 3, sub3>;
   2352 
        // Vector4_Build is instantiated once per 4-element vector type.
   2353 def : Vector4_Build <v4f32, f32>;
   2354 def : Vector4_Build <v4i32, i32>;
   2355 
        // 2-element vectors use only sub0 and sub1.
   2356 def : Extract_Element <f32, v2f32, 0, sub0>;
   2357 def : Extract_Element <f32, v2f32, 1, sub1>;
   2358 
   2359 def : Insert_Element <f32, v2f32, 0, sub0>;
   2360 def : Insert_Element <f32, v2f32, 1, sub1>;
   2361 
   2362 def : Extract_Element <i32, v2i32, 0, sub0>;
   2363 def : Extract_Element <i32, v2i32, 1, sub1>;
   2364 
   2365 def : Insert_Element <i32, v2i32, 0, sub0>;
   2366 def : Insert_Element <i32, v2i32, 1, sub1>;
   2367 
   2368 // bitconvert patterns
        // One BitConvert instance per direction between the int and float type
        // of each width, using the register class of that width (32, 64 and
        // 128 bits).
   2369 
   2370 def : BitConvert <i32, f32, R600_Reg32>;
   2371 def : BitConvert <f32, i32, R600_Reg32>;
   2372 def : BitConvert <v2f32, v2i32, R600_Reg64>;
   2373 def : BitConvert <v2i32, v2f32, R600_Reg64>;
   2374 def : BitConvert <v4f32, v4i32, R600_Reg128>;
   2375 def : BitConvert <v4i32, v4f32, R600_Reg128>;
   2376 
   2377 // DWORDADDR pattern
        // Instantiates DwordAddrPat (defined elsewhere) for i32 values in
        // R600_Reg32.
   2378 def : DwordAddrPat  <i32, R600_Reg32>;
   2379 
   2380 } // End isR600toCayman Predicate
   2381