Home | History | Annotate | Download | only in R600
      1 //===-- R600Instructions.td - R600 Instruction defs  -------*- tablegen -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // R600 Tablegen instruction definitions
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 include "R600Intrinsics.td"
     15 
     16 class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
     17                 InstrItinClass itin>
     18     : AMDGPUInst <outs, ins, asm, pattern> {
     19 
     20   field bits<64> Inst;
     21   bit Trig = 0;
     22   bit Op3 = 0;
     23   bit isVector = 0;
     24   bits<2> FlagOperandIdx = 0;
     25   bit Op1 = 0;
     26   bit Op2 = 0;
     27   bit HasNativeOperands = 0;
     28 
     29   bits<11> op_code = inst;
     30   //let Inst = inst;
     31   let Namespace = "AMDGPU";
     32   let OutOperandList = outs;
     33   let InOperandList = ins;
     34   let AsmString = asm;
     35   let Pattern = pattern;
     36   let Itinerary = itin;
     37 
     38   let TSFlags{4} = Trig;
     39   let TSFlags{5} = Op3;
     40 
     41   // Vector instructions are instructions that must fill all slots in an
     42   // instruction group
     43   let TSFlags{6} = isVector;
     44   let TSFlags{8-7} = FlagOperandIdx;
     45   let TSFlags{9} = HasNativeOperands;
     46   let TSFlags{10} = Op1;
     47   let TSFlags{11} = Op2;
     48 }
     49 
     50 class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
     51     AMDGPUInst <outs, ins, asm, pattern> {
     52   field bits<64> Inst;
     53 
     54   let Namespace = "AMDGPU";
     55 }
     56 
     57 def MEMxi : Operand<iPTR> {
     58   let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
     59   let PrintMethod = "printMemOperand";
     60 }
     61 
     62 def MEMrr : Operand<iPTR> {
     63   let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
     64 }
     65 
     66 // Operands for non-registers
     67 
     68 class InstFlag<string PM = "printOperand", int Default = 0>
     69     : OperandWithDefaultOps <i32, (ops (i32 Default))> {
     70   let PrintMethod = PM;
     71 }
     72 
     73 // src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
     74 def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
     75   let PrintMethod = "printSel";
     76 }
     77 
     78 def LITERAL : InstFlag<"printLiteral">;
     79 
     80 def WRITE : InstFlag <"printWrite", 1>;
     81 def OMOD : InstFlag <"printOMOD">;
     82 def REL : InstFlag <"printRel">;
     83 def CLAMP : InstFlag <"printClamp">;
     84 def NEG : InstFlag <"printNeg">;
     85 def ABS : InstFlag <"printAbs">;
     86 def UEM : InstFlag <"printUpdateExecMask">;
     87 def UP : InstFlag <"printUpdatePred">;
     88 
     89 // XXX: The r600g finalizer in Mesa expects last to be one in most cases.
     90 // Once we start using the packetizer in this backend we should have this
     91 // default to 0.
     92 def LAST : InstFlag<"printLast", 1>;
     93 
     94 def FRAMEri : Operand<iPTR> {
     95   let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
     96 }
     97 
     98 def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
     99 def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
    100 def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
    101 def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
    102 def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
    103 def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
    104 
    105 class R600ALU_Word0 {
    106   field bits<32> Word0;
    107 
    108   bits<11> src0;
    109   bits<1>  src0_neg;
    110   bits<1>  src0_rel;
    111   bits<11> src1;
    112   bits<1>  src1_rel;
    113   bits<1>  src1_neg;
    114   bits<3>  index_mode = 0;
    115   bits<2>  pred_sel;
    116   bits<1>  last;
    117 
    118   bits<9>  src0_sel  = src0{8-0};
    119   bits<2>  src0_chan = src0{10-9};
    120   bits<9>  src1_sel  = src1{8-0};
    121   bits<2>  src1_chan = src1{10-9};
    122 
    123   let Word0{8-0}   = src0_sel;
    124   let Word0{9}     = src0_rel;
    125   let Word0{11-10} = src0_chan;
    126   let Word0{12}    = src0_neg;
    127   let Word0{21-13} = src1_sel;
    128   let Word0{22}    = src1_rel;
    129   let Word0{24-23} = src1_chan;
    130   let Word0{25}    = src1_neg;
    131   let Word0{28-26} = index_mode;
    132   let Word0{30-29} = pred_sel;
    133   let Word0{31}    = last;
    134 }
    135 
    136 class R600ALU_Word1 {
    137   field bits<32> Word1;
    138 
    139   bits<11> dst;
    140   bits<3>  bank_swizzle = 0;
    141   bits<1>  dst_rel;
    142   bits<1>  clamp;
    143 
    144   bits<7>  dst_sel  = dst{6-0};
    145   bits<2>  dst_chan = dst{10-9};
    146 
    147   let Word1{20-18} = bank_swizzle;
    148   let Word1{27-21} = dst_sel;
    149   let Word1{28}    = dst_rel;
    150   let Word1{30-29} = dst_chan;
    151   let Word1{31}    = clamp;
    152 }
    153 
    154 class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{
    155 
    156   bits<1>  src0_abs;
    157   bits<1>  src1_abs;
    158   bits<1>  update_exec_mask;
    159   bits<1>  update_pred;
    160   bits<1>  write;
    161   bits<2>  omod;
    162 
    163   let Word1{0}     = src0_abs;
    164   let Word1{1}     = src1_abs;
    165   let Word1{2}     = update_exec_mask;
    166   let Word1{3}     = update_pred;
    167   let Word1{4}     = write;
    168   let Word1{6-5}   = omod;
    169   let Word1{17-7}  = alu_inst;
    170 }
    171 
    172 class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
    173 
    174   bits<11> src2;
    175   bits<1>  src2_rel;
    176   bits<1>  src2_neg;
    177 
    178   bits<9>  src2_sel = src2{8-0};
    179   bits<2>  src2_chan = src2{10-9};
    180 
    181   let Word1{8-0}   = src2_sel;
    182   let Word1{9}     = src2_rel;
    183   let Word1{11-10} = src2_chan;
    184   let Word1{12}    = src2_neg;
    185   let Word1{17-13} = alu_inst;
    186 }
    187 
    188 class VTX_WORD0 {
    189   field bits<32> Word0;
    190   bits<7> SRC_GPR;
    191   bits<5> VC_INST;
    192   bits<2> FETCH_TYPE;
    193   bits<1> FETCH_WHOLE_QUAD;
    194   bits<8> BUFFER_ID;
    195   bits<1> SRC_REL;
    196   bits<2> SRC_SEL_X;
    197   bits<6> MEGA_FETCH_COUNT;
    198 
    199   let Word0{4-0}   = VC_INST;
    200   let Word0{6-5}   = FETCH_TYPE;
    201   let Word0{7}     = FETCH_WHOLE_QUAD;
    202   let Word0{15-8}  = BUFFER_ID;
    203   let Word0{22-16} = SRC_GPR;
    204   let Word0{23}    = SRC_REL;
    205   let Word0{25-24} = SRC_SEL_X;
    206   let Word0{31-26} = MEGA_FETCH_COUNT;
    207 }
    208 
    209 class VTX_WORD1_GPR {
    210   field bits<32> Word1;
    211   bits<7> DST_GPR;
    212   bits<1> DST_REL;
    213   bits<3> DST_SEL_X;
    214   bits<3> DST_SEL_Y;
    215   bits<3> DST_SEL_Z;
    216   bits<3> DST_SEL_W;
    217   bits<1> USE_CONST_FIELDS;
    218   bits<6> DATA_FORMAT;
    219   bits<2> NUM_FORMAT_ALL;
    220   bits<1> FORMAT_COMP_ALL;
    221   bits<1> SRF_MODE_ALL;
    222 
    223   let Word1{6-0} = DST_GPR;
    224   let Word1{7}    = DST_REL;
    225   let Word1{8}    = 0; // Reserved
    226   let Word1{11-9} = DST_SEL_X;
    227   let Word1{14-12} = DST_SEL_Y;
    228   let Word1{17-15} = DST_SEL_Z;
    229   let Word1{20-18} = DST_SEL_W;
    230   let Word1{21}    = USE_CONST_FIELDS;
    231   let Word1{27-22} = DATA_FORMAT;
    232   let Word1{29-28} = NUM_FORMAT_ALL;
    233   let Word1{30}    = FORMAT_COMP_ALL;
    234   let Word1{31}    = SRF_MODE_ALL;
    235 }
    236 
    237 /*
    238 XXX: R600 subtarget uses a slightly different encoding than the other
    239 subtargets.  We currently handle this in R600MCCodeEmitter, but we may
    240 want to use these instruction classes in the future.
    241 
    242 class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
    243 
    244   bits<1>  fog_merge;
    245   bits<10> alu_inst;
    246 
    247   let Inst{37}    = fog_merge;
    248   let Inst{39-38} = omod;
    249   let Inst{49-40} = alu_inst;
    250 }
    251 
    252 class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
    253 
    254   bits<11> alu_inst;
    255 
    256   let Inst{38-37} = omod;
    257   let Inst{49-39} = alu_inst;
    258 }
    259 */
    260 
    261 def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
    262                                      (ops PRED_SEL_OFF)>;
    263 
    264 
    265 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
    266 
    267 // Class for instructions with only one source register.
    268 // If you add new ins to this instruction, make sure they are listed before
    269 // $literal, because the backend currently assumes that the last operand is
    270 // a literal.  Also be sure to update the enum R600Op1OperandIndex::ROI in
    271 // R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
    272 // and R600InstrInfo::getOperandIdx().
    273 class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
    274                 InstrItinClass itin = AnyALU> :
    275     InstR600 <0,
    276               (outs R600_Reg32:$dst),
    277               (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
    278                    R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
    279                    LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
    280               !strconcat(opName,
    281                    "$clamp $dst$write$dst_rel$omod, "
    282                    "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
    283                    "$literal $pred_sel$last"),
    284               pattern,
    285               itin>,
    286     R600ALU_Word0,
    287     R600ALU_Word1_OP2 <inst> {
    288 
    289   let src1 = 0;
    290   let src1_rel = 0;
    291   let src1_neg = 0;
    292   let src1_abs = 0;
    293   let update_exec_mask = 0;
    294   let update_pred = 0;
    295   let HasNativeOperands = 1;
    296   let Op1 = 1;
    297   let DisableEncoding = "$literal";
    298 
    299   let Inst{31-0}  = Word0;
    300   let Inst{63-32} = Word1;
    301 }
    302 
    303 class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
    304                     InstrItinClass itin = AnyALU> :
    305     R600_1OP <inst, opName,
    306               [(set R600_Reg32:$dst, (node R600_Reg32:$src0))]
    307 >;
    308 
    309 // If you add our change the operands for R600_2OP instructions, you must
    310 // also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
    311 // R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
    312 class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
    313                 InstrItinClass itin = AnyALU> :
    314   InstR600 <inst,
    315           (outs R600_Reg32:$dst),
    316           (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
    317                OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
    318                R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
    319                R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
    320                LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
    321           !strconcat(opName,
    322                 "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
    323                 "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
    324                 "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
    325                 "$literal $pred_sel$last"),
    326           pattern,
    327           itin>,
    328     R600ALU_Word0,
    329     R600ALU_Word1_OP2 <inst> {
    330 
    331   let HasNativeOperands = 1;
    332   let Op2 = 1;
    333   let DisableEncoding = "$literal";
    334 
    335   let Inst{31-0}  = Word0;
    336   let Inst{63-32} = Word1;
    337 }
    338 
    339 class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
    340                        InstrItinClass itim = AnyALU> :
    341     R600_2OP <inst, opName,
    342               [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
    343                                            R600_Reg32:$src1))]
    344 >;
    345 
    346 // If you add our change the operands for R600_3OP instructions, you must
    347 // also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
    348 // R600InstrInfo::buildDefaultInstruction(), and
    349 // R600InstrInfo::getOperandIdx().
    350 class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
    351                 InstrItinClass itin = AnyALU> :
    352   InstR600 <0,
    353           (outs R600_Reg32:$dst),
    354           (ins REL:$dst_rel, CLAMP:$clamp,
    355                R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
    356                R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
    357                R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
    358                LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
    359           !strconcat(opName, "$clamp $dst$dst_rel, "
    360                              "$src0_neg$src0$src0_sel$src0_rel, "
    361                              "$src1_neg$src1$src1_sel$src1_rel, "
    362                              "$src2_neg$src2$src2_sel$src2_rel, "
    363                              "$literal $pred_sel$last"),
    364           pattern,
    365           itin>,
    366     R600ALU_Word0,
    367     R600ALU_Word1_OP3<inst>{
    368 
    369   let HasNativeOperands = 1;
    370   let DisableEncoding = "$literal";
    371   let Op3 = 1;
    372 
    373   let Inst{31-0}  = Word0;
    374   let Inst{63-32} = Word1;
    375 }
    376 
    377 class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
    378                       InstrItinClass itin = VecALU> :
    379   InstR600 <inst,
    380           (outs R600_Reg32:$dst),
    381           ins,
    382           asm,
    383           pattern,
    384           itin>;
    385 
    386 class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
    387                 InstrItinClass itin = AnyALU> :
    388   InstR600 <inst,
    389           (outs R600_Reg128:$dst),
    390           (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
    391           !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"),
    392           pattern,
    393           itin>{
    394     let Inst {10-0} = inst;
    395   }
    396 
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
    398 
    399 def TEX_SHADOW : PatLeaf<
    400   (imm),
    401   [{uint32_t TType = (uint32_t)N->getZExtValue();
    402     return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
    403   }]
    404 >;
    405 
    406 def TEX_RECT : PatLeaf<
    407   (imm),
    408   [{uint32_t TType = (uint32_t)N->getZExtValue();
    409     return TType == 5;
    410   }]
    411 >;
    412 
    413 def TEX_ARRAY : PatLeaf<
    414   (imm),
    415   [{uint32_t TType = (uint32_t)N->getZExtValue();
    416     return TType == 9 || TType == 10 || TType == 15 || TType == 16;
    417   }]
    418 >;
    419 
    420 def TEX_SHADOW_ARRAY : PatLeaf<
    421   (imm),
    422   [{uint32_t TType = (uint32_t)N->getZExtValue();
    423     return TType == 11 || TType == 12 || TType == 17;
    424   }]
    425 >;
    426 
    427 class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
    428                  dag ins, string asm, list<dag> pattern> :
    429     InstR600ISA <outs, ins, asm, pattern> {
    430   bits<7>  RW_GPR;
    431   bits<7>  INDEX_GPR;
    432 
    433   bits<2>  RIM;
    434   bits<2>  TYPE;
    435   bits<1>  RW_REL;
    436   bits<2>  ELEM_SIZE;
    437 
    438   bits<12> ARRAY_SIZE;
    439   bits<4>  COMP_MASK;
    440   bits<4>  BURST_COUNT;
    441   bits<1>  VPM;
    442   bits<1>  eop;
    443   bits<1>  MARK;
    444   bits<1>  BARRIER;
    445 
    446   // CF_ALLOC_EXPORT_WORD0_RAT
    447   let Inst{3-0}   = rat_id;
    448   let Inst{9-4}   = rat_inst;
    449   let Inst{10}    = 0; // Reserved
    450   let Inst{12-11} = RIM;
    451   let Inst{14-13} = TYPE;
    452   let Inst{21-15} = RW_GPR;
    453   let Inst{22}    = RW_REL;
    454   let Inst{29-23} = INDEX_GPR;
    455   let Inst{31-30} = ELEM_SIZE;
    456 
    457   // CF_ALLOC_EXPORT_WORD1_BUF
    458   let Inst{43-32} = ARRAY_SIZE;
    459   let Inst{47-44} = COMP_MASK;
    460   let Inst{51-48} = BURST_COUNT;
    461   let Inst{52}    = VPM;
    462   let Inst{53}    = eop;
    463   let Inst{61-54} = cf_inst;
    464   let Inst{62}    = MARK;
    465   let Inst{63}    = BARRIER;
    466 }
    467 
    468 class LoadParamFrag <PatFrag load_type> : PatFrag <
    469   (ops node:$ptr), (load_type node:$ptr),
    470   [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
    471 >;
    472 
    473 def load_param : LoadParamFrag<load>;
    474 def load_param_zexti8 : LoadParamFrag<zextloadi8>;
    475 def load_param_zexti16 : LoadParamFrag<zextloadi16>;
    476 
    477 def isR600 : Predicate<"Subtarget.device()"
    478                             "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
    479 def isR700 : Predicate<"Subtarget.device()"
    480                             "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
    481                             "Subtarget.device()->getDeviceFlag()"
    482                             ">= OCL_DEVICE_RV710">;
    483 def isEG : Predicate<
    484   "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
    485   "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
    486   "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
    487 
    488 def isCayman : Predicate<"Subtarget.device()"
    489                             "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
    490 def isEGorCayman : Predicate<"Subtarget.device()"
    491                             "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
    492                             "|| Subtarget.device()->getGeneration() =="
    493                             "AMDGPUDeviceInfo::HD6XXX">;
    494 
    495 def isR600toCayman : Predicate<
    496                      "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
    497 
    498 //===----------------------------------------------------------------------===//
    499 // R600 SDNodes
    500 //===----------------------------------------------------------------------===//
    501 
    502 def INTERP_PAIR_XY :  AMDGPUShaderInst <
    503   (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
    504   (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
    505   "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1",
    506   []>;
    507 
    508 def INTERP_PAIR_ZW :  AMDGPUShaderInst <
    509   (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
    510   (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
    511   "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
    512   []>;
    513 
    514 def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
    515   SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
    516   [SDNPVariadic]
    517 >;
    518 
    519 //===----------------------------------------------------------------------===//
    520 // Interpolation Instructions
    521 //===----------------------------------------------------------------------===//
    522 
    523 def INTERP_VEC_LOAD :  AMDGPUShaderInst <
    524   (outs R600_Reg128:$dst),
    525   (ins i32imm:$src0),
    526   "INTERP_LOAD $src0 : $dst",
    527   []>;
    528 
    529 def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
    530   let bank_swizzle = 5;
    531 }
    532 
    533 def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
    534   let bank_swizzle = 5;
    535 }
    536 
    537 def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
    538 
    539 //===----------------------------------------------------------------------===//
    540 // Export Instructions
    541 //===----------------------------------------------------------------------===//
    542 
    543 def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
    544 
    545 def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
    546   [SDNPHasChain, SDNPSideEffect]>;
    547 
    548 class ExportWord0 {
    549   field bits<32> Word0;
    550 
    551   bits<13> arraybase;
    552   bits<2> type;
    553   bits<7> gpr;
    554   bits<2> elem_size;
    555 
    556   let Word0{12-0} = arraybase;
    557   let Word0{14-13} = type;
    558   let Word0{21-15} = gpr;
    559   let Word0{22} = 0; // RW_REL
    560   let Word0{29-23} = 0; // INDEX_GPR
    561   let Word0{31-30} = elem_size;
    562 }
    563 
    564 class ExportSwzWord1 {
    565   field bits<32> Word1;
    566 
    567   bits<3> sw_x;
    568   bits<3> sw_y;
    569   bits<3> sw_z;
    570   bits<3> sw_w;
    571   bits<1> eop;
    572   bits<8> inst;
    573 
    574   let Word1{2-0} = sw_x;
    575   let Word1{5-3} = sw_y;
    576   let Word1{8-6} = sw_z;
    577   let Word1{11-9} = sw_w;
    578 }
    579 
    580 class ExportBufWord1 {
    581   field bits<32> Word1;
    582 
    583   bits<12> arraySize;
    584   bits<4> compMask;
    585   bits<1> eop;
    586   bits<8> inst;
    587 
    588   let Word1{11-0} = arraySize;
    589   let Word1{15-12} = compMask;
    590 }
    591 
    592 multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
    593   def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
    594     (ExportInst
    595         (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
    596         0, 61, 0, 7, 7, 7, cf_inst, 0)
    597   >;
    598 
    599   def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
    600     (ExportInst
    601         (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
    602         0, 61, 7, 0, 7, 7, cf_inst, 0)
    603   >;
    604 
    605   def : Pat<(int_R600_store_dummy (i32 imm:$type)),
    606     (ExportInst
    607         (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
    608   >;
    609 
    610   def : Pat<(int_R600_store_dummy 1),
    611     (ExportInst
    612         (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
    613   >;
    614 
    615   def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
    616     (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
    617         (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
    618         imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
    619   >;
    620 
    621 }
    622 
    623 multiclass SteamOutputExportPattern<Instruction ExportInst,
    624     bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
    625 // Stream0
    626   def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
    627       (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
    628       (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
    629       4095, imm:$mask, buf0inst, 0)>;
    630 // Stream1
    631   def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
    632       (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
    633       (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
    634       4095, imm:$mask, buf1inst, 0)>;
    635 // Stream2
    636   def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
    637       (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
    638       (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
    639       4095, imm:$mask, buf2inst, 0)>;
    640 // Stream3
    641   def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
    642       (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
    643       (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
    644       4095, imm:$mask, buf3inst, 0)>;
    645 }
    646 
    647 let usesCustomInserter = 1 in {
    648 
    649 class ExportSwzInst : InstR600ISA<(
    650     outs),
    651     (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    652     i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
    653     i32imm:$eop),
    654     !strconcat("EXPORT", " $gpr"),
    655     []>, ExportWord0, ExportSwzWord1 {
    656   let elem_size = 3;
    657   let Inst{31-0} = Word0;
    658   let Inst{63-32} = Word1;
    659 }
    660 
    661 } // End usesCustomInserter = 1
    662 
    663 class ExportBufInst : InstR600ISA<(
    664     outs),
    665     (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    666     i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
    667     !strconcat("EXPORT", " $gpr"),
    668     []>, ExportWord0, ExportBufWord1 {
    669   let elem_size = 0;
    670   let Inst{31-0} = Word0;
    671   let Inst{63-32} = Word1;
    672 }
    673 
    674 let Predicates = [isR600toCayman] in {
    675 
    676 //===----------------------------------------------------------------------===//
    677 // Common Instructions R600, R700, Evergreen, Cayman
    678 //===----------------------------------------------------------------------===//
    679 
    680 def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
    681 // Non-IEEE MUL: 0 * anything = 0
    682 def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
    683 def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
    684 def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
    685 def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
    686 
    687 // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
    688 // so some of the instruction names don't match the asm string.
    689 // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
    690 def SETE : R600_2OP <
    691   0x08, "SETE",
    692   [(set R600_Reg32:$dst,
    693    (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
    694              COND_EQ))]
    695 >;
    696 
    697 def SGT : R600_2OP <
    698   0x09, "SETGT",
    699   [(set R600_Reg32:$dst,
    700    (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
    701               COND_GT))]
    702 >;
    703 
    704 def SGE : R600_2OP <
    705   0xA, "SETGE",
    706   [(set R600_Reg32:$dst,
    707    (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
    708               COND_GE))]
    709 >;
    710 
    711 def SNE : R600_2OP <
    712   0xB, "SETNE",
    713   [(set R600_Reg32:$dst,
    714    (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
    715     COND_NE))]
    716 >;
    717 
    718 def SETE_DX10 : R600_2OP <
    719   0xC, "SETE_DX10",
    720   [(set R600_Reg32:$dst,
    721    (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
    722     COND_EQ))]
    723 >;
    724 
    725 def SETGT_DX10 : R600_2OP <
    726   0xD, "SETGT_DX10",
    727   [(set R600_Reg32:$dst,
    728    (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
    729     COND_GT))]
    730 >;
    731 
    732 def SETGE_DX10 : R600_2OP <
    733   0xE, "SETGE_DX10",
    734   [(set R600_Reg32:$dst,
    735    (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
    736     COND_GE))]
    737 >;
    738 
    739 def SETNE_DX10 : R600_2OP <
    740   0xF, "SETNE_DX10",
    741   [(set R600_Reg32:$dst,
    742     (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
    743      COND_NE))]
    744 >;
    745 
    746 def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
    747 def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
    748 def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
    749 def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
    750 def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
    751 
    752 def MOV : R600_1OP <0x19, "MOV", []>;
    753 
    754 let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
    755 
    756 class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
    757   (outs R600_Reg32:$dst),
    758   (ins immType:$imm),
    759   "",
    760   []
    761 >;
    762 
    763 } // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
    764 
    765 def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
    766 def : Pat <
    767   (imm:$val),
    768   (MOV_IMM_I32 imm:$val)
    769 >;
    770 
    771 def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
    772 def : Pat <
    773   (fpimm:$val),
    774   (MOV_IMM_F32  fpimm:$val)
    775 >;
    776 
    777 def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
    778 def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
    779 def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
    780 def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
    781 
    782 let hasSideEffects = 1 in {
    783 
    784 def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
    785 
    786 } // end hasSideEffects
    787 
    788 def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
    789 def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
    790 def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
    791 def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
    792 def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
    793 def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
    794 def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
    795 def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
    796 def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
    797 def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
    798 
    799 def SETE_INT : R600_2OP <
    800   0x3A, "SETE_INT",
    801   [(set (i32 R600_Reg32:$dst),
    802    (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
    803 >;
    804 
    805 def SETGT_INT : R600_2OP <
    806   0x3B, "SETGT_INT",
    807   [(set (i32 R600_Reg32:$dst),
    808    (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
    809 >;
    810 
    811 def SETGE_INT : R600_2OP <
    812   0x3C, "SETGE_INT",
    813   [(set (i32 R600_Reg32:$dst),
    814    (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
    815 >;
    816 
    817 def SETNE_INT : R600_2OP <
    818   0x3D, "SETNE_INT",
    819   [(set (i32 R600_Reg32:$dst),
    820    (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
    821 >;
    822 
    823 def SETGT_UINT : R600_2OP <
    824   0x3E, "SETGT_UINT",
    825   [(set (i32 R600_Reg32:$dst),
    826    (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
    827 >;
    828 
    829 def SETGE_UINT : R600_2OP <
    830   0x3F, "SETGE_UINT",
    831   [(set (i32 R600_Reg32:$dst),
    832     (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
    833 >;
    834 
    835 def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
    836 def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGE_INT", []>;
    837 def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
    838 def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
    839 
    840 def CNDE_INT : R600_3OP <  // dst = (src0 == 0) ? src1 : src2
    841   0x1C, "CNDE_INT",
    842   [(set (i32 R600_Reg32:$dst),
    843    (selectcc (i32 R600_Reg32:$src0), 0,
    844        (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
    845        COND_EQ))]
    846 >;
    847
    848 def CNDGE_INT : R600_3OP <  // dst = (src0 >= 0) ? src1 : src2
    849   0x1E, "CNDGE_INT",
    850   [(set (i32 R600_Reg32:$dst),
    851    (selectcc (i32 R600_Reg32:$src0), 0,
    852        (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
    853        COND_GE))]
    854 >;
    855
    856 def CNDGT_INT : R600_3OP <  // dst = (src0 > 0) ? src1 : src2
    857   0x1D, "CNDGT_INT",
    858   [(set (i32 R600_Reg32:$dst),
    859    (selectcc (i32 R600_Reg32:$src0), 0,
    860        (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
    861        COND_GT))]
    862 >;
    863 
    864 //===----------------------------------------------------------------------===//
    865 // Texture instructions
    866 //===----------------------------------------------------------------------===//
    867
    868 def TEX_LD : R600_TEX <  // selected from int_AMDGPU_txf
    869   0x03, "TEX_LD",
    870   [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
    871 > {
        // The pattern uses three extra immediates ($src1..$src3), so the asm
        // string and input operand list inherited from R600_TEX are replaced.
    872 let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget";
    873 let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget);
    874 }
    875 
    876 def TEX_GET_TEXTURE_RESINFO : R600_TEX <  // selected from int_AMDGPU_txq
    877   0x04, "TEX_GET_TEXTURE_RESINFO",
    878   [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
    879 >;
    880
    881 def TEX_GET_GRADIENTS_H : R600_TEX <  // selected from int_AMDGPU_ddx
    882   0x07, "TEX_GET_GRADIENTS_H",
    883   [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
    884 >;
    885
    886 def TEX_GET_GRADIENTS_V : R600_TEX <  // selected from int_AMDGPU_ddy
    887   0x08, "TEX_GET_GRADIENTS_V",
    888   [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
    889 >;
    890
    891 def TEX_SET_GRADIENTS_H : R600_TEX <  // no selection pattern
    892   0x0B, "TEX_SET_GRADIENTS_H",
    893   []
    894 >;
    895
    896 def TEX_SET_GRADIENTS_V : R600_TEX <  // no selection pattern
    897   0x0C, "TEX_SET_GRADIENTS_V",
    898   []
    899 >;
    900 
    901 def TEX_SAMPLE : R600_TEX <  // int_AMDGPU_tex with any immediate texture target
    902   0x10, "TEX_SAMPLE",
    903   [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
    904 >;
    905
    906 def TEX_SAMPLE_C : R600_TEX <  // same intrinsic, but $textureTarget must match TEX_SHADOW (defined outside this chunk)
    907   0x18, "TEX_SAMPLE_C",
    908   [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
    909 >;
    910
    911 def TEX_SAMPLE_L : R600_TEX <  // int_AMDGPU_txl with any immediate texture target
    912   0x11, "TEX_SAMPLE_L",
    913   [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
    914 >;
    915
    916 def TEX_SAMPLE_C_L : R600_TEX <  // int_AMDGPU_txl restricted to TEX_SHADOW targets
    917   0x19, "TEX_SAMPLE_C_L",
    918   [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
    919 >;
    920
    921 def TEX_SAMPLE_LB : R600_TEX <  // int_AMDGPU_txb with any immediate texture target
    922   0x12, "TEX_SAMPLE_LB",
    923   [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
    924 >;
    925
    926 def TEX_SAMPLE_C_LB : R600_TEX <  // int_AMDGPU_txb restricted to TEX_SHADOW targets
    927   0x1A, "TEX_SAMPLE_C_LB",
    928   [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
    929 >;
    930
    931 def TEX_SAMPLE_G : R600_TEX <  // no selection pattern
    932   0x14, "TEX_SAMPLE_G",
    933   []
    934 >;
    935
    936 def TEX_SAMPLE_C_G : R600_TEX <  // no selection pattern
    937   0x1C, "TEX_SAMPLE_C_G",
    938   []
    939 >;
    940 
    941 //===----------------------------------------------------------------------===//
    942 // Helper classes for common instructions
    943 //===----------------------------------------------------------------------===//
        // Each *_Common class fixes the mnemonic and pattern and leaves the opcode
        // as a template argument, so every chip generation can instantiate it with
        // its own encoding. The three-operand classes here take a 5-bit opcode.
    944
    945 class MUL_LIT_Common <bits<5> inst> : R600_3OP <  // no selection pattern
    946   inst, "MUL_LIT",
    947   []
    948 >;
    949
    950 class MULADD_Common <bits<5> inst> : R600_3OP <  // no selection pattern
    951   inst, "MULADD",
    952   []
    953 >;
    954
    955 class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <  // dst = src0 * src1 + src2 (fadd of fmul)
    956   inst, "MULADD_IEEE",
    957   [(set (f32 R600_Reg32:$dst),
    958    (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
    959 >;
    960
    961 class CNDE_Common <bits<5> inst> : R600_3OP <  // dst = (src0 == 0.0) ? src1 : src2
    962   inst, "CNDE",
    963   [(set R600_Reg32:$dst,
    964    (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
    965        (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
    966        COND_EQ))]
    967 >;
    968
    969 class CNDGT_Common <bits<5> inst> : R600_3OP <  // dst = (src0 > 0.0) ? src1 : src2
    970   inst, "CNDGT",
    971   [(set R600_Reg32:$dst,
    972    (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
    973        (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
    974        COND_GT))]
    975 >;
    976
    977 class CNDGE_Common <bits<5> inst> : R600_3OP <  // dst = (src0 >= 0.0) ? src1 : src2
    978   inst, "CNDGE",
    979   [(set R600_Reg32:$dst,
    980    (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
    981        (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
    982        COND_GE))]
    983 >;
    984 
    985 multiclass DOT4_Common <bits<11> inst> {  // emits <name>_pseudo and <name>_real with the same opcode
    986
        // Selected from int_AMDGPU_dp4; takes two whole 128-bit registers.
    987   def _pseudo : R600_REDUCTION <inst,
    988     (ins R600_Reg128:$src0, R600_Reg128:$src1),
    989     "DOT4 $dst $src0, $src1",
    990     [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
    991   >;
    992
        // Plain two-operand form with no pattern. Presumably the pseudo is
        // expanded into this elsewhere -- not visible in this file chunk.
    993   def _real : R600_2OP <inst, "DOT4", []>;
    994 }
    995 
    996 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
    997 multiclass CUBE_Common <bits<11> inst> {  // emits <name>_pseudo and <name>_real with the same opcode
    998
        // Selected from int_AMDGPU_cube; 128-bit source and destination.
    999   def _pseudo : InstR600 <
   1000     inst,
   1001     (outs R600_Reg128:$dst),
   1002     (ins R600_Reg128:$src),
   1003     "CUBE $dst $src",
   1004     [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
   1005     VecALU
   1006   > {
   1007     let isPseudo = 1;
   1008   }
   1009
        // Plain two-operand form with no pattern.
   1010   def _real : R600_2OP <inst, "CUBE", []>;
   1011 }
   1012 } // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
   1013 
   1014 class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <  // helper is given the fexp2 node
   1015   inst, "EXP_IEEE", fexp2
   1016 >;
   1017
   1018 class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <  // fp_to_sint
   1019   inst, "FLT_TO_INT", fp_to_sint
   1020 >;
   1021
   1022 class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <  // sint_to_fp
   1023   inst, "INT_TO_FLT", sint_to_fp
   1024 >;
   1025
   1026 class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <  // fp_to_uint
   1027   inst, "FLT_TO_UINT", fp_to_uint
   1028 >;
   1029
   1030 class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <  // uint_to_fp
   1031   inst, "UINT_TO_FLT", uint_to_fp
   1032 >;
   1033
   1034 class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <  // no selection pattern
   1035   inst, "LOG_CLAMPED", []
   1036 >;
   1037
   1038 class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <  // flog2
   1039   inst, "LOG_IEEE", flog2
   1040 >;
   1041
   1042 class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;  // shift left
   1043 class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;  // logical shift right
   1044 class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;  // arithmetic shift right
   1045 class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <  // mulhs: high half of signed multiply
   1046   inst, "MULHI_INT", mulhs
   1047 >;
   1048 class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <  // mulhu: high half of unsigned multiply
   1049   inst, "MULHI_UINT", mulhu
   1050 >;
   1051 class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <  // generic `mul` node
   1052   inst, "MULLO_INT", mul
   1053 >;
   1054 class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>;  // no selection pattern
   1055
   1056 class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <  // no selection pattern
   1057   inst, "RECIP_CLAMPED", []
   1058 >;
   1059
   1060 class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <  // dst = 1.0 / src0 (fdiv with FP_ONE numerator)
   1061   inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))]
   1062 >;
   1063
   1064 class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <  // AMDGPUurecip target node
   1065   inst, "RECIP_UINT", AMDGPUurecip
   1066 >;
   1067
   1068 class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <  // int_AMDGPU_rsq intrinsic
   1069   inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
   1070 >;
   1071
   1072 class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <  // no selection pattern
   1073   inst, "RECIPSQRT_IEEE", []
   1074 >;
   1075
   1076 class SIN_Common <bits<11> inst> : R600_1OP <  // no pattern here; fsin is matched through SIN_PAT
   1077   inst, "SIN", []>{
   1078   let Trig = 1;  // InstR600 copies this bit into TSFlags{4}
   1079 }
   1080
   1081 class COS_Common <bits<11> inst> : R600_1OP <  // no pattern here; fcos is matched through COS_PAT
   1082   inst, "COS", []> {
   1083   let Trig = 1;  // InstR600 copies this bit into TSFlags{4}
   1084 }
   1085 
   1086 //===----------------------------------------------------------------------===//
   1087 // Helper patterns for complex intrinsics
   1088 //===----------------------------------------------------------------------===//
   1089
   1090 multiclass DIV_Common <InstR600 recip_ieee> {
        // Both the int_AMDGPU_div intrinsic and the generic fdiv node are selected
        // as src0 * recip_ieee(src1), using the reciprocal instruction passed in.
   1091 def : Pat<
   1092   (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
   1093   (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
   1094 >;
   1095
   1096 def : Pat<
   1097   (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
   1098   (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
   1099 >;
   1100 }
   1101
        // int_TGSI_lit_z(x, y, w) is selected as
        //   exp_ieee(mul_lit(log_clamped(MAX(y, 0.0)), w, x))
        // using the three instructions supplied as template arguments.
   1102 class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
   1103   (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
   1104   (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
   1105 >;
   1106 
   1107 //===----------------------------------------------------------------------===//
   1108 // R600 / R700 Instructions
   1109 //===----------------------------------------------------------------------===//
   1110 
   1111 let Predicates = [isR600] in {  // everything in this block is guarded by the isR600 predicate
   1112
        // Instantiate the shared *_Common classes with the opcodes used here.
   1113   def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
   1114   def MULADD_r600 : MULADD_Common<0x10>;
   1115   def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
   1116   def CNDE_r600 : CNDE_Common<0x18>;
   1117   def CNDGT_r600 : CNDGT_Common<0x19>;
   1118   def CNDGE_r600 : CNDGE_Common<0x1A>;
   1119   defm DOT4_r600 : DOT4_Common<0x50>;
   1120   defm CUBE_r600 : CUBE_Common<0x52>;
   1121   def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
   1122   def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
   1123   def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
   1124   def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
   1125   def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
   1126   def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
   1127   def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
   1128   def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
   1129   def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
   1130   def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
   1131   def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
   1132   def SIN_r600 : SIN_Common<0x6E>;
   1133   def COS_r600 : COS_Common<0x6F>;
   1134   def ASHR_r600 : ASHR_Common<0x70>;
   1135   def LSHR_r600 : LSHR_Common<0x71>;
   1136   def LSHL_r600 : LSHL_Common<0x72>;
   1137   def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
   1138   def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
   1139   def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
   1140   def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
   1141   def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
   1142
   1143   defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
   1144   def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
   1145
        // fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x).
   1146   def : Pat<(fsqrt R600_Reg32:$src),
   1147     (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
   1148
        // Export instructions: only the Word1 bit layout is set here; the base
        // classes and pattern multiclasses are defined outside this chunk.
   1149   def R600_ExportSwz : ExportSwzInst {
   1150     let Word1{20-17} = 1; // BURST_COUNT
   1151     let Word1{21} = eop;
   1152     let Word1{22} = 1; // VALID_PIXEL_MODE
   1153     let Word1{30-23} = inst;
   1154     let Word1{31} = 1; // BARRIER
   1155   }
   1156   defm : ExportPattern<R600_ExportSwz, 39>;
   1157
   1158   def R600_ExportBuf : ExportBufInst {
   1159     let Word1{20-17} = 1; // BURST_COUNT
   1160     let Word1{21} = eop;
   1161     let Word1{22} = 1; // VALID_PIXEL_MODE
   1162     let Word1{30-23} = inst;
   1163     let Word1{31} = 1; // BARRIER
   1164   }
   1165   defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
   1166 } // End Predicates = [isR600]
   1167 
   1168 // Helper pattern for normalizing inputs to trigonometric instructions for R700+
   1169 // cards.
   1170 class COS_PAT <InstR600 trig> : Pat<  // fcos(x) -> trig(CONST.TWO_PI_INV * x)
   1171   (fcos R600_Reg32:$src),
   1172   (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
   1173 >;
   1174
   1175 class SIN_PAT <InstR600 trig> : Pat<  // fsin(x) -> trig(CONST.TWO_PI_INV * x)
   1176   (fsin R600_Reg32:$src),
   1177   (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
   1178 >;
   1179 
   1180 //===----------------------------------------------------------------------===//
   1181 // R700 Only instructions
   1182 //===----------------------------------------------------------------------===//
   1183 
   1184 let Predicates = [isR700] in {
   1185   def SIN_r700 : SIN_Common<0x6E>;  // same opcode as SIN_r600
   1186   def COS_r700 : COS_Common<0x6F>;  // same opcode as COS_r600
   1187
   1188   // R700 normalizes inputs to SIN/COS the same as EG
   1189   def : SIN_PAT <SIN_r700>;
   1190   def : COS_PAT <COS_r700>;
   1191 } // End Predicates = [isR700]
   1192 
   1193 //===----------------------------------------------------------------------===//
   1194 // Evergreen Only instructions
   1195 //===----------------------------------------------------------------------===//
   1196 
   1197 let Predicates = [isEG] in {  // definitions guarded by isEG only
   1198
   1199 def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
   1200 defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;  // division via multiply by RECIP_IEEE_eg
   1201
   1202 def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
   1203 def MULHI_INT_eg : MULHI_INT_Common<0x90>;
   1204 def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
   1205 def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
   1206 def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
   1207 def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
   1208 def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
   1209 def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
   1210 def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
   1211 def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
   1212 def SIN_eg : SIN_Common<0x8D>;
   1213 def COS_eg : COS_Common<0x8E>;
   1214
   1215 def : SIN_PAT <SIN_eg>;
   1216 def : COS_PAT <COS_eg>;
        // fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x).
   1217 def : Pat<(fsqrt R600_Reg32:$src),
   1218   (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
   1219 } // End Predicates = [isEG]
   1220 
   1221 //===----------------------------------------------------------------------===//
   1222 // Evergreen / Cayman Instructions
   1223 //===----------------------------------------------------------------------===//
   1224 
   1225 let Predicates = [isEGorCayman] in {
   1226 
   1227   // BFE_UINT - bit_extract, an optimization for mask and shift
   1228   // Src0 = Input
   1229   // Src1 = Offset
   1230   // Src2 = Width
   1231   //
   1232   // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
   1233   //
   1234   // Example Usage:
   1235   // (Offset, Width)
   1236   //
   1237   // (0, 8)           = (Input << 24) >> 24  = (Input &  0xff)       >> 0
   1238   // (8, 8)           = (Input << 16) >> 24  = (Input &  0xffff)     >> 8
   1239   // (16,8)           = (Input <<  8) >> 24  = (Input &  0xffffff)   >> 16
   1240   // (24,8)           = (Input <<  0) >> 24  = (Input &  0xffffffff) >> 24
   1241   def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",  // selected from int_AMDIL_bit_extract_u32(input, offset, width)
   1242     [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
   1243                                                       R600_Reg32:$src1,
   1244                                                       R600_Reg32:$src2))],
   1245     VecALU
   1246   >;
   1247
   1248   def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",  // selected from the AMDGPUbitalign target node
   1249     [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
   1250                                           R600_Reg32:$src2))],
   1251     VecALU
   1252   >;
   1253 
   1254   def MULADD_eg : MULADD_Common<0x14>;  // shared *_Common classes with the opcodes used under isEGorCayman
   1255   def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
   1256   def ASHR_eg : ASHR_Common<0x15>;
   1257   def LSHR_eg : LSHR_Common<0x16>;
   1258   def LSHL_eg : LSHL_Common<0x17>;
   1259   def CNDE_eg : CNDE_Common<0x19>;
   1260   def CNDGT_eg : CNDGT_Common<0x1A>;
   1261   def CNDGE_eg : CNDGE_Common<0x1B>;
   1262   def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
   1263   def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
   1264   defm DOT4_eg : DOT4_Common<0xBE>;
   1265   defm CUBE_eg : CUBE_Common<0xC0>;
   1266
   1267 let hasSideEffects = 1 in {
   1268   def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;  // no pattern; flagged as having side effects
   1269 } // End hasSideEffects = 1
   1270
   1271   def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
   1272
   1273   def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
   1274     let Pattern = [];  // drop the inherited pattern; fp_to_sint is matched by the explicit TRUNC Pat below
   1275   }
   1276
   1277   def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
   1278
   1279   def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
   1280     let Pattern = [];  // drop the inherited pattern; fp_to_uint is matched by the explicit TRUNC Pat below
   1281   }
   1282
   1283   def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
   1284
   1285   // TRUNC is used for the FLT_TO_INT instructions to work around a
   1286   // perceived problem where the rounding modes are applied differently
   1287   // depending on the instruction and the slot they are in.
   1288   // See:
   1289   // https://bugs.freedesktop.org/show_bug.cgi?id=50232
   1290   // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
   1291   //
   1292   // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
   1293   // which do not need to be truncated since the fp values are 0.0f or 1.0f.
   1294   // We should look into handling these cases separately.
   1295   def : Pat<(fp_to_sint R600_Reg32:$src0),
   1296     (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;
   1297
   1298   def : Pat<(fp_to_uint R600_Reg32:$src0),
   1299     (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
   1300 
   1301   def EG_ExportSwz : ExportSwzInst {  // same fields as R600_ExportSwz at different Word1 bit positions, plus MARK
   1302     let Word1{19-16} = 1; // BURST_COUNT
   1303     let Word1{20} = 1; // VALID_PIXEL_MODE
   1304     let Word1{21} = eop;
   1305     let Word1{29-22} = inst;
   1306     let Word1{30} = 0; // MARK
   1307     let Word1{31} = 1; // BARRIER
   1308   }
   1309   defm : ExportPattern<EG_ExportSwz, 83>;
   1310
   1311   def EG_ExportBuf : ExportBufInst {  // same Word1 layout as EG_ExportSwz
   1312     let Word1{19-16} = 1; // BURST_COUNT
   1313     let Word1{20} = 1; // VALID_PIXEL_MODE
   1314     let Word1{21} = eop;
   1315     let Word1{29-22} = inst;
   1316     let Word1{30} = 0; // MARK
   1317     let Word1{31} = 1; // BARRIER
   1318   }
   1319   defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
   1320 
   1321 //===----------------------------------------------------------------------===//
   1322 // Memory read/write instructions
   1323 //===----------------------------------------------------------------------===//
   1324 let usesCustomInserter = 1 in {
   1325
        // Base class for the RAT_WRITE_CACHELESS stores. Subclasses supply the
        // input operand list, the component mask, the printed name and the
        // selection pattern; the remaining fields are fixed below.
   1326 class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
   1327                               list<dag> pattern>
   1328     : EG_CF_RAT <0x57, 0x2, 0, (outs), ins,
   1329                  !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
   1330   let RIM         = 0;
   1331   // XXX: Have a separate instruction for non-indexed writes.
   1332   let TYPE        = 1;
   1333   let RW_REL      = 0;
   1334   let ELEM_SIZE   = 0;
   1335
   1336   let ARRAY_SIZE  = 0;
   1337   let COMP_MASK   = comp_mask;  // 0x1 for the 32-bit store, 0xf for the 128-bit store
   1338   let BURST_COUNT = 0;
   1339   let VPM         = 0;
   1340   let MARK        = 0;
   1341   let BARRIER     = 1;
   1342 }
   1343
   1344 } // End usesCustomInserter = 1
   1345 
   1346 // 32-bit store
        // NOTE(review): the printed name here includes the "_eg" suffix, while the
        // 128-bit variant below prints without it -- confirm which is intended.
   1347 def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
   1348   (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
   1349   0x1, "RAT_WRITE_CACHELESS_32_eg",
   1350   [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
   1351 >;
   1352
   1353 // 128-bit store
   1354 def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
   1355   (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
   1356   0xf, "RAT_WRITE_CACHELESS_128",
   1357   [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
   1358 >;
   1359 
   1360 class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
   1361     : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
   1362       VTX_WORD1_GPR, VTX_WORD0 {
        // Base class for the VTX_READ_* fetch instructions. Subclasses pick the
        // printed name, the destination operand, the data format and the
        // destination swizzle; the buffer id comes from the template argument.
   1363
   1364   // Static fields
   1365   let VC_INST = 0;
   1366   let FETCH_TYPE = 2;
   1367   let FETCH_WHOLE_QUAD = 0;
   1368   let BUFFER_ID = buffer_id;
   1369   let SRC_REL = 0;
   1370   // XXX: We can infer this field based on the SRC_GPR.  This would allow us
   1371   // to store vertex addresses in any channel, not just X.
   1372   let SRC_SEL_X = 0;
   1373   let DST_REL = 0;
   1374   // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
   1375   // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
   1376   // however, based on my testing if USE_CONST_FIELDS is set, then all
   1377   // these fields need to be set to 0.
   1378   let USE_CONST_FIELDS = 0;
   1379   let NUM_FORMAT_ALL = 1;
   1380   let FORMAT_COMP_ALL = 0;
   1381   let SRF_MODE_ALL = 0;
   1382
   1383   let Inst{31-0} = Word0;
   1384   let Inst{63-32} = Word1;
   1385   // LLVM can only encode 64-bit instructions, so these fields are manually
   1386   // encoded in R600CodeEmitter
   1387   //
   1388   // bits<16> OFFSET;
   1389   // bits<2>  ENDIAN_SWAP = 0;
   1390   // bits<1>  CONST_BUF_NO_STRIDE = 0;
   1391   // bits<1>  MEGA_FETCH = 0;
   1392   // bits<1>  ALT_CONST = 0;
   1393   // bits<2>  BUFFER_INDEX_MODE = 0;
   1394
   1395
   1396
   1397   // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
   1398   // is done in R600CodeEmitter)
   1399   //
   1400   // Inst{79-64} = OFFSET;
   1401   // Inst{81-80} = ENDIAN_SWAP;
   1402   // Inst{82}    = CONST_BUF_NO_STRIDE;
   1403   // Inst{83}    = MEGA_FETCH;
   1404   // Inst{84}    = ALT_CONST;
   1405   // Inst{86-85} = BUFFER_INDEX_MODE;
   1406   // Inst{95-87} = 0; Reserved
   1407
   1408   // VTX_WORD3 (Padding)
   1409   //
   1410   // Inst{127-96} = 0;
   1411 }
   1412 
   1413 class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
   1414     : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst),
   1415                    pattern> {
   1416
   1417   let MEGA_FETCH_COUNT = 1;  // equals the access size in bytes (8 bits)
   1418   let DST_SEL_X = 0;
   1419   let DST_SEL_Y = 7;   // Masked
   1420   let DST_SEL_Z = 7;   // Masked
   1421   let DST_SEL_W = 7;   // Masked
   1422   let DATA_FORMAT = 1; // FMT_8
   1423 }
   1424
   1425 class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
   1426     : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst),
   1427                     pattern> {
   1428   let MEGA_FETCH_COUNT = 2;  // equals the access size in bytes (16 bits)
   1429   let DST_SEL_X = 0;
   1430   let DST_SEL_Y = 7;   // Masked
   1431   let DST_SEL_Z = 7;   // Masked
   1432   let DST_SEL_W = 7;   // Masked
   1433   let DATA_FORMAT = 5; // FMT_16
   1434
   1435 }
   1436
   1437 class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
   1438     : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst),
   1439                    pattern> {
   1440
   1441   let MEGA_FETCH_COUNT = 4;  // equals the access size in bytes (32 bits)
   1442   let DST_SEL_X        = 0;
   1443   let DST_SEL_Y        = 7;   // Masked
   1444   let DST_SEL_Z        = 7;   // Masked
   1445   let DST_SEL_W        = 7;   // Masked
   1446   let DATA_FORMAT      = 0xD; // COLOR_32
   1447
   1448   // This is not really necessary, but there were some GPU hangs that appeared
   1449   // to be caused by ALU instructions in the next instruction group that wrote
   1450   // to the $ptr registers of the VTX_READ.
   1451   // e.g.
   1452   // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
   1453   // %T2_X<def> = MOV %ZERO
   1454   // Adding this constraint prevents this from happening.
   1455   let Constraints = "$ptr.ptr = $dst";
   1456 }
   1457
   1458 class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
   1459     : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst),
   1460                    pattern> {
   1461
   1462   let MEGA_FETCH_COUNT = 16;  // equals the access size in bytes (128 bits)
   1463   let DST_SEL_X        =  0;  // all four destination channels are written (selects 0..3)
   1464   let DST_SEL_Y        =  1;
   1465   let DST_SEL_Z        =  2;
   1466   let DST_SEL_W        =  3;
   1467   let DATA_FORMAT      =  0x22; // COLOR_32_32_32_32
   1468
   1469   // XXX: Need to force VTX_READ_128 instructions to write to the same register
   1470   // that holds its buffer address to avoid potential hangs.  We can't use
   1471   // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
   1472   // registers are different sizes.
   1473 }
   1474 
   1475 //===----------------------------------------------------------------------===//
   1476 // VTX Read from parameter memory space
   1477 //===----------------------------------------------------------------------===//
        // All parameter-space reads pass 0 as buffer_id.
   1478
   1479 def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
   1480   [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))]
   1481 >;
   1482
   1483 def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
   1484   [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))]
   1485 >;
   1486
   1487 def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
   1488   [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
   1489 >;
   1490
   1491 def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
   1492   [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
   1493 >;
   1494
   1495 //===----------------------------------------------------------------------===//
   1496 // VTX Read from global memory space
   1497 //===----------------------------------------------------------------------===//
        // All global-space reads pass 1 as buffer_id. There is no 16-bit variant here.
   1498
   1499 // 8-bit reads
   1500 def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
   1501   [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
   1502 >;
   1503
   1504 // 32-bit reads
   1505 def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
   1506   [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
   1507 >;
   1508
   1509 // 128-bit reads
   1510 def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
   1511   [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
   1512 >;
   1513 
   1514 //===----------------------------------------------------------------------===//
   1515 // Constant Loads
   1516 // XXX: We are currently storing all constants in the global address space.
   1517 //===----------------------------------------------------------------------===//
   1518
   1519 def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,  // buffer_id 1, the same id the global reads use
   1520   [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
   1521 >;
   1522
   1523 } // End Predicates = [isEGorCayman]
   1524 
   1525 //===----------------------------------------------------------------------===//
   1526 // Register loads and stores - for indirect addressing
   1527 //===----------------------------------------------------------------------===//
   1528 
   1529 defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
   1530 
   1531 let Predicates = [isCayman] in {  // definitions guarded by isCayman only
   1532
   1533 let isVector = 1 in {  // InstR600 copies isVector into TSFlags{6}: "must fill all slots in an instruction group"
   1534
   1535 def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
   1536
   1537 def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
   1538 def MULHI_INT_cm : MULHI_INT_Common<0x90>;
   1539 def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
   1540 def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
   1541 def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
   1542 def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
        // NOTE(review): this record is named "LOG_IEEE_" while every sibling ends
        // in "_cm"; presumably "LOG_IEEE_cm" was intended. Renaming changes the
        // generated instruction enum name, so check for external users first.
   1543 def LOG_IEEE_ : LOG_IEEE_Common<0x83>;
   1544 def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
   1545 def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
   1546 def SIN_cm : SIN_Common<0x8D>;
   1547 def COS_cm : COS_Common<0x8E>;
   1548 } // End isVector = 1
   1549
   1550 def : SIN_PAT <SIN_cm>;
   1551 def : COS_PAT <COS_cm>;
   1552
   1553 defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
   1554
   1555 // RECIP_UINT emulation for Cayman
        // Computed as FLT_TO_UINT(RECIP_IEEE(UINT_TO_FLT(src0)) * K), where K is
        // the immediate 0x4f800000, the IEEE-754 single-precision bit pattern of 2^32.
   1556 def : Pat <
   1557   (AMDGPUurecip R600_Reg32:$src0),
   1558   (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
   1559                             (MOV_IMM_I32 0x4f800000)))
   1560 >;
   1561
   1562
        // fsqrt(x) is selected as x * RECIPSQRT_CLAMPED(x).
   1563 def : Pat<(fsqrt R600_Reg32:$src),
   1564   (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
   1565
   1566 } // End isCayman
   1567 
   1568 //===----------------------------------------------------------------------===//
   1569 // Branch Instructions
   1570 //===----------------------------------------------------------------------===//
   1571 
   1572 
   1573 def IF_PREDICATE_SET  : ILFormat<(outs), (ins GPRI32:$src),  // no outputs, one GPRI32 input, no pattern
   1574   "IF_PREDICATE_SET $src", []>;
   1575
   1576 def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),  // no outputs, one GPRI32 input, no pattern
   1577   "PREDICATED_BREAK $src", []>;
   1578 
   1579 //===----------------------------------------------------------------------===//
   1580 // Pseudo instructions
   1581 //===----------------------------------------------------------------------===//
   1582 
   1583 let isPseudo = 1 in {
   1584 
   1585 def PRED_X : InstR600 <  // writes a predicate bit; no asm string and no selection pattern
   1586   0, (outs R600_Predicate_Bit:$dst),
   1587   (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
   1588   "", [], NullALU> {
   1589   let FlagOperandIdx = 3;  // operand 3 is $flags ($dst=0, $src0=1, $src1=2); stored in TSFlags{8-7}
   1590 }
   1591 
   1592 let isTerminator = 1, isBranch = 1 in {
        // Branch taking an explicit predicate operand; same opcode value (0x10) as JUMP.
   1593 def JUMP_COND : InstR600 <0x10,
   1594           (outs),
   1595           (ins brtarget:$target, R600_Predicate_Bit:$p),
   1596           "JUMP $target ($p)",
   1597           [], AnyALU
   1598   >;
   1599
        // Branch without an explicit predicate operand; marked predicable and a barrier.
   1600 def JUMP : InstR600 <0x10,
   1601           (outs),
   1602           (ins brtarget:$target),
   1603           "JUMP $target",
   1604           [], AnyALU
   1605   >
   1606 {
   1607   let isPredicable = 1;
   1608   let isBarrier = 1;
   1609 }
   1610
   1611 }  // End isTerminator = 1, isBranch = 1
   1612 
   1613 let usesCustomInserter = 1 in {
   1614 
   1615 let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
   1616
        // No outputs and no pattern; explicitly flagged as having side effects.
   1617 def MASK_WRITE : AMDGPUShaderInst <
   1618     (outs),
   1619     (ins R600_Reg32:$src),
   1620     "MASK_WRITE $src",
   1621     []
   1622 >;
   1623
   1624 } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
   1625 
   1626 
   1627 def TXD: AMDGPUShaderInst <  // int_AMDGPU_txd with any immediate texture target
   1628   (outs R600_Reg128:$dst),
   1629   (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
   1630   "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
   1631   [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
   1632 >;
   1633
   1634 def TXD_SHADOW: AMDGPUShaderInst <  // same intrinsic, restricted to TEX_SHADOW texture targets
   1635   (outs R600_Reg128:$dst),
   1636   (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
   1637   "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
   1638   [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
   1639 >;
   1640 
   1641 } // End usesCustomInserter = 1
   1642 } // End isPseudo = 1
   1643 
   1644 def CLAMP_R600 :  CLAMP <R600_Reg32>;  // CLAMP/FABS/FNEG classes (defined outside this chunk) instantiated for R600_Reg32
   1645 def FABS_R600 : FABS<R600_Reg32>;
   1646 def FNEG_R600 : FNEG<R600_Reg32>;
   1647 
   1648 //===---------------------------------------------------------------------===//
   1649 // Return instruction
   1650 //===---------------------------------------------------------------------===//
   1651 let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
   1652     usesCustomInserter = 1 in {  // These flags apply to every def inside the braces.
   1653   def RETURN          : ILFormat<(outs), (ins variable_ops),  // No results; accepts a variable operand list.
   1654       "RETURN", [(IL_retflag)]>;  // Selected from the IL_retflag node.
   1655 }
   1656 
   1657 
   1658 //===----------------------------------------------------------------------===//
   1659 // Constant Buffer Addressing Support
   1660 //===----------------------------------------------------------------------===//
   1661 
   1662 let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in {
   1663 def CONST_COPY : Instruction {  // Derives directly from Instruction, so every field is set explicitly below.
   1664   let OutOperandList = (outs R600_Reg32:$dst);
   1665   let InOperandList = (ins i32imm:$src);  // Immediate source operand.
   1666   let Pattern =
   1667       [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];  // Selected from CONST_ADDRESS when the address matches ADDRGA_CONST_OFFSET.
   1668   let AsmString = "CONST_COPY";
   1669   let neverHasSideEffects = 1;
   1670   let isAsCheapAsAMove = 1;
   1671   let Itinerary = NullALU;
   1672 }
   1673 } // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
   1674 
   1675 def TEX_VTX_CONSTBUF :  // Reads a 128-bit register value; selected from CONST_ADDRESS with an ADDRGA_VAR_OFFSET address and an immediate buffer id.
   1676   InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
   1677       [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
   1678   VTX_WORD1_GPR, VTX_WORD0 {  // NOTE(review): the fields set below are presumably declared by these two parents (not visible here) - confirm.
   1679 
   1680   let VC_INST = 0;
   1681   let FETCH_TYPE = 2;
   1682   let FETCH_WHOLE_QUAD = 0;
   1683   let SRC_REL = 0;
   1684   let SRC_SEL_X = 0;
   1685   let DST_REL = 0;
   1686   let USE_CONST_FIELDS = 0;
   1687   let NUM_FORMAT_ALL = 2;
   1688   let FORMAT_COMP_ALL = 1;
   1689   let SRF_MODE_ALL = 1;
   1690   let MEGA_FETCH_COUNT = 16;
   1691   let DST_SEL_X        = 0;
   1692   let DST_SEL_Y        = 1;
   1693   let DST_SEL_Z        = 2;
   1694   let DST_SEL_W        = 3;
   1695   let DATA_FORMAT      = 35;
   1696 
   1697   let Inst{31-0} = Word0;  // Low 32 bits of the 64-bit Inst field.
   1698   let Inst{63-32} = Word1;  // High 32 bits of the 64-bit Inst field.
   1699 
   1700 // LLVM can only encode 64-bit instructions, so these fields are manually
   1701 // encoded in R600CodeEmitter
   1702 //
   1703 // bits<16> OFFSET;
   1704 // bits<2>  ENDIAN_SWAP = 0;
   1705 // bits<1>  CONST_BUF_NO_STRIDE = 0;
   1706 // bits<1>  MEGA_FETCH = 0;
   1707 // bits<1>  ALT_CONST = 0;
   1708 // bits<2>  BUFFER_INDEX_MODE = 0;
   1709 
   1710 
   1711 
   1712 // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
   1713 // is done in R600CodeEmitter)
   1714 //
   1715 // Inst{79-64} = OFFSET;
   1716 // Inst{81-80} = ENDIAN_SWAP;
   1717 // Inst{82}    = CONST_BUF_NO_STRIDE;
   1718 // Inst{83}    = MEGA_FETCH;
   1719 // Inst{84}    = ALT_CONST;
   1720 // Inst{86-85} = BUFFER_INDEX_MODE;
   1721 // Inst{95-87} = 0; Reserved
   1722 
   1723 // VTX_WORD3 (Padding)
   1724 //
   1725 // Inst{127-96} = 0;
   1726 }
   1727 
   1728 def TEX_VTX_TEXBUF:  // Reads a 128-bit register value; selected from int_R600_load_texbuf with an ADDRGA_VAR_OFFSET address and an immediate buffer id.
   1729   InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
   1730       [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
   1731 VTX_WORD1_GPR, VTX_WORD0 {  // NOTE(review): the fields set below are presumably declared by these two parents (not visible here) - confirm.
   1732 
   1733 let VC_INST = 0;
   1734 let FETCH_TYPE = 2;
   1735 let FETCH_WHOLE_QUAD = 0;
   1736 let SRC_REL = 0;
   1737 let SRC_SEL_X = 0;
   1738 let DST_REL = 0;
   1739 let USE_CONST_FIELDS = 1;  // TEX_VTX_CONSTBUF sets this to 0.
   1740 let NUM_FORMAT_ALL = 0;  // TEX_VTX_CONSTBUF sets this to 2.
   1741 let FORMAT_COMP_ALL = 0;  // TEX_VTX_CONSTBUF sets this to 1.
   1742 let SRF_MODE_ALL = 1;
   1743 let MEGA_FETCH_COUNT = 16;
   1744 let DST_SEL_X        = 0;
   1745 let DST_SEL_Y        = 1;
   1746 let DST_SEL_Z        = 2;
   1747 let DST_SEL_W        = 3;
   1748 let DATA_FORMAT      = 0;  // TEX_VTX_CONSTBUF sets this to 35.
   1749 
   1750 let Inst{31-0} = Word0;  // Low 32 bits of the 64-bit Inst field.
   1751 let Inst{63-32} = Word1;  // High 32 bits of the 64-bit Inst field.
   1752 
   1753 // LLVM can only encode 64-bit instructions, so these fields are manually
   1754 // encoded in R600CodeEmitter
   1755 //
   1756 // bits<16> OFFSET;
   1757 // bits<2>  ENDIAN_SWAP = 0;
   1758 // bits<1>  CONST_BUF_NO_STRIDE = 0;
   1759 // bits<1>  MEGA_FETCH = 0;
   1760 // bits<1>  ALT_CONST = 0;
   1761 // bits<2>  BUFFER_INDEX_MODE = 0;
   1762 
   1763 
   1764 
   1765 // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
   1766 // is done in R600CodeEmitter)
   1767 //
   1768 // Inst{79-64} = OFFSET;
   1769 // Inst{81-80} = ENDIAN_SWAP;
   1770 // Inst{82}    = CONST_BUF_NO_STRIDE;
   1771 // Inst{83}    = MEGA_FETCH;
   1772 // Inst{84}    = ALT_CONST;
   1773 // Inst{86-85} = BUFFER_INDEX_MODE;
   1774 // Inst{95-87} = 0; Reserved
   1775 
   1776 // VTX_WORD3 (Padding)
   1777 //
   1778 // Inst{127-96} = 0;
   1779 }
   1780 
   1781 
   1782 
   1783 //===--------------------------------------------------------------------===//
   1784 // Instructions support
   1785 //===--------------------------------------------------------------------===//
   1786 //===---------------------------------------------------------------------===//
   1787 // Custom inserter for branches and returns; this will eventually be a
   1788 // separate pass
   1789 //===---------------------------------------------------------------------===//
   1790 let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {  // Flags shared by BRANCH and the BRANCH_COND defs.
   1791   def BRANCH : ILFormat<(outs), (ins brtarget:$target),  // No results; one branch-target operand.
   1792       "; Pseudo unconditional branch instruction",
   1793       [(br bb:$target)]>;  // Selected from the `br` node.
   1794   defm BRANCH_COND : BranchConditional<IL_brcond>;  // Expands the BranchConditional multiclass (defined elsewhere) with IL_brcond.
   1795 }
   1796 
   1797 //===---------------------------------------------------------------------===//
   1798 // Flow and Program control Instructions
   1799 //===---------------------------------------------------------------------===//
   1800 let isTerminator=1 in {  // Every def/defm in this scope is marked as a terminator.
   1801   def SWITCH      : ILFormat< (outs), (ins GPRI32:$src),  // SWITCH and CASE are the only defs here that take an operand.
   1802   !strconcat("SWITCH", " $src"), []>;  // Asm string evaluates to "SWITCH $src"; no selection pattern.
   1803   def CASE        : ILFormat< (outs), (ins GPRI32:$src),
   1804       !strconcat("CASE", " $src"), []>;  // Asm string evaluates to "CASE $src".
   1805   def BREAK       : ILFormat< (outs), (ins),  // From here to RETDYN: no operands, no results, no patterns.
   1806       "BREAK", []>;
   1807   def CONTINUE    : ILFormat< (outs), (ins),
   1808       "CONTINUE", []>;
   1809   def DEFAULT     : ILFormat< (outs), (ins),
   1810       "DEFAULT", []>;
   1811   def ELSE        : ILFormat< (outs), (ins),
   1812       "ELSE", []>;
   1813   def ENDSWITCH   : ILFormat< (outs), (ins),
   1814       "ENDSWITCH", []>;
   1815   def ENDMAIN     : ILFormat< (outs), (ins),
   1816       "ENDMAIN", []>;
   1817   def END         : ILFormat< (outs), (ins),
   1818       "END", []>;
   1819   def ENDFUNC     : ILFormat< (outs), (ins),
   1820       "ENDFUNC", []>;
   1821   def ENDIF       : ILFormat< (outs), (ins),
   1822       "ENDIF", []>;
   1823   def WHILELOOP   : ILFormat< (outs), (ins),
   1824       "WHILE", []>;  // Note: the record is named WHILELOOP but its asm string is "WHILE".
   1825   def ENDLOOP     : ILFormat< (outs), (ins),
   1826       "ENDLOOP", []>;
   1827   def FUNC        : ILFormat< (outs), (ins),
   1828       "FUNC", []>;
   1829   def RETDYN      : ILFormat< (outs), (ins),
   1830       "RET_DYN", []>;  // Note: the record is named RETDYN but its asm string is "RET_DYN".
   1831   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   1832   defm IF_LOGICALNZ  : BranchInstr<"IF_LOGICALNZ">;
   1833   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   1834   defm IF_LOGICALZ   : BranchInstr<"IF_LOGICALZ">;
   1835   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   1836   defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
   1837   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   1838   defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
   1839   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   1840   defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
   1841   // This opcode has custom swizzle pattern encoded in Swizzle Encoder
   1842   defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
   1843   defm IFC         : BranchInstr2<"IFC">;  // The last three use the BranchInstr2 multiclass (defined elsewhere) instead of BranchInstr.
   1844   defm BREAKC      : BranchInstr2<"BREAKC">;
   1845   defm CONTINUEC   : BranchInstr2<"CONTINUEC">;
   1846 }
   1847 
   1848 //===----------------------------------------------------------------------===//
   1849 // ISel Patterns
   1850 //===----------------------------------------------------------------------===//
   1851 
   1852 // CND*_INT Patterns for f32 True / False values
   1853 
   1854 class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <  // Maps a selectcc on (i32 $src0 <cc> 0) with f32 true/false values to instruction `cnd`.
   1855   (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1),  // Compared value is i32; the right-hand side is the literal 0.
   1856                                             R600_Reg32:$src2, cc),  // $src1 is the true value, $src2 the false value.
   1857   (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)  // Operands are passed through in the same order.
   1858 >;
   1859 
   1860 def : CND_INT_f32 <CNDE_INT,  SETEQ>;  // $src0 == 0
   1861 def : CND_INT_f32 <CNDGT_INT, SETGT>;  // $src0 >  0
   1862 def : CND_INT_f32 <CNDGE_INT, SETGE>;  // $src0 >= 0
   1863 
   1864 // CNDGE_INT extra pattern: an i32 select on ($src0 > -1) is emitted as CNDGE_INT, the instruction used elsewhere for ($src0 >= 0)
   1865 def : Pat <
   1866   (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),  // Here the true/false values are i32, not f32.
   1867                                         (i32 R600_Reg32:$src2), COND_GT),
   1868   (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
   1869 >;
   1870 
   1871 // KIL patterns: both intrinsics are lowered to MASK_WRITE applied to a KILLGT result
   1872 def KILP : Pat <
   1873   (int_AMDGPU_kilp),  // Intrinsic takes no operands.
   1874   (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))  // KILLGT gets the operands ONE and ZERO (defined elsewhere). NOTE(review): presumably an always-true comparison - confirm.
   1875 >;
   1876 
   1877 def KIL : Pat <
   1878   (int_AMDGPU_kill R600_Reg32:$src0),
   1879   (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))  // KILLGT gets ZERO as first operand and $src0 as second.
   1880 >;
   1881 
   1882 // SGT with reversed operands: a COND_LT select of FP_ONE / FP_ZERO on ($src0, $src1) becomes SGT $src1, $src0
   1883 def : Pat <
   1884   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
   1885   (SGT R600_Reg32:$src1, R600_Reg32:$src0)
   1886 >;
   1887 
   1888 // SGE with reversed operands: a COND_LE select of FP_ONE / FP_ZERO becomes SGE $src1, $src0
   1889 def : Pat <
   1890   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
   1891   (SGE R600_Reg32:$src1, R600_Reg32:$src0)
   1892 >;
   1893 
   1894 // SETGT_DX10 with reversed operands: an f32 COND_LT select of -1 / 0 becomes SETGT_DX10 $src1, $src0
   1895 def : Pat <
   1896   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
   1897   (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
   1898 >;
   1899 
   1900 // SETGE_DX10 with reversed operands: an f32 COND_LE select of -1 / 0 becomes SETGE_DX10 $src1, $src0
   1901 def : Pat <
   1902   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
   1903   (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
   1904 >;
   1905 
   1906 // SETGT_INT with reversed operands: an i32 SETLT select of -1 / 0 becomes SETGT_INT $src1, $src0
   1907 def : Pat <
   1908   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
   1909   (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
   1910 >;
   1911 
   1912 // SETGE_INT with reversed operands: an i32 SETLE select of -1 / 0 becomes SETGE_INT $src1, $src0
   1913 def : Pat <
   1914   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
   1915   (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
   1916 >;
   1917 
   1918 // SETGT_UINT with reversed operands: an i32 SETULT select of -1 / 0 becomes SETGT_UINT $src1, $src0
   1919 def : Pat <
   1920   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
   1921   (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
   1922 >;
   1923 
   1924 // SETGE_UINT with reversed operands: an i32 SETULE select of -1 / 0 becomes SETGE_UINT $src1, $src0
   1925 def : Pat <
   1926   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
   1927   (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
   1928 >;
   1929 
   1930 // The next four patterns are special cases for handling 'true if ordered' and
   1931 // 'true if unordered' conditionals.  The assumption here is that the behavior of
   1932 // SETE and SNE conforms to the Direct3D 10 rules for floating point values
   1933 // described here:
   1934 // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
   1935 // We assume that SETE returns false when one of the operands is NaN and
   1936 // SNE returns true when one of the operands is NaN
   1937 
   1938 // SETE - 'true if ordered': a SETO select of FP_ONE / FP_ZERO is emitted as SETE
   1939 def : Pat <
   1940   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
   1941   (SETE R600_Reg32:$src0, R600_Reg32:$src1)
   1942 >;
   1943 
   1944 // SETE_DX10 - 'true if ordered': a SETO select of -1 / 0 is emitted as SETE_DX10
   1945 def : Pat <
   1946   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
   1947   (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
   1948 >;
   1949 
   1950 // SNE - 'true if unordered': a SETUO select of FP_ONE / FP_ZERO is emitted as SNE
   1951 def : Pat <
   1952   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
   1953   (SNE R600_Reg32:$src0, R600_Reg32:$src1)
   1954 >;
   1955 
   1956 // SETNE_DX10 - 'true if unordered': a SETUO select of -1 / 0 is emitted as SETNE_DX10
   1957 def : Pat <
   1958   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
   1959   (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
   1960 >;
   1961 
   1962 def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;  // f32 from v4f32: each index N (0-3) is paired with subN.
   1963 def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
   1964 def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
   1965 def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
   1966 
   1967 def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;  // f32 into v4f32, same index / subN pairing.
   1968 def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
   1969 def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
   1970 def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
   1971 
   1972 def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;  // i32 from v4i32; mirrors the f32 group above.
   1973 def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
   1974 def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
   1975 def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
   1976 
   1977 def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;  // i32 into v4i32; mirrors the f32 group above.
   1978 def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
   1979 def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
   1980 def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
   1981 
   1982 def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;  // v4f32 in R600_Reg128 with f32 elements in R600_Reg32.
   1983 def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;  // Same for v4i32 with i32 elements.
   1984 
   1985 // bitconvert patterns: each type pair is instantiated in both directions, within a single register class
   1986 
   1987 def : BitConvert <i32, f32, R600_Reg32>;
   1988 def : BitConvert <f32, i32, R600_Reg32>;
   1989 def : BitConvert <v4f32, v4i32, R600_Reg128>;
   1990 def : BitConvert <v4i32, v4f32, R600_Reg128>;
   1991 
   1992 // DWORDADDR pattern, instantiated for i32 values in R600_Reg32
   1993 def : DwordAddrPat  <i32, R600_Reg32>;
   1994 
   1995 } // End isR600toCayman Predicate
   1996