Home | History | Annotate | Download | only in Hexagon
      1 //===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file describes the Hexagon Vector instructions in TableGen format.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 def V2I1:  PatLeaf<(v2i1  PredRegs:$R)>;    // v2i1 operand in PredRegs
     15 def V4I1:  PatLeaf<(v4i1  PredRegs:$R)>;    // v4i1 operand in PredRegs
     16 def V8I1:  PatLeaf<(v8i1  PredRegs:$R)>;    // v8i1 operand in PredRegs
     17 def V4I8:  PatLeaf<(v4i8  IntRegs:$R)>;     // v4i8 operand in IntRegs
     18 def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;     // v2i16 operand in IntRegs
     19 def V8I8:  PatLeaf<(v8i8  DoubleRegs:$R)>;  // v8i8 operand in DoubleRegs
     20 def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;  // v4i16 operand in DoubleRegs
     21 def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;  // v2i32 operand in DoubleRegs
     22 
     23 
     24 multiclass bitconvert_32<ValueType TyA, ValueType TyB> {  // casts between two IntRegs types
     25   def: Pat<(TyB (bitconvert (TyA IntRegs:$Rs))),          // TyA -> TyB: reuse the register
     26            (TyB IntRegs:$Rs)>;
     27   def: Pat<(TyA (bitconvert (TyB IntRegs:$Rs))),          // TyB -> TyA: reuse the register
     28            (TyA IntRegs:$Rs)>;
     29 }
     30 
     31 multiclass bitconvert_64<ValueType TyA, ValueType TyB> {  // casts between two DoubleRegs types
     32   def: Pat<(TyB (bitconvert (TyA DoubleRegs:$Rss))),      // TyA -> TyB: reuse the register
     33            (TyB DoubleRegs:$Rss)>;
     34   def: Pat<(TyA (bitconvert (TyB DoubleRegs:$Rss))),      // TyB -> TyA: reuse the register
     35            (TyA DoubleRegs:$Rss)>;
     36 }
     37 
     38 multiclass bitconvert_vec<ValueType TyA, ValueType TyB> {  // casts between two VectorRegs types
     39   def: Pat<(TyB (bitconvert (TyA VectorRegs:$Vs))),        // TyA -> TyB: reuse the register
     40            (TyB VectorRegs:$Vs)>;
     41   def: Pat<(TyA (bitconvert (TyB VectorRegs:$Vs))),        // TyB -> TyA: reuse the register
     42            (TyA VectorRegs:$Vs)>;
     43 }
     44 
     45 multiclass bitconvert_dblvec<ValueType TyA, ValueType TyB> {  // casts between two VecDblRegs types
     46   def: Pat<(TyB (bitconvert (TyA VecDblRegs:$Vss))),          // TyA -> TyB: reuse the register
     47            (TyB VecDblRegs:$Vss)>;
     48   def: Pat<(TyA (bitconvert (TyB VecDblRegs:$Vss))),          // TyB -> TyA: reuse the register
     49            (TyA VecDblRegs:$Vss)>;
     50 }
     51 
     52 multiclass bitconvert_predvec<ValueType a, ValueType b> {  // casts between a VecPredRegs type (a) and a VectorRegs type (b)
     53   def : Pat <(b (bitconvert (a VecPredRegs:$src))),
     54              (b  VectorRegs:$src)>;    // NOTE(review): $src is matched as VecPredRegs but emitted as VectorRegs
     55   def : Pat <(a (bitconvert (b VectorRegs:$src))),
     56              (a  VecPredRegs:$src)>;   // NOTE(review): same class change in reverse; siblings keep one class - confirm intent
     57 }
     58 
     59 multiclass bitconvert_dblvec128B<ValueType TyA, ValueType TyB> {  // casts between two VecDblRegs128B types
     60   def: Pat<(TyB (bitconvert (TyA VecDblRegs128B:$Vss))),          // TyA -> TyB: reuse the register
     61            (TyB VecDblRegs128B:$Vss)>;
     62   def: Pat<(TyA (bitconvert (TyB VecDblRegs128B:$Vss))),          // TyB -> TyA: reuse the register
     63            (TyA VecDblRegs128B:$Vss)>;
     64 }
     65 
     66 // Bit convert vector types.
     67 defm : bitconvert_32<v4i8, i32>;
     68 defm : bitconvert_32<v2i16, i32>;
     69 defm : bitconvert_32<v2i16, v4i8>;
     70 
     71 defm : bitconvert_64<v8i8, i64>;
     72 defm : bitconvert_64<v4i16, i64>;
     73 defm : bitconvert_64<v2i32, i64>;
     74 defm : bitconvert_64<v8i8, v4i16>;
     75 defm : bitconvert_64<v8i8, v2i32>;
     76 defm : bitconvert_64<v4i16, v2i32>;
     77 
     78 defm : bitconvert_vec<v64i8, v16i32>;
     79 defm : bitconvert_vec<v8i64 , v16i32>;
     80 defm : bitconvert_vec<v32i16, v16i32>;
     81 
     82 defm : bitconvert_dblvec<v16i64, v128i8>;
     83 defm : bitconvert_dblvec<v32i32, v128i8>;
     84 defm : bitconvert_dblvec<v64i16, v128i8>;
     85 
     86 defm : bitconvert_dblvec128B<v64i32, v128i16>;
     87 defm : bitconvert_dblvec128B<v256i8, v128i16>;
     88 defm : bitconvert_dblvec128B<v32i64, v128i16>;
     89 
     90 defm : bitconvert_dblvec128B<v64i32, v256i8>;
     91 defm : bitconvert_dblvec128B<v32i64, v256i8>;
     92 defm : bitconvert_dblvec128B<v128i16, v256i8>;
     93 
     94 // Vector shift support. Vector shifting in Hexagon is rather different
     95 // from internal representation of LLVM.
     96 // LLVM assumes all shifts (in vector case) will have the form
     97 // <VT> = SHL/SRA/SRL <VT> by <VT>
     98 // while Hexagon has the following format:
     99 // <VT> = SHL/SRA/SRL <VT> by <IT/i32>
    100 // As a result, special care is needed to guarantee correctness and
    101 // performance.
    102 class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>  // v4i16 shift by a u4 immediate
    103   : S_2OpInstImm<Str, MajOp, MinOp, u4Imm,
    104       [(set (v4i16 DoubleRegs:$dst),
    105             (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> {
    106   bits<4> src2;             // shift amount operand (4 bits)
    107   let Inst{11-8} = src2;    // placed in encoding bits 11..8
    108 }
    109 
    110 class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>  // v2i32 shift by a u5 immediate
    111   : S_2OpInstImm<Str, MajOp, MinOp, u5Imm,
    112       [(set (v2i32 DoubleRegs:$dst),
    113             (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> {
    114   bits<5> src2;             // shift amount operand (5 bits)
    115   let Inst{12-8} = src2;    // placed in encoding bits 12..8
    116 }
    117 
    118 def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
    119           (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
    120 
    121 def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
    122           (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
    123 
    124 def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
    125 def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
    126 def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
    127 
    128 def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
    129 def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
    130 def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
    131 
    132 
    133 def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
    134 def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
    135 
    136 // Replicate the low 8 bits of the 32-bit input register into each of the
    137 // four bytes of the 32-bit destination register.
    138 def: Pat<(v4i8  (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
    139 
    140 // Replicate the low 16-bits from 32-bits input register into each of the
    141 // four halfwords of 64-bits destination register.
    142 def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
    143 
    144 
    145 class VArith_pat<InstHexagon Insn, SDNode BinOp, PatFrag VecTy>  // (BinOp x, y) -> (Insn x, y)
    146   : Pat<(BinOp VecTy:$Rs, VecTy:$Rt),
    147         (Insn VecTy:$Rs, VecTy:$Rt)>;
    148 
    149 def: VArith_pat <A2_vaddub, add, V8I8>;
    150 def: VArith_pat <A2_vaddh,  add, V4I16>;
    151 def: VArith_pat <A2_vaddw,  add, V2I32>;
    152 def: VArith_pat <A2_vsubub, sub, V8I8>;
    153 def: VArith_pat <A2_vsubh,  sub, V4I16>;
    154 def: VArith_pat <A2_vsubw,  sub, V2I32>;
    155 
    156 def: VArith_pat <A2_and,    and, V2I16>;
    157 def: VArith_pat <A2_xor,    xor, V2I16>;
    158 def: VArith_pat <A2_or,     or,  V2I16>;
    159 
    160 def: VArith_pat <A2_andp,   and, V8I8>;
    161 def: VArith_pat <A2_andp,   and, V4I16>;
    162 def: VArith_pat <A2_andp,   and, V2I32>;
    163 def: VArith_pat <A2_orp,    or,  V8I8>;
    164 def: VArith_pat <A2_orp,    or,  V4I16>;
    165 def: VArith_pat <A2_orp,    or,  V2I32>;
    166 def: VArith_pat <A2_xorp,   xor, V8I8>;
    167 def: VArith_pat <A2_xorp,   xor, V4I16>;
    168 def: VArith_pat <A2_xorp,   xor, V2I32>;
    169 
    170 def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
    171                                                     (i32 u5ImmPred:$c))))),
    172          (S2_asr_i_vw V2I32:$b, imm:$c)>;
    173 def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
    174                                                     (i32 u5ImmPred:$c))))),
    175          (S2_lsr_i_vw V2I32:$b, imm:$c)>;
    176 def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
    177                                                     (i32 u5ImmPred:$c))))),
    178          (S2_asl_i_vw V2I32:$b, imm:$c)>;
    179 
    180 def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
    181          (S2_asr_i_vh V4I16:$b, imm:$c)>;
    182 def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
    183          (S2_lsr_i_vh V4I16:$b, imm:$c)>;
    184 def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
    185          (S2_asl_i_vh V4I16:$b, imm:$c)>;
    186 
    187 
    188 def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
    189   [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
    190 def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
    191   [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
    192 
    193 def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
    194 def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
    195 def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
    196 def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
    197 def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
    198 def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
    199 
    200 def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
    201          (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
    202 def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
    203          (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
    204 def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
    205          (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
    206 def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
    207          (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
    208 def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
    209          (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
    210 def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
    211          (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
    212 
    213 // Vector shift words by register
    214 def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
    215 def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
    216 def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>;
    217 def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>;
    218 
    219 // Vector shift halfwords by register
    220 def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
    221 def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
    222 def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
    223 def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
    224 
    225 class vshift_rr_pat<InstHexagon Insn, SDNode ShiftOp, PatFrag VecTy>  // vector shifted by an I32 amount
    226   : Pat<(ShiftOp VecTy:$Rss, I32:$Rt),
    227         (Insn VecTy:$Rss, I32:$Rt)>;
    228 
    229 def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
    230 def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
    231 def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
    232 def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
    233 def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
    234 def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
    235 
    236 
    237 def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
    238   [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
    239 def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
    240   [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
    241 def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
    242   [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
    243 
    244 def HexagonVCMPBEQ:  SDNode<"HexagonISD::VCMPBEQ",  SDTHexagonVecCompare_v8i8>;
    245 def HexagonVCMPBGT:  SDNode<"HexagonISD::VCMPBGT",  SDTHexagonVecCompare_v8i8>;
    246 def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
    247 def HexagonVCMPHEQ:  SDNode<"HexagonISD::VCMPHEQ",  SDTHexagonVecCompare_v4i16>;
    248 def HexagonVCMPHGT:  SDNode<"HexagonISD::VCMPHGT",  SDTHexagonVecCompare_v4i16>;
    249 def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
    250 def HexagonVCMPWEQ:  SDNode<"HexagonISD::VCMPWEQ",  SDTHexagonVecCompare_v2i32>;
    251 def HexagonVCMPWGT:  SDNode<"HexagonISD::VCMPWGT",  SDTHexagonVecCompare_v2i32>;
    252 def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
    253 
    254 
    255 class vcmp_i1_pat<InstHexagon Insn, SDNode CmpOp, PatFrag VecTy>  // vector compare node typed as i1
    256   : Pat<(i1 (CmpOp VecTy:$Rss, VecTy:$Rtt)),
    257         (Insn VecTy:$Rss, VecTy:$Rtt)>;
    258 
    259 def: vcmp_i1_pat<A2_vcmpbeq,  HexagonVCMPBEQ,  V8I8>;
    260 def: vcmp_i1_pat<A4_vcmpbgt,  HexagonVCMPBGT,  V8I8>;
    261 def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
    262 
    263 def: vcmp_i1_pat<A2_vcmpheq,  HexagonVCMPHEQ,  V4I16>;
    264 def: vcmp_i1_pat<A2_vcmphgt,  HexagonVCMPHGT,  V4I16>;
    265 def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
    266 
    267 def: vcmp_i1_pat<A2_vcmpweq,  HexagonVCMPWEQ,  V2I32>;
    268 def: vcmp_i1_pat<A2_vcmpwgt,  HexagonVCMPWGT,  V2I32>;
    269 def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
    270 
    271 
    272 class vcmp_vi1_pat<InstHexagon Insn, PatFrag CmpOp, PatFrag VecTy, ValueType PredTy>
    273   : Pat<(PredTy (CmpOp VecTy:$Rss, VecTy:$Rtt)),   // compare whose result type is PredTy
    274         (Insn VecTy:$Rss, VecTy:$Rtt)>;
    275 
    276 def: vcmp_vi1_pat<A2_vcmpweq,  seteq,  V2I32, v2i1>;
    277 def: vcmp_vi1_pat<A2_vcmpwgt,  setgt,  V2I32, v2i1>;
    278 def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
    279 
    280 def: vcmp_vi1_pat<A2_vcmpheq,  seteq,  V4I16, v4i1>;
    281 def: vcmp_vi1_pat<A2_vcmphgt,  setgt,  V4I16, v4i1>;
    282 def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
    283 
    284 
    285 // Hexagon doesn't have a vector multiply with C semantics.
    286 // Instead, generate a pseudo instruction that gets expanded into two
    287 // scalar MPYI instructions.
    288 // This is expanded by ExpandPostRAPseudos.
    289 let isPseudo = 1 in
    290 def VMULW : PseudoM<(outs DoubleRegs:$Rd),
    291       (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
    292       ".error \"Should never try to emit VMULW\"",
    293       [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;
    294 
    295 let isPseudo = 1 in
    296 def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
    297       (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
    298       ".error \"Should never try to emit VMULW_ACC\"",
    299       [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
    300       "$Rd = $Rx">;
    301 
    302 // Adds two v4i8: Hexagon does not have an insn for this one, so we
    303 // use the double add v8i8, and use only the low part of the result.
    304 def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
    305          (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
    306 
    307 // Subtract two v4i8: Hexagon does not have an insn for this one, so we
    308 // use the double sub v8i8, and use only the low part of the result.
    309 def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
    310          (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
    311 
    312 //
    313 // No 32 bit vector mux.
    314 //
    315 def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
    316          (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
    317 def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
    318          (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
    319 
    320 //
    321 // 64-bit vector mux.
    322 //
    323 def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
    324          (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
    325 def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
    326          (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
    327 def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
    328          (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
    329 
    330 //
    331 // No 32 bit vector compare.
    332 //
    333 def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
    334          (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
    335 def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
    336          (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
    337 def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
    338          (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
    339 
    340 def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
    341          (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
    342 def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
    343          (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
    344 def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
    345          (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
    346 
    347 
    348 class InvertCmp_pat<InstHexagon SwappedInsn, PatFrag Cmp, PatFrag VecTy,
    349                     ValueType ResTy>
    350   : Pat<(ResTy (Cmp VecTy:$Rss, VecTy:$Rtt)),
    351         (SwappedInsn VecTy:$Rtt, VecTy:$Rss)>;   // operands deliberately swapped
    352 
    353 // Map from a compare operation to the corresponding instruction with the
    354 // order of operands reversed, e.g.  x < y --> cmp.gt(y,x).
    355 def: InvertCmp_pat<A4_vcmpbgt,  setlt,  V8I8,  i1>;
    356 def: InvertCmp_pat<A4_vcmpbgt,  setlt,  V8I8,  v8i1>;
    357 def: InvertCmp_pat<A2_vcmphgt,  setlt,  V4I16, i1>;
    358 def: InvertCmp_pat<A2_vcmphgt,  setlt,  V4I16, v4i1>;
    359 def: InvertCmp_pat<A2_vcmpwgt,  setlt,  V2I32, i1>;
    360 def: InvertCmp_pat<A2_vcmpwgt,  setlt,  V2I32, v2i1>;
    361 
    362 def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8,  i1>;
    363 def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8,  v8i1>;
    364 def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
    365 def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
    366 def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
    367 def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
    368 
    369 // Word-wise "not equal": select v2i32 setne as the inverse of the word
    370 // equality compare (same instruction as seteq), rs != rt -> !(rs == rt).
    371 def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
    372          (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
    373 
    374 
    375 // Truncate: from vector B copy all 'E'ven 'B'yte elements:
    376 // A[0] = B[0];  A[1] = B[2];  A[2] = B[4];  A[3] = B[6];
    377 def: Pat<(v4i8 (trunc V4I16:$Rs)),
    378          (S2_vtrunehb V4I16:$Rs)>;
    379 
    380 // Truncate: from vector B copy all 'O'dd 'B'yte elements:
    381 // A[0] = B[1];  A[1] = B[3];  A[2] = B[5];  A[3] = B[7];
    382 // S2_vtrunohb
    383 
    384 // Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
    385 // A[0] = B[0];  A[1] = B[2];  A[2] = C[0];  A[3] = C[2];
    386 // S2_vtruneh
    387 
    388 def: Pat<(v2i16 (trunc V2I32:$Rs)),
    389          (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
    390 
    391 
    392 def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
    393 def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
    394 // NOTE(review): VSXTBW is selected to S2_vsxthw below; confirm the node name and instruction are the intended pair.
    395 def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
    396 def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
    397 
    398 def: Pat<(v4i16 (zext   V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
    399 def: Pat<(v2i32 (zext   V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
    400 def: Pat<(v4i16 (anyext V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
    401 def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
    402 def: Pat<(v4i16 (sext   V4I8:$Rs)),  (S2_vsxtbh V4I8:$Rs)>;
    403 def: Pat<(v2i32 (sext   V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
    404 
    405 // Sign extends a v2i8 into a v2i32.
    406 def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
    407          (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
    408 
    409 // Sign extends a v2i16 into a v2i32.
    410 def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
    411          (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
    412 
    413 
    414 // Multiplies two v2i16 and returns a v2i32.  We are using here the
    415 // saturating multiply, as hexagon does not provide a non saturating
    416 // vector multiply, and saturation does not impact the result that is
    417 // in double precision of the operands.
    418 
    419 // Multiplies two v2i16 vectors: as Hexagon does not have a multiply
    420 // with the C semantics for this one, this pattern uses the half word
    421 // multiply vmpyh that takes two v2i16 and returns a v2i32.  This is
    422 // then truncated to fit this back into a v2i16 and to simulate the
    423 // wrap around semantics for unsigned in C.
    424 def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
    425                       (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
    426 
    427 def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
    428          (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
    429                              (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
    430 
    431 // Multiplies two v4i16 vectors.
    432 def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
    433          (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
    434                       (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
    435 
    436 def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
    437   (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
    438                (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
    439 
    440 // Multiplies two v4i8 vectors.
    441 def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
    442          (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
    443      Requires<[HasV5T]>;
    444 
    445 def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
    446          (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
    447 
    448 // Multiplies two v8i8 vectors.
    449 def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
    450          (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
    451                       (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
    452      Requires<[HasV5T]>;
    453 
    454 def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
    455          (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
    456                       (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
    457 
    458 
    459 class shuffler<SDNode Op, string Str>   // two-source DoubleRegs instruction printed as "$a = Str($b, $c)"
    460   : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
    461       "$a = " # Str # "($b, $c)",       // assembly string built around the mnemonic Str
    462       [(set (i64 DoubleRegs:$a),        // selected for (Op $b, $c) with all values typed i64
    463             (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
    464       "", S_3op_tc_1_SLOT23>;
    465 
    466 def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
    467   [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
    468 
    469 def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
    470 def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
    471 def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
    472 def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
    473 
    474 class ShufflePat<InstHexagon Insn, SDNode ShufOp>   // i64 shuffle node -> Insn, operand order kept
    475   : Pat<(i64 (ShufOp DoubleRegs:$Rss, DoubleRegs:$Rtt)),
    476         (i64 (Insn DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
    477 
    478 // Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
    479 def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
    480 
    481 // Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
    482 def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
    483 
    484 // Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
    485 def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
    486 
    487 // Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
    488 def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
    489 
    490 
    491 // Truncated store from v4i16 to v4i8.
    492 def truncstorev4i8: PatFrag<(ops node:$Val, node:$Addr),   // truncstore whose memory VT is v4i8
    493                             (truncstore node:$Val, node:$Addr),
    494     [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
    495 
    496 // Truncated store from v2i32 to v2i16.
    497 def truncstorev2i16: PatFrag<(ops node:$Val, node:$Addr),  // truncstore whose memory VT is v2i16
    498                              (truncstore node:$Val, node:$Addr),
    499     [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
    500 
    501 def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
    502          (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
    503                                                       (LoReg $Rs))))>;
    504 
    505 def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
    506          (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
    507 
    508 
    509 // Zero- and sign-extending loads whose in-memory type is v2i8.
    510 def zextloadv2i8: PatFrag<(ops node:$Addr), (zextload node:$Addr),
    511     [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
    512 
    513 def sextloadv2i8: PatFrag<(ops node:$Addr), (sextload node:$Addr),
    514     [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
    515 
    516 def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
    517          (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
    518 
    519 def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
    520          (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
    521 
    522 def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
    523          (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
    524 
    525 def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
    526          (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
    527