//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the Hexagon Vector instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

// Pattern leaves tying each short-vector value type to the register class
// that carries it: i1 vectors live in predicate registers, 32-bit vectors
// in IntRegs, and 64-bit vectors in DoubleRegs register pairs.
def V2I1:  PatLeaf<(v2i1  PredRegs:$R)>;
def V4I1:  PatLeaf<(v4i1  PredRegs:$R)>;
def V8I1:  PatLeaf<(v8i1  PredRegs:$R)>;
def V4I8:  PatLeaf<(v4i8  IntRegs:$R)>;
def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
def V8I8:  PatLeaf<(v8i8  DoubleRegs:$R)>;
def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;


// Bitconverts between two types carried by the same register class are
// no-ops; each multiclass below folds both directions of the conversion.

// 32-bit types held in IntRegs.
multiclass bitconvert_32<ValueType a, ValueType b> {
  def : Pat <(b (bitconvert (a IntRegs:$src))),
             (b IntRegs:$src)>;
  def : Pat <(a (bitconvert (b IntRegs:$src))),
             (a IntRegs:$src)>;
}

// 64-bit types held in DoubleRegs.
multiclass bitconvert_64<ValueType a, ValueType b> {
  def : Pat <(b (bitconvert (a DoubleRegs:$src))),
             (b DoubleRegs:$src)>;
  def : Pat <(a (bitconvert (b DoubleRegs:$src))),
             (a DoubleRegs:$src)>;
}

// HVX single vector registers.
multiclass bitconvert_vec<ValueType a, ValueType b> {
  def : Pat <(b (bitconvert (a VectorRegs:$src))),
             (b VectorRegs:$src)>;
  def : Pat <(a (bitconvert (b VectorRegs:$src))),
             (a VectorRegs:$src)>;
}

// HVX vector register pairs.
multiclass bitconvert_dblvec<ValueType a, ValueType b> {
  def : Pat <(b (bitconvert (a VecDblRegs:$src))),
             (b VecDblRegs:$src)>;
  def : Pat <(a (bitconvert (b VecDblRegs:$src))),
             (a VecDblRegs:$src)>;
}

// HVX predicate registers.
// NOTE(review): the source and result register classes are asymmetric here
// (VecPredRegs in, VectorRegs out, and vice versa), unlike the sibling
// multiclasses above -- confirm against the register definitions before
// changing.
multiclass bitconvert_predvec<ValueType a, ValueType b> {
  def : Pat <(b (bitconvert (a VecPredRegs:$src))),
             (b VectorRegs:$src)>;
  def : Pat <(a (bitconvert (b VectorRegs:$src))),
             (a VecPredRegs:$src)>;
}

// HVX 128-byte-mode vector register pairs.
multiclass bitconvert_dblvec128B<ValueType a, ValueType b> {
  def : Pat <(b (bitconvert (a VecDblRegs128B:$src))),
             (b VecDblRegs128B:$src)>;
  def : Pat <(a (bitconvert (b VecDblRegs128B:$src))),
             (a VecDblRegs128B:$src)>;
}

// Bit convert vector types.
defm : bitconvert_32<v4i8, i32>;
defm : bitconvert_32<v2i16, i32>;
defm : bitconvert_32<v2i16, v4i8>;

defm : bitconvert_64<v8i8, i64>;
defm : bitconvert_64<v4i16, i64>;
defm : bitconvert_64<v2i32, i64>;
defm : bitconvert_64<v8i8, v4i16>;
defm : bitconvert_64<v8i8, v2i32>;
defm : bitconvert_64<v4i16, v2i32>;

defm : bitconvert_vec<v64i8, v16i32>;
defm : bitconvert_vec<v8i64 , v16i32>;
defm : bitconvert_vec<v32i16, v16i32>;

defm : bitconvert_dblvec<v16i64, v128i8>;
defm : bitconvert_dblvec<v32i32, v128i8>;
defm : bitconvert_dblvec<v64i16, v128i8>;

defm : bitconvert_dblvec128B<v64i32, v128i16>;
defm : bitconvert_dblvec128B<v256i8, v128i16>;
defm : bitconvert_dblvec128B<v32i64, v128i16>;

defm : bitconvert_dblvec128B<v64i32, v256i8>;
defm : bitconvert_dblvec128B<v32i64, v256i8>;
defm : bitconvert_dblvec128B<v128i16, v256i8>;

// Vector shift support. Vector shifting in Hexagon is rather different
// from internal representation of LLVM.
// LLVM assumes all shifts (in vector case) will have the form
//   <VT> = SHL/SRA/SRL <VT> by <VT>
// while Hexagon has the following format:
//   <VT> = SHL/SRA/SRL <VT> by <IT/i32>
// As a result, special care is needed to guarantee correctness and
// performance.
// Immediate shift of four halfwords; the 4-bit shift count is encoded in
// Inst{11-8}.
class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
  : S_2OpInstImm<Str, MajOp, MinOp, u4Imm,
      [(set (v4i16 DoubleRegs:$dst),
            (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> {
  bits<4> src2;
  let Inst{11-8} = src2;
}

// Immediate shift of two words; the 5-bit shift count is encoded in
// Inst{12-8}.
class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
  : S_2OpInstImm<Str, MajOp, MinOp, u5Imm,
      [(set (v2i32 DoubleRegs:$dst),
            (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> {
  bits<5> src2;
  let Inst{12-8} = src2;
}

// v2i16 add/sub select the scalar-register halfword vector add/sub.
def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
          (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;

def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
          (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;

// Immediate vector shifts by word.
def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;

// Immediate vector shifts by halfword.
def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;


// Splat nodes produced by Hexagon lowering.
def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;

// Replicate the low 8-bits from 32-bits input register into each of the
// four bytes of 32-bits destination register.
def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;

// Replicate the low 16-bits from 32-bits input register into each of the
// four halfwords of 64-bits destination register.
def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;


// Binary vector arithmetic: one instruction, one SDNode, one operand type.
class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
  : Pat <(Op Type:$Rss, Type:$Rtt),
         (MI Type:$Rss, Type:$Rtt)>;

// Element-wise add/sub on 64-bit vector types.
def: VArith_pat <A2_vaddub, add, V8I8>;
def: VArith_pat <A2_vaddh,  add, V4I16>;
def: VArith_pat <A2_vaddw,  add, V2I32>;
def: VArith_pat <A2_vsubub, sub, V8I8>;
def: VArith_pat <A2_vsubh,  sub, V4I16>;
def: VArith_pat <A2_vsubw,  sub, V2I32>;

// Bitwise logic is element-size agnostic: 32-bit vectors use the scalar
// logical ops, 64-bit vectors use the register-pair (p-suffixed) forms.
def: VArith_pat <A2_and,    and, V2I16>;
def: VArith_pat <A2_xor,    xor, V2I16>;
def: VArith_pat <A2_or,     or,  V2I16>;

def: VArith_pat <A2_andp,   and, V8I8>;
def: VArith_pat <A2_andp,   and, V4I16>;
def: VArith_pat <A2_andp,   and, V2I32>;
def: VArith_pat <A2_orp,    or,  V8I8>;
def: VArith_pat <A2_orp,    or,  V4I16>;
def: VArith_pat <A2_orp,    or,  V2I32>;
def: VArith_pat <A2_xorp,   xor, V8I8>;
def: VArith_pat <A2_xorp,   xor, V4I16>;
def: VArith_pat <A2_xorp,   xor, V2I32>;

// A v2i32 shift by a splatted amount (COMBINE of the same u5 constant in
// both words) selects a single immediate vector shift.
def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
                                                    (i32 u5ImmPred:$c))))),
         (S2_asr_i_vw V2I32:$b, imm:$c)>;
def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
                                                    (i32 u5ImmPred:$c))))),
         (S2_lsr_i_vw V2I32:$b, imm:$c)>;
def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
                                                    (i32 u5ImmPred:$c))))),
         (S2_asl_i_vw V2I32:$b, imm:$c)>;

// Likewise for v4i16 shifted by a VSPLATH-splatted u4 immediate.
def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
         (S2_asr_i_vh V4I16:$b, imm:$c)>;
def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
         (S2_lsr_i_vh V4I16:$b, imm:$c)>;
def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
         (S2_asl_i_vh V4I16:$b, imm:$c)>;


// Hexagon-specific shift nodes: a vector shifted by a scalar amount.
def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;

def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;

// Shifts by an immediate amount select the immediate-form instructions.
def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
         (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
         (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
         (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
         (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
         (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
         (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;

// Vector shift words by register.
def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>;
def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>;

// Vector shift halfwords by register.
def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;

// Shifts by a register amount select the register-form instructions.
class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
  : Pat <(Op Value:$Rs, I32:$Rt),
         (MI Value:$Rs, I32:$Rt)>;

def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;


// Vector compares producing a single i1; both vector operands share the
// element type fixed by the profile.
def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;

def HexagonVCMPBEQ:  SDNode<"HexagonISD::VCMPBEQ",  SDTHexagonVecCompare_v8i8>;
def HexagonVCMPBGT:  SDNode<"HexagonISD::VCMPBGT",  SDTHexagonVecCompare_v8i8>;
def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
def HexagonVCMPHEQ:  SDNode<"HexagonISD::VCMPHEQ",  SDTHexagonVecCompare_v4i16>;
def HexagonVCMPHGT:  SDNode<"HexagonISD::VCMPHGT",  SDTHexagonVecCompare_v4i16>;
def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
def HexagonVCMPWEQ:  SDNode<"HexagonISD::VCMPWEQ",  SDTHexagonVecCompare_v2i32>;
def HexagonVCMPWGT:  SDNode<"HexagonISD::VCMPWGT",  SDTHexagonVecCompare_v2i32>;
def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;


// Selects a compare instruction for a Hexagon compare node producing i1.
class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
  : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
         (MI Value:$Rs, Value:$Rt)>;

def: vcmp_i1_pat<A2_vcmpbeq,  HexagonVCMPBEQ,  V8I8>;
def: vcmp_i1_pat<A4_vcmpbgt,  HexagonVCMPBGT,  V8I8>;
def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;

def: vcmp_i1_pat<A2_vcmpheq,  HexagonVCMPHEQ,  V4I16>;
def: vcmp_i1_pat<A2_vcmphgt,  HexagonVCMPHGT,  V4I16>;
def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;

def: vcmp_i1_pat<A2_vcmpweq,  HexagonVCMPWEQ,  V2I32>;
def: vcmp_i1_pat<A2_vcmpwgt,  HexagonVCMPWGT,  V2I32>;
def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;


// Selects a compare instruction for a generic setcc producing a vector of
// i1 predicate bits.
class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
  : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
         (MI InVal:$Rs, InVal:$Rt)>;

def: vcmp_vi1_pat<A2_vcmpweq,  seteq,  V2I32, v2i1>;
def: vcmp_vi1_pat<A2_vcmpwgt,  setgt,  V2I32, v2i1>;
def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;

def: vcmp_vi1_pat<A2_vcmpheq,  seteq,  V4I16, v4i1>;
def: vcmp_vi1_pat<A2_vcmphgt,  setgt,  V4I16, v4i1>;
def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;


// Hexagon doesn't have a vector multiply with C semantics.
// Instead, generate a pseudo instruction that gets expanded into two
// scalar MPYI instructions.
// This is expanded by ExpandPostRAPseudos.
let isPseudo = 1 in
def VMULW : PseudoM<(outs DoubleRegs:$Rd),
      (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
      ".error \"Should never try to emit VMULW\"",
      [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;

let isPseudo = 1 in
def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
      (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
      ".error \"Should never try to emit VMULW_ACC\"",
      [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
      "$Rd = $Rx">;

// Adds two v4i8: Hexagon does not have an insn for this one, so we
// use the double add v8i8, and use only the low part of the result.
def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
         (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;

// Subtract two v4i8: Hexagon does not have an insn for this one, so we
// use the double sub v8i8, and use only the low part of the result.
def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
         (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;

//
// No 32 bit vector mux.
//
// Widen both operands to 64 bits, use the 64-bit vmux, and keep only the
// low word of the result.
def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;

//
// 64-bit vector mux.
//
def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
         (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
         (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
         (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;

//
// No 32 bit vector compare: widen both operands to 64 bits and use the
// 64-bit vector compares instead.
//
def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
         (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
         (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
         (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;

def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
         (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
         (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
         (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;


// Select InvMI with the operand order swapped, so that "less-than" setccs
// can reuse the greater-than compare instructions.
class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
                    ValueType CmpTy>
  : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
        (InvMI Value:$Rt, Value:$Rs)>;

// Map from a compare operation to the corresponding instruction with the
// order of operands reversed, e.g. x > y --> cmp.lt(y,x).
// Signed less-than: swap operands and use the greater-than compares.
def: InvertCmp_pat<A4_vcmpbgt,  setlt,  V8I8,  i1>;
def: InvertCmp_pat<A4_vcmpbgt,  setlt,  V8I8,  v8i1>;
def: InvertCmp_pat<A2_vcmphgt,  setlt,  V4I16, i1>;
def: InvertCmp_pat<A2_vcmphgt,  setlt,  V4I16, v4i1>;
def: InvertCmp_pat<A2_vcmpwgt,  setlt,  V2I32, i1>;
def: InvertCmp_pat<A2_vcmpwgt,  setlt,  V2I32, v2i1>;

// Unsigned less-than: likewise via the unsigned greater-than compares.
def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8,  i1>;
def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8,  v8i1>;
def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;

// Map from vcmpne(Rss) -> !vcmpew(Rss).
// rs != rt -> !(rs == rt).
// FIX: use the word-equality compare A2_vcmpweq here.  The previous
// A2_vcmpbeq compared individual bytes, so its per-byte predicate bits did
// not match the per-word v2i1 result this pattern claims to produce (and
// contradicted the mapping stated above).
def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;


// Truncate: from vector B copy all 'E'ven 'B'yte elements:
// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
def: Pat<(v4i8 (trunc V4I16:$Rs)),
         (S2_vtrunehb V4I16:$Rs)>;

// Truncate: from vector B copy all 'O'dd 'B'yte elements:
// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
// S2_vtrunohb

// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
// S2_vtruneh

// v2i32 -> v2i16: pack the low halfwords of the two words.
def: Pat<(v2i16 (trunc V2I32:$Rs)),
         (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;


// Sign-extension nodes: bytes to halfwords, halfwords to words.
def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;

def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;

// Widening vector extensions; anyext is free to use the zero-extending form.
def: Pat<(v4i16 (zext   V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (zext   V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (anyext V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (sext   V4I8:$Rs)),  (S2_vsxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (sext   V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;

// Sign extends a v2i8 into a v2i32: sign-extend each word's low byte and
// recombine.
def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
         (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;

// Sign extends a v2i16 into a v2i32.
def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
         (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;


// Multiplies two v2i16 and returns a v2i32. We are using here the
// saturating multiply, as hexagon does not provide a non saturating
// vector multiply, and saturation does not impact the result that is
// in double precision of the operands.

// Multiplies two v2i16 vectors: as Hexagon does not have a multiply
// with the C semantics for this one, this pattern uses the half word
// multiply vmpyh that takes two v2i16 and returns a v2i32. This is
// then truncated to fit this back into a v2i16 and to simulate the
// wrap around semantics for unsigned in C.
def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
                      (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;

def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
         (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
                             (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;

// Multiplies two v4i16 vectors.
def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
         (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
                      (vmpyh (LoReg $Rs), (LoReg $Rt)))>;

// Byte multiply without V5: sign-extend bytes to halfwords, multiply the
// halfwords, then narrow the words back down.
def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
  (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
               (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;

// Multiplies two v4i8 vectors.
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
         (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
     Requires<[HasV5T]>;

def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
         (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;

// Multiplies two v8i8 vectors.
// V5+: byte-multiply each register half and narrow each product back down.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
         (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
                      (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
     Requires<[HasV5T]>;

// Pre-V5 fallback via the expanded VMPYB_no_V5 fragment.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
         (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
                      (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;


// Shuffle instruction skeleton: a 64-bit result built from two 64-bit
// sources, issued in S-type slot 2/3.
class shuffler<SDNode Op, string Str>
  : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
          "$a = " # Str # "($b, $c)",
          [(set (i64 DoubleRegs:$a),
                (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
          "", S_3op_tc_1_SLOT23>;

def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;

def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;

// Selects the shuffle instruction for its Hexagon shuffle node.
class ShufflePat<InstHexagon MI, SDNode Op>
  : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
        (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;

// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;

// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;

// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;

// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;


// Truncated store from v4i16 to v4i8.
def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
                            (truncstore node:$val, node:$ptr),
    [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;

// Truncated store from v2i32 to v2i16.
def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr),
    [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;

// Pack the low halfwords of the two words, then store the packed word.
def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
         (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
                                                      (LoReg $Rs))))>;

// Narrow to the even bytes, then store the packed word.
def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
         (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;


// Zero and sign extended load from v2i8 into v2i16.
def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
    [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;

def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
    [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;

// Load the two bytes as one halfword, widen bytes to halfwords, and keep
// the low half of the widened pair.
def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
         (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;

def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
         (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;

// Extending loads to v2i32 widen a second time, halfwords to words.
def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
         (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;

def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
         (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;