1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file describes the X86 AVX512 instruction set, defining the 11 // instructions, and properties of the instructions which are needed for code 12 // generation, machine code emission, and analysis. 13 // 14 //===----------------------------------------------------------------------===// 15 16 // Group template arguments that can be derived from the vector type (EltNum x 17 // EltVT). These are things like the register class for the writemask, etc. 18 // The idea is to pass one of these as the template argument rather than the 19 // individual arguments. 20 // The template is also used for scalar types, in this case numelts is 1. 21 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc, 22 string suffix = ""> { 23 RegisterClass RC = rc; 24 ValueType EltVT = eltvt; 25 int NumElts = numelts; 26 27 // Corresponding mask register class. 28 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts); 29 30 // Corresponding write-mask register class. 31 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM"); 32 33 // The mask VT. 34 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1"); 35 36 // Suffix used in the instruction mnemonic. 37 string Suffix = suffix; 38 39 // VTName is a string name for vector VT. For vector types it will be 40 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32 41 // It is a little bit complex for scalar types, where NumElts = 1. 42 // In this case we build v4f32 or v2f64 43 string VTName = "v" # !if (!eq (NumElts, 1), 44 !if (!eq (EltVT.Size, 32), 4, 45 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT; 46 47 // The vector VT. 48 ValueType VT = !cast<ValueType>(VTName); 49 50 string EltTypeName = !cast<string>(EltVT); 51 // Size of the element type in bits, e.g. 32 for v16i32. 52 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName)); 53 int EltSize = EltVT.Size; 54 55 // "i" for integer types and "f" for floating-point types 56 string TypeVariantName = !subst(EltSizeName, "", EltTypeName); 57 58 // Size of RC in bits, e.g. 512 for VR512. 59 int Size = VT.Size; 60 61 // The corresponding memory operand, e.g. i512mem for VR512. 62 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem"); 63 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem"); 64 // FP scalar memory operand for intrinsics - ssmem/sdmem. 65 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"), 66 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)); 67 68 // Load patterns 69 // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64 70 // due to load promotion during legalization 71 PatFrag LdFrag = !cast<PatFrag>("load" # 72 !if (!eq (TypeVariantName, "i"), 73 !if (!eq (Size, 128), "v2i64", 74 !if (!eq (Size, 256), "v4i64", 75 !if (!eq (Size, 512), "v8i64", 76 VTName))), VTName)); 77 78 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # 79 !if (!eq (TypeVariantName, "i"), 80 !if (!eq (Size, 128), "v2i64", 81 !if (!eq (Size, 256), "v4i64", 82 !if (!eq (Size, 512), "v8i64", 83 VTName))), VTName)); 84 85 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT); 86 87 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"), 88 !cast<ComplexPattern>("sse_load_f32"), 89 !if (!eq (EltTypeName, "f64"), 90 !cast<ComplexPattern>("sse_load_f64"), 91 ?)); 92 93 // The string to specify embedded broadcast in assembly. 94 string BroadcastStr = "{1to" # NumElts # "}"; 95 96 // 8-bit compressed displacement tuple/subvector format. This is only 97 // defined for NumElts <= 8. 98 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0), 99 !cast<CD8VForm>("CD8VT" # NumElts), ?); 100 101 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm, 102 !if (!eq (Size, 256), sub_ymm, ?)); 103 104 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle, 105 !if (!eq (EltTypeName, "f64"), SSEPackedDouble, 106 SSEPackedInt)); 107 108 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X); 109 110 // A vector tye of the same width with element type i64. This is used to 111 // create patterns for logic ops. 112 ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64"); 113 114 // A vector type of the same width with element type i32. This is used to 115 // create the canonical constant zero node ImmAllZerosV. 116 ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32"); 117 dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV))); 118 119 string ZSuffix = !if (!eq (Size, 128), "Z128", 120 !if (!eq (Size, 256), "Z256", "Z")); 121 } 122 123 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">; 124 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">; 125 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">; 126 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">; 127 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">; 128 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">; 129 130 // "x" in v32i8x_info means RC = VR256X 131 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">; 132 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">; 133 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">; 134 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">; 135 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">; 136 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">; 137 138 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">; 139 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">; 140 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">; 141 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">; 142 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">; 143 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">; 144 145 // We map scalar types to the smallest (128-bit) vector type 146 // with the appropriate element type. This allows to use the same masking logic. 147 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">; 148 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">; 149 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">; 150 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">; 151 152 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256, 153 X86VectorVTInfo i128> { 154 X86VectorVTInfo info512 = i512; 155 X86VectorVTInfo info256 = i256; 156 X86VectorVTInfo info128 = i128; 157 } 158 159 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info, 160 v16i8x_info>; 161 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, 162 v8i16x_info>; 163 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info, 164 v4i32x_info>; 165 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info, 166 v2i64x_info>; 167 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info, 168 v4f32x_info>; 169 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info, 170 v2f64x_info>; 171 172 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm, 173 ValueType _vt> { 174 RegisterClass KRC = _krc; 175 RegisterClass KRCWM = _krcwm; 176 ValueType KVT = _vt; 177 } 178 179 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>; 180 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>; 181 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>; 182 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>; 183 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>; 184 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>; 185 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>; 186 187 // This multiclass generates the masking variants from the non-masking 188 // variant. It only provides the assembly pieces for the masking variants. 189 // It assumes custom ISel patterns for masking which can be provided as 190 // template arguments. 191 multiclass AVX512_maskable_custom<bits<8> O, Format F, 192 dag Outs, 193 dag Ins, dag MaskingIns, dag ZeroMaskingIns, 194 string OpcodeStr, 195 string AttSrcAsm, string IntelSrcAsm, 196 list<dag> Pattern, 197 list<dag> MaskingPattern, 198 list<dag> ZeroMaskingPattern, 199 string MaskingConstraint = "", 200 bit IsCommutable = 0, 201 bit IsKCommutable = 0, 202 bit IsKZCommutable = IsCommutable> { 203 let isCommutable = IsCommutable in 204 def NAME: AVX512<O, F, Outs, Ins, 205 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# 206 "$dst, "#IntelSrcAsm#"}", 207 Pattern>; 208 209 // Prefer over VMOV*rrk Pat<> 210 let isCommutable = IsKCommutable in 211 def NAME#k: AVX512<O, F, Outs, MaskingIns, 212 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"# 213 "$dst {${mask}}, "#IntelSrcAsm#"}", 214 MaskingPattern>, 215 EVEX_K { 216 // In case of the 3src subclass this is overridden with a let. 217 string Constraints = MaskingConstraint; 218 } 219 220 // Zero mask does not add any restrictions to commute operands transformation. 221 // So, it is Ok to use IsCommutable instead of IsKCommutable. 222 let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<> 223 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns, 224 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"# 225 "$dst {${mask}} {z}, "#IntelSrcAsm#"}", 226 ZeroMaskingPattern>, 227 EVEX_KZ; 228 } 229 230 231 // Common base class of AVX512_maskable and AVX512_maskable_3src. 232 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _, 233 dag Outs, 234 dag Ins, dag MaskingIns, dag ZeroMaskingIns, 235 string OpcodeStr, 236 string AttSrcAsm, string IntelSrcAsm, 237 dag RHS, dag MaskingRHS, 238 SDNode Select = vselect, 239 string MaskingConstraint = "", 240 bit IsCommutable = 0, 241 bit IsKCommutable = 0, 242 bit IsKZCommutable = IsCommutable> : 243 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr, 244 AttSrcAsm, IntelSrcAsm, 245 [(set _.RC:$dst, RHS)], 246 [(set _.RC:$dst, MaskingRHS)], 247 [(set _.RC:$dst, 248 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))], 249 MaskingConstraint, IsCommutable, 250 IsKCommutable, IsKZCommutable>; 251 252 // This multiclass generates the unconditional/non-masking, the masking and 253 // the zero-masking variant of the vector instruction. In the masking case, the 254 // perserved vector elements come from a new dummy input operand tied to $dst. 255 // This version uses a separate dag for non-masking and masking. 256 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _, 257 dag Outs, dag Ins, string OpcodeStr, 258 string AttSrcAsm, string IntelSrcAsm, 259 dag RHS, dag MaskRHS, 260 bit IsCommutable = 0, bit IsKCommutable = 0, 261 SDNode Select = vselect> : 262 AVX512_maskable_custom<O, F, Outs, Ins, 263 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), 264 !con((ins _.KRCWM:$mask), Ins), 265 OpcodeStr, AttSrcAsm, IntelSrcAsm, 266 [(set _.RC:$dst, RHS)], 267 [(set _.RC:$dst, 268 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))], 269 [(set _.RC:$dst, 270 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))], 271 "$src0 = $dst", IsCommutable, IsKCommutable>; 272 273 // This multiclass generates the unconditional/non-masking, the masking and 274 // the zero-masking variant of the vector instruction. In the masking case, the 275 // perserved vector elements come from a new dummy input operand tied to $dst. 276 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _, 277 dag Outs, dag Ins, string OpcodeStr, 278 string AttSrcAsm, string IntelSrcAsm, 279 dag RHS, 280 bit IsCommutable = 0, bit IsKCommutable = 0, 281 bit IsKZCommutable = IsCommutable, 282 SDNode Select = vselect> : 283 AVX512_maskable_common<O, F, _, Outs, Ins, 284 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), 285 !con((ins _.KRCWM:$mask), Ins), 286 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, 287 (Select _.KRCWM:$mask, RHS, _.RC:$src0), 288 Select, "$src0 = $dst", IsCommutable, IsKCommutable, 289 IsKZCommutable>; 290 291 // This multiclass generates the unconditional/non-masking, the masking and 292 // the zero-masking variant of the scalar instruction. 293 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _, 294 dag Outs, dag Ins, string OpcodeStr, 295 string AttSrcAsm, string IntelSrcAsm, 296 dag RHS, 297 bit IsCommutable = 0> : 298 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm, 299 RHS, IsCommutable, 0, IsCommutable, X86selects>; 300 301 // Similar to AVX512_maskable but in this case one of the source operands 302 // ($src1) is already tied to $dst so we just use that for the preserved 303 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude 304 // $src1. 305 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _, 306 dag Outs, dag NonTiedIns, string OpcodeStr, 307 string AttSrcAsm, string IntelSrcAsm, 308 dag RHS, 309 bit IsCommutable = 0, 310 bit IsKCommutable = 0, 311 SDNode Select = vselect, 312 bit MaskOnly = 0> : 313 AVX512_maskable_common<O, F, _, Outs, 314 !con((ins _.RC:$src1), NonTiedIns), 315 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 316 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 317 OpcodeStr, AttSrcAsm, IntelSrcAsm, 318 !if(MaskOnly, (null_frag), RHS), 319 (Select _.KRCWM:$mask, RHS, _.RC:$src1), 320 Select, "", IsCommutable, IsKCommutable>; 321 322 // Similar to AVX512_maskable_3src but in this case the input VT for the tied 323 // operand differs from the output VT. This requires a bitconvert on 324 // the preserved vector going into the vselect. 325 // NOTE: The unmasked pattern is disabled. 326 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT, 327 X86VectorVTInfo InVT, 328 dag Outs, dag NonTiedIns, string OpcodeStr, 329 string AttSrcAsm, string IntelSrcAsm, 330 dag RHS, bit IsCommutable = 0> : 331 AVX512_maskable_common<O, F, OutVT, Outs, 332 !con((ins InVT.RC:$src1), NonTiedIns), 333 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), 334 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), 335 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag), 336 (vselect InVT.KRCWM:$mask, RHS, 337 (bitconvert InVT.RC:$src1)), 338 vselect, "", IsCommutable>; 339 340 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _, 341 dag Outs, dag NonTiedIns, string OpcodeStr, 342 string AttSrcAsm, string IntelSrcAsm, 343 dag RHS, 344 bit IsCommutable = 0, 345 bit IsKCommutable = 0, 346 bit MaskOnly = 0> : 347 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm, 348 IntelSrcAsm, RHS, IsCommutable, IsKCommutable, 349 X86selects, MaskOnly>; 350 351 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _, 352 dag Outs, dag Ins, 353 string OpcodeStr, 354 string AttSrcAsm, string IntelSrcAsm, 355 list<dag> Pattern> : 356 AVX512_maskable_custom<O, F, Outs, Ins, 357 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), 358 !con((ins _.KRCWM:$mask), Ins), 359 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], 360 "$src0 = $dst">; 361 362 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _, 363 dag Outs, dag NonTiedIns, 364 string OpcodeStr, 365 string AttSrcAsm, string IntelSrcAsm, 366 list<dag> Pattern> : 367 AVX512_maskable_custom<O, F, Outs, 368 !con((ins _.RC:$src1), NonTiedIns), 369 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 370 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 371 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], 372 "">; 373 374 // Instruction with mask that puts result in mask register, 375 // like "compare" and "vptest" 376 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F, 377 dag Outs, 378 dag Ins, dag MaskingIns, 379 string OpcodeStr, 380 string AttSrcAsm, string IntelSrcAsm, 381 list<dag> Pattern, 382 list<dag> MaskingPattern, 383 bit IsCommutable = 0> { 384 let isCommutable = IsCommutable in 385 def NAME: AVX512<O, F, Outs, Ins, 386 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# 387 "$dst, "#IntelSrcAsm#"}", 388 Pattern>; 389 390 def NAME#k: AVX512<O, F, Outs, MaskingIns, 391 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"# 392 "$dst {${mask}}, "#IntelSrcAsm#"}", 393 MaskingPattern>, EVEX_K; 394 } 395 396 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _, 397 dag Outs, 398 dag Ins, dag MaskingIns, 399 string OpcodeStr, 400 string AttSrcAsm, string IntelSrcAsm, 401 dag RHS, dag MaskingRHS, 402 bit IsCommutable = 0> : 403 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr, 404 AttSrcAsm, IntelSrcAsm, 405 [(set _.KRC:$dst, RHS)], 406 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>; 407 408 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _, 409 dag Outs, dag Ins, string OpcodeStr, 410 string AttSrcAsm, string IntelSrcAsm, 411 dag RHS, bit IsCommutable = 0> : 412 AVX512_maskable_common_cmp<O, F, _, Outs, Ins, 413 !con((ins _.KRCWM:$mask), Ins), 414 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, 415 (and _.KRCWM:$mask, RHS), IsCommutable>; 416 417 multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _, 418 dag Outs, dag Ins, string OpcodeStr, 419 string AttSrcAsm, string IntelSrcAsm> : 420 AVX512_maskable_custom_cmp<O, F, Outs, 421 Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr, 422 AttSrcAsm, IntelSrcAsm, [], []>; 423 424 // This multiclass generates the unconditional/non-masking, the masking and 425 // the zero-masking variant of the vector instruction. In the masking case, the 426 // perserved vector elements come from a new dummy input operand tied to $dst. 427 multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _, 428 dag Outs, dag Ins, string OpcodeStr, 429 string AttSrcAsm, string IntelSrcAsm, 430 dag RHS, dag MaskedRHS, 431 bit IsCommutable = 0, SDNode Select = vselect> : 432 AVX512_maskable_custom<O, F, Outs, Ins, 433 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), 434 !con((ins _.KRCWM:$mask), Ins), 435 OpcodeStr, AttSrcAsm, IntelSrcAsm, 436 [(set _.RC:$dst, RHS)], 437 [(set _.RC:$dst, 438 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))], 439 [(set _.RC:$dst, 440 (Select _.KRCWM:$mask, MaskedRHS, 441 _.ImmAllZerosV))], 442 "$src0 = $dst", IsCommutable>; 443 444 445 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512. 446 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then 447 // swizzled by ExecutionDomainFix to pxor. 448 // We set canFoldAsLoad because this can be converted to a constant-pool 449 // load of an all-zeros value if folding it would be beneficial. 450 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, 451 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in { 452 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "", 453 [(set VR512:$dst, (v16i32 immAllZerosV))]>; 454 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", 455 [(set VR512:$dst, (v16i32 immAllOnesV))]>; 456 } 457 458 // Alias instructions that allow VPTERNLOG to be used with a mask to create 459 // a mix of all ones and all zeros elements. This is done this way to force 460 // the same register to be used as input for all three sources. 461 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in { 462 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst), 463 (ins VK16WM:$mask), "", 464 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask), 465 (v16i32 immAllOnesV), 466 (v16i32 immAllZerosV)))]>; 467 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst), 468 (ins VK8WM:$mask), "", 469 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask), 470 (bc_v8i64 (v16i32 immAllOnesV)), 471 (bc_v8i64 (v16i32 immAllZerosV))))]>; 472 } 473 474 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, 475 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in { 476 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "", 477 [(set VR128X:$dst, (v4i32 immAllZerosV))]>; 478 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "", 479 [(set VR256X:$dst, (v8i32 immAllZerosV))]>; 480 } 481 482 // Alias instructions that map fld0 to xorps for sse or vxorps for avx. 483 // This is expanded by ExpandPostRAPseudos. 484 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, 485 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in { 486 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "", 487 [(set FR32X:$dst, fp32imm0)]>; 488 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "", 489 [(set FR64X:$dst, fpimm0)]>; 490 } 491 492 //===----------------------------------------------------------------------===// 493 // AVX-512 - VECTOR INSERT 494 // 495 496 // Supports two different pattern operators for mask and unmasked ops. Allows 497 // null_frag to be passed for one. 498 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From, 499 X86VectorVTInfo To, 500 SDPatternOperator vinsert_insert, 501 SDPatternOperator vinsert_for_mask, 502 X86FoldableSchedWrite sched> { 503 let hasSideEffects = 0, ExeDomain = To.ExeDomain in { 504 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst), 505 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3), 506 "vinsert" # From.EltTypeName # "x" # From.NumElts, 507 "$src3, $src2, $src1", "$src1, $src2, $src3", 508 (vinsert_insert:$src3 (To.VT To.RC:$src1), 509 (From.VT From.RC:$src2), 510 (iPTR imm)), 511 (vinsert_for_mask:$src3 (To.VT To.RC:$src1), 512 (From.VT From.RC:$src2), 513 (iPTR imm))>, 514 AVX512AIi8Base, EVEX_4V, Sched<[sched]>; 515 let mayLoad = 1 in 516 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst), 517 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3), 518 "vinsert" # From.EltTypeName # "x" # From.NumElts, 519 "$src3, $src2, $src1", "$src1, $src2, $src3", 520 (vinsert_insert:$src3 (To.VT To.RC:$src1), 521 (From.VT (bitconvert (From.LdFrag addr:$src2))), 522 (iPTR imm)), 523 (vinsert_for_mask:$src3 (To.VT To.RC:$src1), 524 (From.VT (bitconvert (From.LdFrag addr:$src2))), 525 (iPTR imm))>, AVX512AIi8Base, EVEX_4V, 526 EVEX_CD8<From.EltSize, From.CD8TupleForm>, 527 Sched<[sched.Folded, ReadAfterLd]>; 528 } 529 } 530 531 // Passes the same pattern operator for masked and unmasked ops. 532 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, 533 X86VectorVTInfo To, 534 SDPatternOperator vinsert_insert, 535 X86FoldableSchedWrite sched> : 536 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>; 537 538 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From, 539 X86VectorVTInfo To, PatFrag vinsert_insert, 540 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> { 541 let Predicates = p in { 542 def : Pat<(vinsert_insert:$ins 543 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)), 544 (To.VT (!cast<Instruction>(InstrStr#"rr") 545 To.RC:$src1, From.RC:$src2, 546 (INSERT_get_vinsert_imm To.RC:$ins)))>; 547 548 def : Pat<(vinsert_insert:$ins 549 (To.VT To.RC:$src1), 550 (From.VT (bitconvert (From.LdFrag addr:$src2))), 551 (iPTR imm)), 552 (To.VT (!cast<Instruction>(InstrStr#"rm") 553 To.RC:$src1, addr:$src2, 554 (INSERT_get_vinsert_imm To.RC:$ins)))>; 555 } 556 } 557 558 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128, 559 ValueType EltVT64, int Opcode256, 560 X86FoldableSchedWrite sched> { 561 562 let Predicates = [HasVLX] in 563 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128, 564 X86VectorVTInfo< 4, EltVT32, VR128X>, 565 X86VectorVTInfo< 8, EltVT32, VR256X>, 566 vinsert128_insert, sched>, EVEX_V256; 567 568 defm NAME # "32x4Z" : vinsert_for_size<Opcode128, 569 X86VectorVTInfo< 4, EltVT32, VR128X>, 570 X86VectorVTInfo<16, EltVT32, VR512>, 571 vinsert128_insert, sched>, EVEX_V512; 572 573 defm NAME # "64x4Z" : vinsert_for_size<Opcode256, 574 X86VectorVTInfo< 4, EltVT64, VR256X>, 575 X86VectorVTInfo< 8, EltVT64, VR512>, 576 vinsert256_insert, sched>, VEX_W, EVEX_V512; 577 578 // Even with DQI we'd like to only use these instructions for masking. 579 let Predicates = [HasVLX, HasDQI] in 580 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128, 581 X86VectorVTInfo< 2, EltVT64, VR128X>, 582 X86VectorVTInfo< 4, EltVT64, VR256X>, 583 null_frag, vinsert128_insert, sched>, 584 VEX_W1X, EVEX_V256; 585 586 // Even with DQI we'd like to only use these instructions for masking. 587 let Predicates = [HasDQI] in { 588 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128, 589 X86VectorVTInfo< 2, EltVT64, VR128X>, 590 X86VectorVTInfo< 8, EltVT64, VR512>, 591 null_frag, vinsert128_insert, sched>, 592 VEX_W, EVEX_V512; 593 594 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256, 595 X86VectorVTInfo< 8, EltVT32, VR256X>, 596 X86VectorVTInfo<16, EltVT32, VR512>, 597 null_frag, vinsert256_insert, sched>, 598 EVEX_V512; 599 } 600 } 601 602 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI? 603 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>; 604 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>; 605 606 // Codegen pattern with the alternative types, 607 // Even with AVX512DQ we'll still use these for unmasked operations. 608 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, 609 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; 610 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, 611 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; 612 613 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info, 614 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; 615 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info, 616 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; 617 618 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info, 619 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; 620 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info, 621 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; 622 623 // Codegen pattern with the alternative types insert VEC128 into VEC256 624 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, 625 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; 626 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, 627 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; 628 // Codegen pattern with the alternative types insert VEC128 into VEC512 629 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info, 630 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; 631 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info, 632 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; 633 // Codegen pattern with the alternative types insert VEC256 into VEC512 634 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info, 635 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; 636 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info, 637 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; 638 639 640 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From, 641 X86VectorVTInfo To, X86VectorVTInfo Cast, 642 PatFrag vinsert_insert, 643 SDNodeXForm INSERT_get_vinsert_imm, 644 list<Predicate> p> { 645 let Predicates = p in { 646 def : Pat<(Cast.VT 647 (vselect Cast.KRCWM:$mask, 648 (bitconvert 649 (vinsert_insert:$ins (To.VT To.RC:$src1), 650 (From.VT From.RC:$src2), 651 (iPTR imm))), 652 Cast.RC:$src0)), 653 (!cast<Instruction>(InstrStr#"rrk") 654 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, 655 (INSERT_get_vinsert_imm To.RC:$ins))>; 656 def : Pat<(Cast.VT 657 (vselect Cast.KRCWM:$mask, 658 (bitconvert 659 (vinsert_insert:$ins (To.VT To.RC:$src1), 660 (From.VT 661 (bitconvert 662 (From.LdFrag addr:$src2))), 663 (iPTR imm))), 664 Cast.RC:$src0)), 665 (!cast<Instruction>(InstrStr#"rmk") 666 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, 667 (INSERT_get_vinsert_imm To.RC:$ins))>; 668 669 def : Pat<(Cast.VT 670 (vselect Cast.KRCWM:$mask, 671 (bitconvert 672 (vinsert_insert:$ins (To.VT To.RC:$src1), 673 (From.VT From.RC:$src2), 674 (iPTR imm))), 675 Cast.ImmAllZerosV)), 676 (!cast<Instruction>(InstrStr#"rrkz") 677 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, 678 (INSERT_get_vinsert_imm To.RC:$ins))>; 679 def : Pat<(Cast.VT 680 (vselect Cast.KRCWM:$mask, 681 (bitconvert 682 (vinsert_insert:$ins (To.VT To.RC:$src1), 683 (From.VT 684 (bitconvert 685 (From.LdFrag addr:$src2))), 686 (iPTR imm))), 687 Cast.ImmAllZerosV)), 688 (!cast<Instruction>(InstrStr#"rmkz") 689 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, 690 (INSERT_get_vinsert_imm To.RC:$ins))>; 691 } 692 } 693 694 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, 695 v8f32x_info, vinsert128_insert, 696 INSERT_get_vinsert128_imm, [HasVLX]>; 697 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info, 698 v4f64x_info, vinsert128_insert, 699 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; 700 701 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, 702 v8i32x_info, vinsert128_insert, 703 INSERT_get_vinsert128_imm, [HasVLX]>; 704 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, 705 v8i32x_info, vinsert128_insert, 706 INSERT_get_vinsert128_imm, [HasVLX]>; 707 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, 708 v8i32x_info, vinsert128_insert, 709 INSERT_get_vinsert128_imm, [HasVLX]>; 710 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info, 711 v4i64x_info, vinsert128_insert, 712 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; 713 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info, 714 v4i64x_info, vinsert128_insert, 715 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; 716 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info, 717 v4i64x_info, vinsert128_insert, 718 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; 719 720 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info, 721 v16f32_info, vinsert128_insert, 722 INSERT_get_vinsert128_imm, [HasAVX512]>; 723 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info, 724 v8f64_info, vinsert128_insert, 725 INSERT_get_vinsert128_imm, [HasDQI]>; 726 727 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info, 728 v16i32_info, vinsert128_insert, 729 INSERT_get_vinsert128_imm, [HasAVX512]>; 730 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info, 731 v16i32_info, vinsert128_insert, 732 INSERT_get_vinsert128_imm, [HasAVX512]>; 733 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info, 734 v16i32_info, vinsert128_insert, 735 INSERT_get_vinsert128_imm, [HasAVX512]>; 736 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info, 737 v8i64_info, vinsert128_insert, 738 INSERT_get_vinsert128_imm, [HasDQI]>; 739 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info, 740 v8i64_info, vinsert128_insert, 741 INSERT_get_vinsert128_imm, [HasDQI]>; 742 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info, 743 v8i64_info, vinsert128_insert, 744 INSERT_get_vinsert128_imm, [HasDQI]>; 745 746 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info, 747 v16f32_info, vinsert256_insert, 748 INSERT_get_vinsert256_imm, [HasDQI]>; 749 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info, 750 v8f64_info, vinsert256_insert, 751 INSERT_get_vinsert256_imm, [HasAVX512]>; 752 753 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info, 754 v16i32_info, vinsert256_insert, 755 INSERT_get_vinsert256_imm, [HasDQI]>; 756 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info, 757 v16i32_info, vinsert256_insert, 758 INSERT_get_vinsert256_imm, [HasDQI]>; 759 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info, 760 v16i32_info, vinsert256_insert, 761 INSERT_get_vinsert256_imm, [HasDQI]>; 762 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info, 763 v8i64_info, vinsert256_insert, 764 INSERT_get_vinsert256_imm, [HasAVX512]>; 765 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info, 766 v8i64_info, vinsert256_insert, 767 INSERT_get_vinsert256_imm, [HasAVX512]>; 768 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info, 769 v8i64_info, vinsert256_insert, 770 INSERT_get_vinsert256_imm, [HasAVX512]>; 771 772 // vinsertps - insert f32 to XMM 773 let ExeDomain = SSEPackedSingle in { 774 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), 775 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), 776 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 777 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>, 778 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; 779 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), 780 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), 781 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 782 [(set VR128X:$dst, (X86insertps VR128X:$src1, 783 (v4f32 (scalar_to_vector (loadf32 addr:$src2))), 784 imm:$src3))]>, 785 EVEX_4V, EVEX_CD8<32, CD8VT1>, 786 Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; 787 } 788 789 //===----------------------------------------------------------------------===// 790 // AVX-512 VECTOR EXTRACT 791 //--- 792 793 // Supports two different pattern operators for mask and unmasked ops. Allows 794 // null_frag to be passed for one. 795 multiclass vextract_for_size_split<int Opcode, 796 X86VectorVTInfo From, X86VectorVTInfo To, 797 SDPatternOperator vextract_extract, 798 SDPatternOperator vextract_for_mask, 799 SchedWrite SchedRR, SchedWrite SchedMR> { 800 801 let hasSideEffects = 0, ExeDomain = To.ExeDomain in { 802 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst), 803 (ins From.RC:$src1, u8imm:$idx), 804 "vextract" # To.EltTypeName # "x" # To.NumElts, 805 "$idx, $src1", "$src1, $idx", 806 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)), 807 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>, 808 AVX512AIi8Base, EVEX, Sched<[SchedRR]>; 809 810 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs), 811 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx), 812 "vextract" # To.EltTypeName # "x" # To.NumElts # 813 "\t{$idx, $src1, $dst|$dst, $src1, $idx}", 814 [(store (To.VT (vextract_extract:$idx 815 (From.VT From.RC:$src1), (iPTR imm))), 816 addr:$dst)]>, EVEX, 817 Sched<[SchedMR]>; 818 819 let mayStore = 1, hasSideEffects = 0 in 820 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs), 821 (ins To.MemOp:$dst, To.KRCWM:$mask, 822 From.RC:$src1, u8imm:$idx), 823 "vextract" # To.EltTypeName # "x" # To.NumElts # 824 "\t{$idx, $src1, $dst {${mask}}|" 825 "$dst {${mask}}, $src1, $idx}", []>, 826 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable; 827 } 828 } 829 830 // Passes the same pattern operator for masked and unmasked ops. 831 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From, 832 X86VectorVTInfo To, 833 SDPatternOperator vextract_extract, 834 SchedWrite SchedRR, SchedWrite SchedMR> : 835 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>; 836 837 // Codegen pattern for the alternative types 838 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From, 839 X86VectorVTInfo To, PatFrag vextract_extract, 840 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> { 841 let Predicates = p in { 842 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)), 843 (To.VT (!cast<Instruction>(InstrStr#"rr") 844 From.RC:$src1, 845 (EXTRACT_get_vextract_imm To.RC:$ext)))>; 846 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1), 847 (iPTR imm))), addr:$dst), 848 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1, 849 (EXTRACT_get_vextract_imm To.RC:$ext))>; 850 } 851 } 852 853 multiclass vextract_for_type<ValueType EltVT32, int Opcode128, 854 ValueType EltVT64, int Opcode256, 855 SchedWrite SchedRR, SchedWrite SchedMR> { 856 let Predicates = [HasAVX512] in { 857 defm NAME # "32x4Z" : vextract_for_size<Opcode128, 858 X86VectorVTInfo<16, EltVT32, VR512>, 859 X86VectorVTInfo< 4, EltVT32, VR128X>, 860 vextract128_extract, SchedRR, SchedMR>, 861 EVEX_V512, EVEX_CD8<32, CD8VT4>; 862 defm NAME # "64x4Z" : vextract_for_size<Opcode256, 863 X86VectorVTInfo< 8, EltVT64, VR512>, 864 X86VectorVTInfo< 4, EltVT64, VR256X>, 865 vextract256_extract, SchedRR, SchedMR>, 866 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; 867 } 868 let Predicates = [HasVLX] in 869 defm NAME # "32x4Z256" : vextract_for_size<Opcode128, 870 X86VectorVTInfo< 8, EltVT32, VR256X>, 871 X86VectorVTInfo< 4, EltVT32, VR128X>, 872 vextract128_extract, SchedRR, SchedMR>, 873 EVEX_V256, EVEX_CD8<32, CD8VT4>; 874 875 // Even with DQI we'd like to only use these instructions for masking. 876 let Predicates = [HasVLX, HasDQI] in 877 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128, 878 X86VectorVTInfo< 4, EltVT64, VR256X>, 879 X86VectorVTInfo< 2, EltVT64, VR128X>, 880 null_frag, vextract128_extract, SchedRR, SchedMR>, 881 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>; 882 883 // Even with DQI we'd like to only use these instructions for masking. 884 let Predicates = [HasDQI] in { 885 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128, 886 X86VectorVTInfo< 8, EltVT64, VR512>, 887 X86VectorVTInfo< 2, EltVT64, VR128X>, 888 null_frag, vextract128_extract, SchedRR, SchedMR>, 889 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; 890 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256, 891 X86VectorVTInfo<16, EltVT32, VR512>, 892 X86VectorVTInfo< 8, EltVT32, VR256X>, 893 null_frag, vextract256_extract, SchedRR, SchedMR>, 894 EVEX_V512, EVEX_CD8<32, CD8VT8>; 895 } 896 } 897 898 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types. 899 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>; 900 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>; 901 902 // extract_subvector codegen patterns with the alternative types. 903 // Even with AVX512DQ we'll still use these for unmasked operations. 904 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, 905 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; 906 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, 907 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; 908 909 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, 910 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; 911 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, 912 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; 913 914 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, 915 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; 916 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, 917 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; 918 919 // Codegen pattern with the alternative types extract VEC128 from VEC256 920 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info, 921 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; 922 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info, 923 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>; 924 925 // Codegen pattern with the alternative types extract VEC128 from VEC512 926 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, 927 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; 928 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, 929 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; 930 // Codegen pattern with the alternative types extract VEC256 from VEC512 931 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, 932 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; 933 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, 934 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; 935 936 937 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a 938 // smaller extract to enable EVEX->VEX. 939 let Predicates = [NoVLX] in { 940 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), 941 (v2i64 (VEXTRACTI128rr 942 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)), 943 (iPTR 1)))>; 944 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))), 945 (v2f64 (VEXTRACTF128rr 946 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)), 947 (iPTR 1)))>; 948 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))), 949 (v4i32 (VEXTRACTI128rr 950 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)), 951 (iPTR 1)))>; 952 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))), 953 (v4f32 (VEXTRACTF128rr 954 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)), 955 (iPTR 1)))>; 956 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))), 957 (v8i16 (VEXTRACTI128rr 958 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)), 959 (iPTR 1)))>; 960 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))), 961 (v16i8 (VEXTRACTI128rr 962 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)), 963 (iPTR 1)))>; 964 } 965 966 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a 967 // smaller extract to enable EVEX->VEX. 968 let Predicates = [HasVLX] in { 969 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), 970 (v2i64 (VEXTRACTI32x4Z256rr 971 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)), 972 (iPTR 1)))>; 973 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))), 974 (v2f64 (VEXTRACTF32x4Z256rr 975 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)), 976 (iPTR 1)))>; 977 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))), 978 (v4i32 (VEXTRACTI32x4Z256rr 979 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)), 980 (iPTR 1)))>; 981 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))), 982 (v4f32 (VEXTRACTF32x4Z256rr 983 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)), 984 (iPTR 1)))>; 985 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))), 986 (v8i16 (VEXTRACTI32x4Z256rr 987 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)), 988 (iPTR 1)))>; 989 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))), 990 (v16i8 (VEXTRACTI32x4Z256rr 991 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)), 992 (iPTR 1)))>; 993 } 994 995 996 // Additional patterns for handling a bitcast between the vselect and the 997 // extract_subvector. 998 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From, 999 X86VectorVTInfo To, X86VectorVTInfo Cast, 1000 PatFrag vextract_extract, 1001 SDNodeXForm EXTRACT_get_vextract_imm, 1002 list<Predicate> p> { 1003 let Predicates = p in { 1004 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, 1005 (bitconvert 1006 (To.VT (vextract_extract:$ext 1007 (From.VT From.RC:$src), (iPTR imm)))), 1008 To.RC:$src0)), 1009 (Cast.VT (!cast<Instruction>(InstrStr#"rrk") 1010 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src, 1011 (EXTRACT_get_vextract_imm To.RC:$ext)))>; 1012 1013 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, 1014 (bitconvert 1015 (To.VT (vextract_extract:$ext 1016 (From.VT From.RC:$src), (iPTR imm)))), 1017 Cast.ImmAllZerosV)), 1018 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz") 1019 Cast.KRCWM:$mask, From.RC:$src, 1020 (EXTRACT_get_vextract_imm To.RC:$ext)))>; 1021 } 1022 } 1023 1024 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, 1025 v4f32x_info, vextract128_extract, 1026 EXTRACT_get_vextract128_imm, [HasVLX]>; 1027 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info, 1028 v2f64x_info, vextract128_extract, 1029 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; 1030 1031 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, 1032 v4i32x_info, vextract128_extract, 1033 EXTRACT_get_vextract128_imm, [HasVLX]>; 1034 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info, 1035 v4i32x_info, vextract128_extract, 1036 EXTRACT_get_vextract128_imm, [HasVLX]>; 1037 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info, 1038 v4i32x_info, vextract128_extract, 1039 EXTRACT_get_vextract128_imm, [HasVLX]>; 1040 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info, 1041 v2i64x_info, vextract128_extract, 1042 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; 1043 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info, 1044 v2i64x_info, vextract128_extract, 1045 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; 1046 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info, 1047 v2i64x_info, vextract128_extract, 1048 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>; 1049 1050 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, 1051 v4f32x_info, vextract128_extract, 1052 EXTRACT_get_vextract128_imm, [HasAVX512]>; 1053 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info, 1054 v2f64x_info, vextract128_extract, 1055 EXTRACT_get_vextract128_imm, [HasDQI]>; 1056 1057 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, 1058 v4i32x_info, vextract128_extract, 1059 EXTRACT_get_vextract128_imm, [HasAVX512]>; 1060 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, 1061 v4i32x_info, vextract128_extract, 1062 EXTRACT_get_vextract128_imm, [HasAVX512]>; 1063 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, 1064 v4i32x_info, vextract128_extract, 1065 EXTRACT_get_vextract128_imm, [HasAVX512]>; 1066 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info, 1067 v2i64x_info, vextract128_extract, 1068 EXTRACT_get_vextract128_imm, [HasDQI]>; 1069 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info, 1070 v2i64x_info, vextract128_extract, 1071 EXTRACT_get_vextract128_imm, [HasDQI]>; 1072 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info, 1073 v2i64x_info, vextract128_extract, 1074 EXTRACT_get_vextract128_imm, [HasDQI]>; 1075 1076 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info, 1077 v8f32x_info, vextract256_extract, 1078 EXTRACT_get_vextract256_imm, [HasDQI]>; 1079 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, 1080 v4f64x_info, vextract256_extract, 1081 EXTRACT_get_vextract256_imm, [HasAVX512]>; 1082 1083 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info, 1084 v8i32x_info, vextract256_extract, 1085 EXTRACT_get_vextract256_imm, [HasDQI]>; 1086 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info, 1087 v8i32x_info, vextract256_extract, 1088 EXTRACT_get_vextract256_imm, [HasDQI]>; 1089 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info, 1090 v8i32x_info, vextract256_extract, 1091 EXTRACT_get_vextract256_imm, [HasDQI]>; 1092 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, 1093 v4i64x_info, vextract256_extract, 1094 EXTRACT_get_vextract256_imm, [HasAVX512]>; 1095 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, 1096 v4i64x_info, vextract256_extract, 1097 EXTRACT_get_vextract256_imm, [HasAVX512]>; 1098 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, 1099 v4i64x_info, vextract256_extract, 1100 EXTRACT_get_vextract256_imm, [HasAVX512]>; 1101 1102 // vextractps - extract 32 bits from XMM 1103 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), 1104 (ins VR128X:$src1, u8imm:$src2), 1105 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1106 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, 1107 EVEX, VEX_WIG, Sched<[WriteVecExtract]>; 1108 1109 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), 1110 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2), 1111 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1112 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), 1113 addr:$dst)]>, 1114 EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>; 1115 1116 //===---------------------------------------------------------------------===// 1117 // AVX-512 BROADCAST 1118 //--- 1119 // broadcast with a scalar argument. 1120 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr, 1121 string Name, 1122 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> { 1123 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)), 1124 (!cast<Instruction>(Name#DestInfo.ZSuffix#r) 1125 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; 1126 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask, 1127 (X86VBroadcast SrcInfo.FRC:$src), 1128 DestInfo.RC:$src0)), 1129 (!cast<Instruction>(Name#DestInfo.ZSuffix#rk) 1130 DestInfo.RC:$src0, DestInfo.KRCWM:$mask, 1131 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; 1132 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask, 1133 (X86VBroadcast SrcInfo.FRC:$src), 1134 DestInfo.ImmAllZerosV)), 1135 (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz) 1136 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; 1137 } 1138 1139 // Split version to allow mask and broadcast node to be different types. This 1140 // helps support the 32x2 broadcasts. 1141 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, 1142 string Name, 1143 SchedWrite SchedRR, SchedWrite SchedRM, 1144 X86VectorVTInfo MaskInfo, 1145 X86VectorVTInfo DestInfo, 1146 X86VectorVTInfo SrcInfo, 1147 SDPatternOperator UnmaskedOp = X86VBroadcast> { 1148 let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in { 1149 defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo, 1150 (outs MaskInfo.RC:$dst), 1151 (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src", 1152 (MaskInfo.VT 1153 (bitconvert 1154 (DestInfo.VT 1155 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))), 1156 (MaskInfo.VT 1157 (bitconvert 1158 (DestInfo.VT 1159 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>, 1160 T8PD, EVEX, Sched<[SchedRR]>; 1161 let mayLoad = 1 in 1162 defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo, 1163 (outs MaskInfo.RC:$dst), 1164 (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src", 1165 (MaskInfo.VT 1166 (bitconvert 1167 (DestInfo.VT (UnmaskedOp 1168 (SrcInfo.ScalarLdFrag addr:$src))))), 1169 (MaskInfo.VT 1170 (bitconvert 1171 (DestInfo.VT (X86VBroadcast 1172 (SrcInfo.ScalarLdFrag addr:$src)))))>, 1173 T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>, 1174 Sched<[SchedRM]>; 1175 } 1176 1177 def : Pat<(MaskInfo.VT 1178 (bitconvert 1179 (DestInfo.VT (UnmaskedOp 1180 (SrcInfo.VT (scalar_to_vector 1181 (SrcInfo.ScalarLdFrag addr:$src))))))), 1182 (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>; 1183 def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask, 1184 (bitconvert 1185 (DestInfo.VT 1186 (X86VBroadcast 1187 (SrcInfo.VT (scalar_to_vector 1188 (SrcInfo.ScalarLdFrag addr:$src)))))), 1189 MaskInfo.RC:$src0)), 1190 (!cast<Instruction>(Name#DestInfo.ZSuffix#mk) 1191 MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>; 1192 def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask, 1193 (bitconvert 1194 (DestInfo.VT 1195 (X86VBroadcast 1196 (SrcInfo.VT (scalar_to_vector 1197 (SrcInfo.ScalarLdFrag addr:$src)))))), 1198 MaskInfo.ImmAllZerosV)), 1199 (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz) 1200 MaskInfo.KRCWM:$mask, addr:$src)>; 1201 } 1202 1203 // Helper class to force mask and broadcast result to same type. 1204 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name, 1205 SchedWrite SchedRR, SchedWrite SchedRM, 1206 X86VectorVTInfo DestInfo, 1207 X86VectorVTInfo SrcInfo> : 1208 avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM, 1209 DestInfo, DestInfo, SrcInfo>; 1210 1211 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr, 1212 AVX512VLVectorVTInfo _> { 1213 let Predicates = [HasAVX512] in { 1214 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, 1215 WriteFShuffle256Ld, _.info512, _.info128>, 1216 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512, 1217 _.info128>, 1218 EVEX_V512; 1219 } 1220 1221 let Predicates = [HasVLX] in { 1222 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, 1223 WriteFShuffle256Ld, _.info256, _.info128>, 1224 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256, 1225 _.info128>, 1226 EVEX_V256; 1227 } 1228 } 1229 1230 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr, 1231 AVX512VLVectorVTInfo _> { 1232 let Predicates = [HasAVX512] in { 1233 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, 1234 WriteFShuffle256Ld, _.info512, _.info128>, 1235 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512, 1236 _.info128>, 1237 EVEX_V512; 1238 } 1239 1240 let Predicates = [HasVLX] in { 1241 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, 1242 WriteFShuffle256Ld, _.info256, _.info128>, 1243 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256, 1244 _.info128>, 1245 EVEX_V256; 1246 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, 1247 WriteFShuffle256Ld, _.info128, _.info128>, 1248 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128, 1249 _.info128>, 1250 EVEX_V128; 1251 } 1252 } 1253 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss", 1254 avx512vl_f32_info>; 1255 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd", 1256 avx512vl_f64_info>, VEX_W1X; 1257 1258 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR, 1259 X86VectorVTInfo _, SDPatternOperator OpNode, 1260 RegisterClass SrcRC> { 1261 let ExeDomain = _.ExeDomain in 1262 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 1263 (ins SrcRC:$src), 1264 "vpbroadcast"##_.Suffix, "$src", "$src", 1265 (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX, 1266 Sched<[SchedRR]>; 1267 } 1268 1269 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR, 1270 X86VectorVTInfo _, SDPatternOperator OpNode, 1271 RegisterClass SrcRC, SubRegIndex Subreg> { 1272 let hasSideEffects = 0, ExeDomain = _.ExeDomain in 1273 defm r : AVX512_maskable_custom<opc, MRMSrcReg, 1274 (outs _.RC:$dst), (ins GR32:$src), 1275 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)), 1276 !con((ins _.KRCWM:$mask), (ins GR32:$src)), 1277 "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [], 1278 "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>; 1279 1280 def : Pat <(_.VT (OpNode SrcRC:$src)), 1281 (!cast<Instruction>(Name#r) 1282 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; 1283 1284 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0), 1285 (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask, 1286 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; 1287 1288 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV), 1289 (!cast<Instruction>(Name#rkz) _.KRCWM:$mask, 1290 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; 1291 } 1292 1293 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name, 1294 AVX512VLVectorVTInfo _, SDPatternOperator OpNode, 1295 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { 1296 let Predicates = [prd] in 1297 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512, 1298 OpNode, SrcRC, Subreg>, EVEX_V512; 1299 let Predicates = [prd, HasVLX] in { 1300 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256, 1301 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256; 1302 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle, 1303 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128; 1304 } 1305 } 1306 1307 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _, 1308 SDPatternOperator OpNode, 1309 RegisterClass SrcRC, Predicate prd> { 1310 let Predicates = [prd] in 1311 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode, 1312 SrcRC>, EVEX_V512; 1313 let Predicates = [prd, HasVLX] in { 1314 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode, 1315 SrcRC>, EVEX_V256; 1316 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode, 1317 SrcRC>, EVEX_V128; 1318 } 1319 } 1320 1321 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr", 1322 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>; 1323 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr", 1324 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit, 1325 HasBWI>; 1326 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, 1327 X86VBroadcast, GR32, HasAVX512>; 1328 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, 1329 X86VBroadcast, GR64, HasAVX512>, VEX_W; 1330 1331 // Provide aliases for broadcast from the same register class that 1332 // automatically does the extract. 1333 multiclass avx512_int_broadcast_rm_lowering<string Name, 1334 X86VectorVTInfo DestInfo, 1335 X86VectorVTInfo SrcInfo, 1336 X86VectorVTInfo ExtInfo> { 1337 def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))), 1338 (!cast<Instruction>(Name#DestInfo.ZSuffix#"r") 1339 (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>; 1340 } 1341 1342 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr, 1343 AVX512VLVectorVTInfo _, Predicate prd> { 1344 let Predicates = [prd] in { 1345 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256, 1346 WriteShuffle256Ld, _.info512, _.info128>, 1347 avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>, 1348 EVEX_V512; 1349 // Defined separately to avoid redefinition. 1350 defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>; 1351 } 1352 let Predicates = [prd, HasVLX] in { 1353 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256, 1354 WriteShuffle256Ld, _.info256, _.info128>, 1355 avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>, 1356 EVEX_V256; 1357 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle, 1358 WriteShuffleXLd, _.info128, _.info128>, 1359 EVEX_V128; 1360 } 1361 } 1362 1363 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", 1364 avx512vl_i8_info, HasBWI>; 1365 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", 1366 avx512vl_i16_info, HasBWI>; 1367 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", 1368 avx512vl_i32_info, HasAVX512>; 1369 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", 1370 avx512vl_i64_info, HasAVX512>, VEX_W1X; 1371 1372 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, 1373 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { 1374 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 1375 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", 1376 (_Dst.VT (X86SubVBroadcast 1377 (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>, 1378 Sched<[SchedWriteShuffle.YMM.Folded]>, 1379 AVX5128IBase, EVEX; 1380 } 1381 1382 // This should be used for the AVX512DQ broadcast instructions. It disables 1383 // the unmasked patterns so that we only use the DQ instructions when masking 1384 // is requested. 1385 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr, 1386 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { 1387 let hasSideEffects = 0, mayLoad = 1 in 1388 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 1389 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", 1390 (null_frag), 1391 (_Dst.VT (X86SubVBroadcast 1392 (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>, 1393 Sched<[SchedWriteShuffle.YMM.Folded]>, 1394 AVX5128IBase, EVEX; 1395 } 1396 1397 let Predicates = [HasAVX512] in { 1398 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. 1399 def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))), 1400 (VPBROADCASTQZm addr:$src)>; 1401 } 1402 1403 let Predicates = [HasVLX] in { 1404 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. 1405 def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), 1406 (VPBROADCASTQZ128m addr:$src)>; 1407 def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))), 1408 (VPBROADCASTQZ256m addr:$src)>; 1409 } 1410 let Predicates = [HasVLX, HasBWI] in { 1411 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. 1412 // This means we'll encounter truncated i32 loads; match that here. 1413 def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), 1414 (VPBROADCASTWZ128m addr:$src)>; 1415 def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), 1416 (VPBROADCASTWZ256m addr:$src)>; 1417 def : Pat<(v8i16 (X86VBroadcast 1418 (i16 (trunc (i32 (zextloadi16 addr:$src)))))), 1419 (VPBROADCASTWZ128m addr:$src)>; 1420 def : Pat<(v16i16 (X86VBroadcast 1421 (i16 (trunc (i32 (zextloadi16 addr:$src)))))), 1422 (VPBROADCASTWZ256m addr:$src)>; 1423 } 1424 1425 //===----------------------------------------------------------------------===// 1426 // AVX-512 BROADCAST SUBVECTORS 1427 // 1428 1429 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", 1430 v16i32_info, v4i32x_info>, 1431 EVEX_V512, EVEX_CD8<32, CD8VT4>; 1432 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", 1433 v16f32_info, v4f32x_info>, 1434 EVEX_V512, EVEX_CD8<32, CD8VT4>; 1435 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", 1436 v8i64_info, v4i64x_info>, VEX_W, 1437 EVEX_V512, EVEX_CD8<64, CD8VT4>; 1438 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", 1439 v8f64_info, v4f64x_info>, VEX_W, 1440 EVEX_V512, EVEX_CD8<64, CD8VT4>; 1441 1442 let Predicates = [HasAVX512] in { 1443 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))), 1444 (VBROADCASTF64X4rm addr:$src)>; 1445 def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))), 1446 (VBROADCASTI64X4rm addr:$src)>; 1447 def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))), 1448 (VBROADCASTI64X4rm addr:$src)>; 1449 def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))), 1450 (VBROADCASTI64X4rm addr:$src)>; 1451 1452 // Provide fallback in case the load node that is used in the patterns above 1453 // is used by additional users, which prevents the pattern selection. 1454 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))), 1455 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1456 (v4f64 VR256X:$src), 1)>; 1457 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))), 1458 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1459 (v8f32 VR256X:$src), 1)>; 1460 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))), 1461 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1462 (v4i64 VR256X:$src), 1)>; 1463 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))), 1464 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1465 (v8i32 VR256X:$src), 1)>; 1466 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))), 1467 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1468 (v16i16 VR256X:$src), 1)>; 1469 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))), 1470 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 1471 (v32i8 VR256X:$src), 1)>; 1472 1473 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))), 1474 (VBROADCASTF32X4rm addr:$src)>; 1475 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))), 1476 (VBROADCASTI32X4rm addr:$src)>; 1477 def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))), 1478 (VBROADCASTI32X4rm addr:$src)>; 1479 def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))), 1480 (VBROADCASTI32X4rm addr:$src)>; 1481 1482 // Patterns for selects of bitcasted operations. 1483 def : Pat<(vselect VK16WM:$mask, 1484 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1485 (bc_v16f32 (v16i32 immAllZerosV))), 1486 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; 1487 def : Pat<(vselect VK16WM:$mask, 1488 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1489 VR512:$src0), 1490 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1491 def : Pat<(vselect VK16WM:$mask, 1492 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1493 (v16i32 immAllZerosV)), 1494 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>; 1495 def : Pat<(vselect VK16WM:$mask, 1496 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1497 VR512:$src0), 1498 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1499 1500 def : Pat<(vselect VK8WM:$mask, 1501 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), 1502 (bc_v8f64 (v16i32 immAllZerosV))), 1503 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; 1504 def : Pat<(vselect VK8WM:$mask, 1505 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), 1506 VR512:$src0), 1507 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1508 def : Pat<(vselect VK8WM:$mask, 1509 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))), 1510 (bc_v8i64 (v16i32 immAllZerosV))), 1511 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; 1512 def : Pat<(vselect VK8WM:$mask, 1513 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))), 1514 VR512:$src0), 1515 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1516 } 1517 1518 let Predicates = [HasVLX] in { 1519 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", 1520 v8i32x_info, v4i32x_info>, 1521 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1522 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", 1523 v8f32x_info, v4f32x_info>, 1524 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1525 1526 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))), 1527 (VBROADCASTF32X4Z256rm addr:$src)>; 1528 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))), 1529 (VBROADCASTI32X4Z256rm addr:$src)>; 1530 def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))), 1531 (VBROADCASTI32X4Z256rm addr:$src)>; 1532 def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))), 1533 (VBROADCASTI32X4Z256rm addr:$src)>; 1534 1535 // Patterns for selects of bitcasted operations. 1536 def : Pat<(vselect VK8WM:$mask, 1537 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1538 (bc_v8f32 (v8i32 immAllZerosV))), 1539 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1540 def : Pat<(vselect VK8WM:$mask, 1541 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), 1542 VR256X:$src0), 1543 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1544 def : Pat<(vselect VK8WM:$mask, 1545 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1546 (v8i32 immAllZerosV)), 1547 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1548 def : Pat<(vselect VK8WM:$mask, 1549 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), 1550 VR256X:$src0), 1551 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1552 1553 1554 // Provide fallback in case the load node that is used in the patterns above 1555 // is used by additional users, which prevents the pattern selection. 1556 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))), 1557 (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 1558 (v2f64 VR128X:$src), 1)>; 1559 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))), 1560 (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 1561 (v4f32 VR128X:$src), 1)>; 1562 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))), 1563 (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 1564 (v2i64 VR128X:$src), 1)>; 1565 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))), 1566 (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 1567 (v4i32 VR128X:$src), 1)>; 1568 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))), 1569 (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 1570 (v8i16 VR128X:$src), 1)>; 1571 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))), 1572 (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 1573 (v16i8 VR128X:$src), 1)>; 1574 } 1575 1576 let Predicates = [HasVLX, HasDQI] in { 1577 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", 1578 v4i64x_info, v2i64x_info>, VEX_W1X, 1579 EVEX_V256, EVEX_CD8<64, CD8VT2>; 1580 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", 1581 v4f64x_info, v2f64x_info>, VEX_W1X, 1582 EVEX_V256, EVEX_CD8<64, CD8VT2>; 1583 1584 // Patterns for selects of bitcasted operations. 1585 def : Pat<(vselect VK4WM:$mask, 1586 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), 1587 (bc_v4f64 (v8i32 immAllZerosV))), 1588 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>; 1589 def : Pat<(vselect VK4WM:$mask, 1590 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), 1591 VR256X:$src0), 1592 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; 1593 def : Pat<(vselect VK4WM:$mask, 1594 (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))), 1595 (bc_v4i64 (v8i32 immAllZerosV))), 1596 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>; 1597 def : Pat<(vselect VK4WM:$mask, 1598 (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))), 1599 VR256X:$src0), 1600 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; 1601 } 1602 1603 let Predicates = [HasDQI] in { 1604 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", 1605 v8i64_info, v2i64x_info>, VEX_W, 1606 EVEX_V512, EVEX_CD8<64, CD8VT2>; 1607 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8", 1608 v16i32_info, v8i32x_info>, 1609 EVEX_V512, EVEX_CD8<32, CD8VT8>; 1610 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", 1611 v8f64_info, v2f64x_info>, VEX_W, 1612 EVEX_V512, EVEX_CD8<64, CD8VT2>; 1613 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8", 1614 v16f32_info, v8f32x_info>, 1615 EVEX_V512, EVEX_CD8<32, CD8VT8>; 1616 1617 // Patterns for selects of bitcasted operations. 1618 def : Pat<(vselect VK16WM:$mask, 1619 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))), 1620 (bc_v16f32 (v16i32 immAllZerosV))), 1621 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>; 1622 def : Pat<(vselect VK16WM:$mask, 1623 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))), 1624 VR512:$src0), 1625 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1626 def : Pat<(vselect VK16WM:$mask, 1627 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))), 1628 (v16i32 immAllZerosV)), 1629 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>; 1630 def : Pat<(vselect VK16WM:$mask, 1631 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))), 1632 VR512:$src0), 1633 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1634 1635 def : Pat<(vselect VK8WM:$mask, 1636 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), 1637 (bc_v8f64 (v16i32 immAllZerosV))), 1638 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>; 1639 def : Pat<(vselect VK8WM:$mask, 1640 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), 1641 VR512:$src0), 1642 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1643 def : Pat<(vselect VK8WM:$mask, 1644 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))), 1645 (bc_v8i64 (v16i32 immAllZerosV))), 1646 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>; 1647 def : Pat<(vselect VK8WM:$mask, 1648 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))), 1649 VR512:$src0), 1650 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1651 } 1652 1653 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr, 1654 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> { 1655 let Predicates = [HasDQI] in 1656 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256, 1657 WriteShuffle256Ld, _Dst.info512, 1658 _Src.info512, _Src.info128, null_frag>, 1659 EVEX_V512; 1660 let Predicates = [HasDQI, HasVLX] in 1661 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256, 1662 WriteShuffle256Ld, _Dst.info256, 1663 _Src.info256, _Src.info128, null_frag>, 1664 EVEX_V256; 1665 } 1666 1667 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr, 1668 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> : 1669 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> { 1670 1671 let Predicates = [HasDQI, HasVLX] in 1672 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle, 1673 WriteShuffleXLd, _Dst.info128, 1674 _Src.info128, _Src.info128, null_frag>, 1675 EVEX_V128; 1676 } 1677 1678 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", 1679 avx512vl_i32_info, avx512vl_i64_info>; 1680 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", 1681 avx512vl_f32_info, avx512vl_f64_info>; 1682 1683 let Predicates = [HasVLX] in { 1684 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))), 1685 (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>; 1686 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))), 1687 (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>; 1688 } 1689 1690 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), 1691 (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>; 1692 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))), 1693 (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>; 1694 1695 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), 1696 (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>; 1697 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))), 1698 (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>; 1699 1700 //===----------------------------------------------------------------------===// 1701 // AVX-512 BROADCAST MASK TO VECTOR REGISTER 1702 //--- 1703 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr, 1704 X86VectorVTInfo _, RegisterClass KRC> { 1705 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src), 1706 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 1707 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, 1708 EVEX, Sched<[WriteShuffle]>; 1709 } 1710 1711 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr, 1712 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { 1713 let Predicates = [HasCDI] in 1714 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512; 1715 let Predicates = [HasCDI, HasVLX] in { 1716 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256; 1717 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128; 1718 } 1719 } 1720 1721 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", 1722 avx512vl_i32_info, VK16>; 1723 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", 1724 avx512vl_i64_info, VK8>, VEX_W; 1725 1726 //===----------------------------------------------------------------------===// 1727 // -- VPERMI2 - 3 source operands form -- 1728 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, 1729 X86FoldableSchedWrite sched, 1730 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1731 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1732 hasSideEffects = 0 in { 1733 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst), 1734 (ins _.RC:$src2, _.RC:$src3), 1735 OpcodeStr, "$src3, $src2", "$src2, $src3", 1736 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>, 1737 EVEX_4V, AVX5128IBase, Sched<[sched]>; 1738 1739 let mayLoad = 1 in 1740 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1741 (ins _.RC:$src2, _.MemOp:$src3), 1742 OpcodeStr, "$src3, $src2", "$src2, $src3", 1743 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, 1744 (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>, 1745 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>; 1746 } 1747 } 1748 1749 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, 1750 X86FoldableSchedWrite sched, 1751 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1752 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1753 hasSideEffects = 0, mayLoad = 1 in 1754 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1755 (ins _.RC:$src2, _.ScalarMemOp:$src3), 1756 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1757 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1758 (_.VT (X86VPermt2 _.RC:$src2, 1759 IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, 1760 AVX5128IBase, EVEX_4V, EVEX_B, 1761 Sched<[sched.Folded, ReadAfterLd]>; 1762 } 1763 1764 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, 1765 X86FoldableSchedWrite sched, 1766 AVX512VLVectorVTInfo VTInfo, 1767 AVX512VLVectorVTInfo ShuffleMask> { 1768 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1769 ShuffleMask.info512>, 1770 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512, 1771 ShuffleMask.info512>, EVEX_V512; 1772 let Predicates = [HasVLX] in { 1773 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1774 ShuffleMask.info128>, 1775 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128, 1776 ShuffleMask.info128>, EVEX_V128; 1777 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1778 ShuffleMask.info256>, 1779 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256, 1780 ShuffleMask.info256>, EVEX_V256; 1781 } 1782 } 1783 1784 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr, 1785 X86FoldableSchedWrite sched, 1786 AVX512VLVectorVTInfo VTInfo, 1787 AVX512VLVectorVTInfo Idx, 1788 Predicate Prd> { 1789 let Predicates = [Prd] in 1790 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1791 Idx.info512>, EVEX_V512; 1792 let Predicates = [Prd, HasVLX] in { 1793 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1794 Idx.info128>, EVEX_V128; 1795 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1796 Idx.info256>, EVEX_V256; 1797 } 1798 } 1799 1800 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256, 1801 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1802 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256, 1803 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1804 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256, 1805 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1806 VEX_W, EVEX_CD8<16, CD8VF>; 1807 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256, 1808 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1809 EVEX_CD8<8, CD8VF>; 1810 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256, 1811 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1812 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256, 1813 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1814 1815 // Extra patterns to deal with extra bitcasts due to passthru and index being 1816 // different types on the fp versions. 1817 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _, 1818 X86VectorVTInfo IdxVT, 1819 X86VectorVTInfo CastVT> { 1820 def : Pat<(_.VT (vselect _.KRCWM:$mask, 1821 (X86VPermt2 (_.VT _.RC:$src2), 1822 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3), 1823 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1824 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask, 1825 _.RC:$src2, _.RC:$src3)>; 1826 def : Pat<(_.VT (vselect _.KRCWM:$mask, 1827 (X86VPermt2 _.RC:$src2, 1828 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), 1829 (_.LdFrag addr:$src3)), 1830 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1831 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask, 1832 _.RC:$src2, addr:$src3)>; 1833 def : Pat<(_.VT (vselect _.KRCWM:$mask, 1834 (X86VPermt2 _.RC:$src2, 1835 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), 1836 (X86VBroadcast (_.ScalarLdFrag addr:$src3))), 1837 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1838 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, 1839 _.RC:$src2, addr:$src3)>; 1840 } 1841 1842 // TODO: Should we add more casts? The vXi64 case is common due to ABI. 1843 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>; 1844 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>; 1845 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>; 1846 1847 // VPERMT2 1848 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, 1849 X86FoldableSchedWrite sched, 1850 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1851 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 1852 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 1853 (ins IdxVT.RC:$src2, _.RC:$src3), 1854 OpcodeStr, "$src3, $src2", "$src2, $src3", 1855 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>, 1856 EVEX_4V, AVX5128IBase, Sched<[sched]>; 1857 1858 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1859 (ins IdxVT.RC:$src2, _.MemOp:$src3), 1860 OpcodeStr, "$src3, $src2", "$src2, $src3", 1861 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, 1862 (bitconvert (_.LdFrag addr:$src3)))), 1>, 1863 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>; 1864 } 1865 } 1866 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, 1867 X86FoldableSchedWrite sched, 1868 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1869 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in 1870 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1871 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3), 1872 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1873 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1874 (_.VT (X86VPermt2 _.RC:$src1, 1875 IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, 1876 AVX5128IBase, EVEX_4V, EVEX_B, 1877 Sched<[sched.Folded, ReadAfterLd]>; 1878 } 1879 1880 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, 1881 X86FoldableSchedWrite sched, 1882 AVX512VLVectorVTInfo VTInfo, 1883 AVX512VLVectorVTInfo ShuffleMask> { 1884 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, 1885 ShuffleMask.info512>, 1886 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512, 1887 ShuffleMask.info512>, EVEX_V512; 1888 let Predicates = [HasVLX] in { 1889 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1890 ShuffleMask.info128>, 1891 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128, 1892 ShuffleMask.info128>, EVEX_V128; 1893 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1894 ShuffleMask.info256>, 1895 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256, 1896 ShuffleMask.info256>, EVEX_V256; 1897 } 1898 } 1899 1900 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, 1901 X86FoldableSchedWrite sched, 1902 AVX512VLVectorVTInfo VTInfo, 1903 AVX512VLVectorVTInfo Idx, Predicate Prd> { 1904 let Predicates = [Prd] in 1905 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, 1906 Idx.info512>, EVEX_V512; 1907 let Predicates = [Prd, HasVLX] in { 1908 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1909 Idx.info128>, EVEX_V128; 1910 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1911 Idx.info256>, EVEX_V256; 1912 } 1913 } 1914 1915 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256, 1916 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1917 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256, 1918 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1919 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256, 1920 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1921 VEX_W, EVEX_CD8<16, CD8VF>; 1922 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256, 1923 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1924 EVEX_CD8<8, CD8VF>; 1925 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256, 1926 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1927 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256, 1928 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1929 1930 //===----------------------------------------------------------------------===// 1931 // AVX-512 - BLEND using mask 1932 // 1933 1934 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr, 1935 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1936 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 1937 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1938 (ins _.RC:$src1, _.RC:$src2), 1939 !strconcat(OpcodeStr, 1940 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, 1941 EVEX_4V, Sched<[sched]>; 1942 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1943 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1944 !strconcat(OpcodeStr, 1945 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1946 []>, EVEX_4V, EVEX_K, Sched<[sched]>; 1947 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1948 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1949 !strconcat(OpcodeStr, 1950 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 1951 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable; 1952 let mayLoad = 1 in { 1953 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1954 (ins _.RC:$src1, _.MemOp:$src2), 1955 !strconcat(OpcodeStr, 1956 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), 1957 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 1958 Sched<[sched.Folded, ReadAfterLd]>; 1959 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1960 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 1961 !strconcat(OpcodeStr, 1962 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1963 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, 1964 Sched<[sched.Folded, ReadAfterLd]>; 1965 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1966 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 1967 !strconcat(OpcodeStr, 1968 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 1969 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, 1970 Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable; 1971 } 1972 } 1973 } 1974 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, 1975 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1976 let mayLoad = 1, hasSideEffects = 0 in { 1977 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1978 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 1979 !strconcat(OpcodeStr, 1980 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 1981 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1982 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1983 Sched<[sched.Folded, ReadAfterLd]>; 1984 1985 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1986 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 1987 !strconcat(OpcodeStr, 1988 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|", 1989 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1990 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1991 Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable; 1992 1993 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1994 (ins _.RC:$src1, _.ScalarMemOp:$src2), 1995 !strconcat(OpcodeStr, 1996 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|", 1997 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1998 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1999 Sched<[sched.Folded, ReadAfterLd]>; 2000 } 2001 } 2002 2003 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 2004 AVX512VLVectorVTInfo VTInfo> { 2005 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 2006 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 2007 EVEX_V512; 2008 2009 let Predicates = [HasVLX] in { 2010 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 2011 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 2012 EVEX_V256; 2013 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 2014 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 2015 EVEX_V128; 2016 } 2017 } 2018 2019 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 2020 AVX512VLVectorVTInfo VTInfo> { 2021 let Predicates = [HasBWI] in 2022 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 2023 EVEX_V512; 2024 2025 let Predicates = [HasBWI, HasVLX] in { 2026 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 2027 EVEX_V256; 2028 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 2029 EVEX_V128; 2030 } 2031 } 2032 2033 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend, 2034 avx512vl_f32_info>; 2035 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend, 2036 avx512vl_f64_info>, VEX_W; 2037 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend, 2038 avx512vl_i32_info>; 2039 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend, 2040 avx512vl_i64_info>, VEX_W; 2041 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend, 2042 avx512vl_i8_info>; 2043 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend, 2044 avx512vl_i16_info>, VEX_W; 2045 2046 //===----------------------------------------------------------------------===// 2047 // Compare Instructions 2048 //===----------------------------------------------------------------------===// 2049 2050 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD 2051 2052 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd, 2053 X86FoldableSchedWrite sched> { 2054 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2055 (outs _.KRC:$dst), 2056 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), 2057 "vcmp${cc}"#_.Suffix, 2058 "$src2, $src1", "$src1, $src2", 2059 (OpNode (_.VT _.RC:$src1), 2060 (_.VT _.RC:$src2), 2061 imm:$cc)>, EVEX_4V, Sched<[sched]>; 2062 let mayLoad = 1 in 2063 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2064 (outs _.KRC:$dst), 2065 (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc), 2066 "vcmp${cc}"#_.Suffix, 2067 "$src2, $src1", "$src1, $src2", 2068 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, 2069 imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, 2070 Sched<[sched.Folded, ReadAfterLd]>; 2071 2072 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2073 (outs _.KRC:$dst), 2074 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), 2075 "vcmp${cc}"#_.Suffix, 2076 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 2077 (OpNodeRnd (_.VT _.RC:$src1), 2078 (_.VT _.RC:$src2), 2079 imm:$cc, 2080 (i32 FROUND_NO_EXC))>, 2081 EVEX_4V, EVEX_B, Sched<[sched]>; 2082 // Accept explicit immediate argument form instead of comparison code. 2083 let isAsmParserOnly = 1, hasSideEffects = 0 in { 2084 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, 2085 (outs VK1:$dst), 2086 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2087 "vcmp"#_.Suffix, 2088 "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V, 2089 Sched<[sched]>, NotMemoryFoldable; 2090 let mayLoad = 1 in 2091 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, 2092 (outs _.KRC:$dst), 2093 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 2094 "vcmp"#_.Suffix, 2095 "$cc, $src2, $src1", "$src1, $src2, $cc">, 2096 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, 2097 Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable; 2098 2099 defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, 2100 (outs _.KRC:$dst), 2101 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2102 "vcmp"#_.Suffix, 2103 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">, 2104 EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable; 2105 }// let isAsmParserOnly = 1, hasSideEffects = 0 2106 2107 let isCodeGenOnly = 1 in { 2108 let isCommutable = 1 in 2109 def rr : AVX512Ii8<0xC2, MRMSrcReg, 2110 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc), 2111 !strconcat("vcmp${cc}", _.Suffix, 2112 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2113 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 2114 _.FRC:$src2, 2115 imm:$cc))]>, 2116 EVEX_4V, Sched<[sched]>; 2117 def rm : AVX512Ii8<0xC2, MRMSrcMem, 2118 (outs _.KRC:$dst), 2119 (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), 2120 !strconcat("vcmp${cc}", _.Suffix, 2121 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2122 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 2123 (_.ScalarLdFrag addr:$src2), 2124 imm:$cc))]>, 2125 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, 2126 Sched<[sched.Folded, ReadAfterLd]>; 2127 } 2128 } 2129 2130 let Predicates = [HasAVX512] in { 2131 let ExeDomain = SSEPackedSingle in 2132 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd, 2133 SchedWriteFCmp.Scl>, AVX512XSIi8Base; 2134 let ExeDomain = SSEPackedDouble in 2135 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd, 2136 SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W; 2137 } 2138 2139 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode, 2140 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2141 bit IsCommutable> { 2142 let isCommutable = IsCommutable in 2143 def rr : AVX512BI<opc, MRMSrcReg, 2144 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), 2145 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2146 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>, 2147 EVEX_4V, Sched<[sched]>; 2148 def rm : AVX512BI<opc, MRMSrcMem, 2149 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), 2150 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2151 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), 2152 (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, 2153 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; 2154 let isCommutable = IsCommutable in 2155 def rrk : AVX512BI<opc, MRMSrcReg, 2156 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 2157 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2158 "$dst {${mask}}, $src1, $src2}"), 2159 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2160 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>, 2161 EVEX_4V, EVEX_K, Sched<[sched]>; 2162 def rmk : AVX512BI<opc, MRMSrcMem, 2163 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 2164 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2165 "$dst {${mask}}, $src1, $src2}"), 2166 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2167 (OpNode (_.VT _.RC:$src1), 2168 (_.VT (bitconvert 2169 (_.LdFrag addr:$src2))))))]>, 2170 EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; 2171 } 2172 2173 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode, 2174 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2175 bit IsCommutable> : 2176 avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> { 2177 def rmb : AVX512BI<opc, MRMSrcMem, 2178 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), 2179 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst", 2180 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"), 2181 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), 2182 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>, 2183 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 2184 def rmbk : AVX512BI<opc, MRMSrcMem, 2185 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2186 _.ScalarMemOp:$src2), 2187 !strconcat(OpcodeStr, 2188 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2189 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), 2190 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2191 (OpNode (_.VT _.RC:$src1), 2192 (X86VBroadcast 2193 (_.ScalarLdFrag addr:$src2)))))]>, 2194 EVEX_4V, EVEX_K, EVEX_B, 2195 Sched<[sched.Folded, ReadAfterLd]>; 2196 } 2197 2198 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode, 2199 X86SchedWriteWidths sched, 2200 AVX512VLVectorVTInfo VTInfo, Predicate prd, 2201 bit IsCommutable = 0> { 2202 let Predicates = [prd] in 2203 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM, 2204 VTInfo.info512, IsCommutable>, EVEX_V512; 2205 2206 let Predicates = [prd, HasVLX] in { 2207 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM, 2208 VTInfo.info256, IsCommutable>, EVEX_V256; 2209 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM, 2210 VTInfo.info128, IsCommutable>, EVEX_V128; 2211 } 2212 } 2213 2214 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, 2215 PatFrag OpNode, X86SchedWriteWidths sched, 2216 AVX512VLVectorVTInfo VTInfo, 2217 Predicate prd, bit IsCommutable = 0> { 2218 let Predicates = [prd] in 2219 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM, 2220 VTInfo.info512, IsCommutable>, EVEX_V512; 2221 2222 let Predicates = [prd, HasVLX] in { 2223 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM, 2224 VTInfo.info256, IsCommutable>, EVEX_V256; 2225 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM, 2226 VTInfo.info128, IsCommutable>, EVEX_V128; 2227 } 2228 } 2229 2230 // This fragment treats X86cmpm as commutable to help match loads in both 2231 // operands for PCMPEQ. 2232 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>; 2233 def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2), 2234 (X86setcc_commute node:$src1, node:$src2, SETEQ)>; 2235 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), 2236 (setcc node:$src1, node:$src2, SETGT)>; 2237 2238 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't 2239 // increase the pattern complexity the way an immediate would. 2240 let AddedComplexity = 2 in { 2241 // FIXME: Is there a better scheduler class for VPCMP? 2242 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, 2243 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, 2244 EVEX_CD8<8, CD8VF>, VEX_WIG; 2245 2246 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, 2247 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, 2248 EVEX_CD8<16, CD8VF>, VEX_WIG; 2249 2250 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, 2251 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, 2252 EVEX_CD8<32, CD8VF>; 2253 2254 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, 2255 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, 2256 T8PD, VEX_W, EVEX_CD8<64, CD8VF>; 2257 2258 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, 2259 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2260 EVEX_CD8<8, CD8VF>, VEX_WIG; 2261 2262 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, 2263 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2264 EVEX_CD8<16, CD8VF>, VEX_WIG; 2265 2266 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, 2267 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, 2268 EVEX_CD8<32, CD8VF>; 2269 2270 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, 2271 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, 2272 T8PD, VEX_W, EVEX_CD8<64, CD8VF>; 2273 } 2274 2275 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, 2276 PatFrag CommFrag, X86FoldableSchedWrite sched, 2277 X86VectorVTInfo _, string Name> { 2278 let isCommutable = 1 in 2279 def rri : AVX512AIi8<opc, MRMSrcReg, 2280 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc), 2281 !strconcat("vpcmp${cc}", Suffix, 2282 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2283 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1), 2284 (_.VT _.RC:$src2), 2285 cond)))]>, 2286 EVEX_4V, Sched<[sched]>; 2287 def rmi : AVX512AIi8<opc, MRMSrcMem, 2288 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc), 2289 !strconcat("vpcmp${cc}", Suffix, 2290 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2291 [(set _.KRC:$dst, (_.KVT 2292 (Frag:$cc 2293 (_.VT _.RC:$src1), 2294 (_.VT (bitconvert (_.LdFrag addr:$src2))), 2295 cond)))]>, 2296 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; 2297 let isCommutable = 1 in 2298 def rrik : AVX512AIi8<opc, MRMSrcReg, 2299 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, 2300 AVX512ICC:$cc), 2301 !strconcat("vpcmp${cc}", Suffix, 2302 "\t{$src2, $src1, $dst {${mask}}|", 2303 "$dst {${mask}}, $src1, $src2}"), 2304 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2305 (_.KVT (Frag:$cc (_.VT _.RC:$src1), 2306 (_.VT _.RC:$src2), 2307 cond))))]>, 2308 EVEX_4V, EVEX_K, Sched<[sched]>; 2309 def rmik : AVX512AIi8<opc, MRMSrcMem, 2310 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, 2311 AVX512ICC:$cc), 2312 !strconcat("vpcmp${cc}", Suffix, 2313 "\t{$src2, $src1, $dst {${mask}}|", 2314 "$dst {${mask}}, $src1, $src2}"), 2315 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2316 (_.KVT 2317 (Frag:$cc 2318 (_.VT _.RC:$src1), 2319 (_.VT (bitconvert 2320 (_.LdFrag addr:$src2))), 2321 cond))))]>, 2322 EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; 2323 2324 // Accept explicit immediate argument form instead of comparison code. 2325 let isAsmParserOnly = 1, hasSideEffects = 0 in { 2326 def rri_alt : AVX512AIi8<opc, MRMSrcReg, 2327 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2328 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|", 2329 "$dst, $src1, $src2, $cc}"), []>, 2330 EVEX_4V, Sched<[sched]>, NotMemoryFoldable; 2331 let mayLoad = 1 in 2332 def rmi_alt : AVX512AIi8<opc, MRMSrcMem, 2333 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2334 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|", 2335 "$dst, $src1, $src2, $cc}"), []>, 2336 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable; 2337 def rrik_alt : AVX512AIi8<opc, MRMSrcReg, 2338 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, 2339 u8imm:$cc), 2340 !strconcat("vpcmp", Suffix, 2341 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2342 "$dst {${mask}}, $src1, $src2, $cc}"), []>, 2343 EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable; 2344 let mayLoad = 1 in 2345 def rmik_alt : AVX512AIi8<opc, MRMSrcMem, 2346 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, 2347 u8imm:$cc), 2348 !strconcat("vpcmp", Suffix, 2349 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2350 "$dst {${mask}}, $src1, $src2, $cc}"), []>, 2351 EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>, 2352 NotMemoryFoldable; 2353 } 2354 2355 def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)), 2356 (_.VT _.RC:$src1), cond)), 2357 (!cast<Instruction>(Name#_.ZSuffix#"rmi") 2358 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>; 2359 2360 def : Pat<(and _.KRCWM:$mask, 2361 (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)), 2362 (_.VT _.RC:$src1), cond))), 2363 (!cast<Instruction>(Name#_.ZSuffix#"rmik") 2364 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2365 (CommFrag.OperandTransform $cc))>; 2366 } 2367 2368 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, 2369 PatFrag CommFrag, X86FoldableSchedWrite sched, 2370 X86VectorVTInfo _, string Name> : 2371 avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> { 2372 def rmib : AVX512AIi8<opc, MRMSrcMem, 2373 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, 2374 AVX512ICC:$cc), 2375 !strconcat("vpcmp${cc}", Suffix, 2376 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|", 2377 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), 2378 [(set _.KRC:$dst, (_.KVT (Frag:$cc 2379 (_.VT _.RC:$src1), 2380 (X86VBroadcast 2381 (_.ScalarLdFrag addr:$src2)), 2382 cond)))]>, 2383 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 2384 def rmibk : AVX512AIi8<opc, MRMSrcMem, 2385 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2386 _.ScalarMemOp:$src2, AVX512ICC:$cc), 2387 !strconcat("vpcmp${cc}", Suffix, 2388 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2389 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), 2390 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2391 (_.KVT (Frag:$cc 2392 (_.VT _.RC:$src1), 2393 (X86VBroadcast 2394 (_.ScalarLdFrag addr:$src2)), 2395 cond))))]>, 2396 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 2397 2398 // Accept explicit immediate argument form instead of comparison code. 2399 let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in { 2400 def rmib_alt : AVX512AIi8<opc, MRMSrcMem, 2401 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, 2402 u8imm:$cc), 2403 !strconcat("vpcmp", Suffix, 2404 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|", 2405 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>, 2406 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>, 2407 NotMemoryFoldable; 2408 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem, 2409 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2410 _.ScalarMemOp:$src2, u8imm:$cc), 2411 !strconcat("vpcmp", Suffix, 2412 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2413 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>, 2414 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>, 2415 NotMemoryFoldable; 2416 } 2417 2418 def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)), 2419 (_.VT _.RC:$src1), cond)), 2420 (!cast<Instruction>(Name#_.ZSuffix#"rmib") 2421 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>; 2422 2423 def : Pat<(and _.KRCWM:$mask, 2424 (_.KVT (CommFrag:$cc (X86VBroadcast 2425 (_.ScalarLdFrag addr:$src2)), 2426 (_.VT _.RC:$src1), cond))), 2427 (!cast<Instruction>(Name#_.ZSuffix#"rmibk") 2428 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2429 (CommFrag.OperandTransform $cc))>; 2430 } 2431 2432 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag, 2433 PatFrag CommFrag, X86SchedWriteWidths sched, 2434 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2435 let Predicates = [prd] in 2436 defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM, 2437 VTInfo.info512, NAME>, EVEX_V512; 2438 2439 let Predicates = [prd, HasVLX] in { 2440 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM, 2441 VTInfo.info256, NAME>, EVEX_V256; 2442 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM, 2443 VTInfo.info128, NAME>, EVEX_V128; 2444 } 2445 } 2446 2447 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag, 2448 PatFrag CommFrag, X86SchedWriteWidths sched, 2449 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2450 let Predicates = [prd] in 2451 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM, 2452 VTInfo.info512, NAME>, EVEX_V512; 2453 2454 let Predicates = [prd, HasVLX] in { 2455 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM, 2456 VTInfo.info256, NAME>, EVEX_V256; 2457 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM, 2458 VTInfo.info128, NAME>, EVEX_V128; 2459 } 2460 } 2461 2462 def X86pcmpm_imm : SDNodeXForm<setcc, [{ 2463 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2464 uint8_t SSECC = X86::getVPCMPImmForCond(CC); 2465 return getI8Imm(SSECC, SDLoc(N)); 2466 }]>; 2467 2468 // Swapped operand version of the above. 2469 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{ 2470 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2471 uint8_t SSECC = X86::getVPCMPImmForCond(CC); 2472 SSECC = X86::getSwappedVPCMPImm(SSECC); 2473 return getI8Imm(SSECC, SDLoc(N)); 2474 }]>; 2475 2476 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2477 (setcc node:$src1, node:$src2, node:$cc), [{ 2478 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2479 return !ISD::isUnsignedIntSetCC(CC); 2480 }], X86pcmpm_imm>; 2481 2482 // Same as above, but commutes immediate. Use for load folding. 2483 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2484 (setcc node:$src1, node:$src2, node:$cc), [{ 2485 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2486 return !ISD::isUnsignedIntSetCC(CC); 2487 }], X86pcmpm_imm_commute>; 2488 2489 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2490 (setcc node:$src1, node:$src2, node:$cc), [{ 2491 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2492 return ISD::isUnsignedIntSetCC(CC); 2493 }], X86pcmpm_imm>; 2494 2495 // Same as above, but commutes immediate. Use for load folding. 2496 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2497 (setcc node:$src1, node:$src2, node:$cc), [{ 2498 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2499 return ISD::isUnsignedIntSetCC(CC); 2500 }], X86pcmpm_imm_commute>; 2501 2502 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU? 2503 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute, 2504 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2505 EVEX_CD8<8, CD8VF>; 2506 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute, 2507 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2508 EVEX_CD8<8, CD8VF>; 2509 2510 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute, 2511 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2512 VEX_W, EVEX_CD8<16, CD8VF>; 2513 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute, 2514 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2515 VEX_W, EVEX_CD8<16, CD8VF>; 2516 2517 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute, 2518 SchedWriteVecALU, avx512vl_i32_info, 2519 HasAVX512>, EVEX_CD8<32, CD8VF>; 2520 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute, 2521 SchedWriteVecALU, avx512vl_i32_info, 2522 HasAVX512>, EVEX_CD8<32, CD8VF>; 2523 2524 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute, 2525 SchedWriteVecALU, avx512vl_i64_info, 2526 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; 2527 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute, 2528 SchedWriteVecALU, avx512vl_i64_info, 2529 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; 2530 2531 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _, 2532 string Name> { 2533 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2534 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc), 2535 "vcmp${cc}"#_.Suffix, 2536 "$src2, $src1", "$src1, $src2", 2537 (X86cmpm (_.VT _.RC:$src1), 2538 (_.VT _.RC:$src2), 2539 imm:$cc), 1>, 2540 Sched<[sched]>; 2541 2542 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2543 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), 2544 "vcmp${cc}"#_.Suffix, 2545 "$src2, $src1", "$src1, $src2", 2546 (X86cmpm (_.VT _.RC:$src1), 2547 (_.VT (bitconvert (_.LdFrag addr:$src2))), 2548 imm:$cc)>, 2549 Sched<[sched.Folded, ReadAfterLd]>; 2550 2551 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2552 (outs _.KRC:$dst), 2553 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), 2554 "vcmp${cc}"#_.Suffix, 2555 "${src2}"##_.BroadcastStr##", $src1", 2556 "$src1, ${src2}"##_.BroadcastStr, 2557 (X86cmpm (_.VT _.RC:$src1), 2558 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), 2559 imm:$cc)>, 2560 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 2561 // Accept explicit immediate argument form instead of comparison code. 2562 let isAsmParserOnly = 1, hasSideEffects = 0 in { 2563 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, 2564 (outs _.KRC:$dst), 2565 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2566 "vcmp"#_.Suffix, 2567 "$cc, $src2, $src1", "$src1, $src2, $cc">, 2568 Sched<[sched]>, NotMemoryFoldable; 2569 2570 let mayLoad = 1 in { 2571 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, 2572 (outs _.KRC:$dst), 2573 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2574 "vcmp"#_.Suffix, 2575 "$cc, $src2, $src1", "$src1, $src2, $cc">, 2576 Sched<[sched.Folded, ReadAfterLd]>, 2577 NotMemoryFoldable; 2578 2579 defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, 2580 (outs _.KRC:$dst), 2581 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 2582 "vcmp"#_.Suffix, 2583 "$cc, ${src2}"##_.BroadcastStr##", $src1", 2584 "$src1, ${src2}"##_.BroadcastStr##", $cc">, 2585 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>, 2586 NotMemoryFoldable; 2587 } 2588 } 2589 2590 // Patterns for selecting with loads in other operand. 2591 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), 2592 CommutableCMPCC:$cc), 2593 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, 2594 imm:$cc)>; 2595 2596 def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2), 2597 (_.VT _.RC:$src1), 2598 CommutableCMPCC:$cc)), 2599 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, 2600 _.RC:$src1, addr:$src2, 2601 imm:$cc)>; 2602 2603 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)), 2604 (_.VT _.RC:$src1), CommutableCMPCC:$cc), 2605 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, 2606 imm:$cc)>; 2607 2608 def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast 2609 (_.ScalarLdFrag addr:$src2)), 2610 (_.VT _.RC:$src1), 2611 CommutableCMPCC:$cc)), 2612 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, 2613 _.RC:$src1, addr:$src2, 2614 imm:$cc)>; 2615 } 2616 2617 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 2618 // comparison code form (VCMP[EQ/LT/LE/...] 2619 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2620 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), 2621 "vcmp${cc}"#_.Suffix, 2622 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 2623 (X86cmpmRnd (_.VT _.RC:$src1), 2624 (_.VT _.RC:$src2), 2625 imm:$cc, 2626 (i32 FROUND_NO_EXC))>, 2627 EVEX_B, Sched<[sched]>; 2628 2629 let isAsmParserOnly = 1, hasSideEffects = 0 in { 2630 defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, 2631 (outs _.KRC:$dst), 2632 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2633 "vcmp"#_.Suffix, 2634 "$cc, {sae}, $src2, $src1", 2635 "$src1, $src2, {sae}, $cc">, 2636 EVEX_B, Sched<[sched]>, NotMemoryFoldable; 2637 } 2638 } 2639 2640 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 2641 let Predicates = [HasAVX512] in { 2642 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>, 2643 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512; 2644 2645 } 2646 let Predicates = [HasAVX512,HasVLX] in { 2647 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128; 2648 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256; 2649 } 2650 } 2651 2652 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>, 2653 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 2654 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>, 2655 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 2656 2657 // Patterns to select fp compares with load as first operand. 2658 let Predicates = [HasAVX512] in { 2659 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, 2660 CommutableCMPCC:$cc)), 2661 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>; 2662 2663 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, 2664 CommutableCMPCC:$cc)), 2665 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>; 2666 } 2667 2668 // ---------------------------------------------------------------- 2669 // FPClass 2670 //handle fpclass instruction mask = op(reg_scalar,imm) 2671 // op(mem_scalar,imm) 2672 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, 2673 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2674 Predicate prd> { 2675 let Predicates = [prd], ExeDomain = _.ExeDomain in { 2676 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2677 (ins _.RC:$src1, i32u8imm:$src2), 2678 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2679 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1), 2680 (i32 imm:$src2)))]>, 2681 Sched<[sched]>; 2682 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2683 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 2684 OpcodeStr##_.Suffix# 2685 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2686 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2687 (OpNode (_.VT _.RC:$src1), 2688 (i32 imm:$src2))))]>, 2689 EVEX_K, Sched<[sched]>; 2690 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2691 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2), 2692 OpcodeStr##_.Suffix## 2693 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2694 [(set _.KRC:$dst, 2695 (OpNode _.ScalarIntMemCPat:$src1, 2696 (i32 imm:$src2)))]>, 2697 Sched<[sched.Folded, ReadAfterLd]>; 2698 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2699 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2), 2700 OpcodeStr##_.Suffix## 2701 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2702 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2703 (OpNode _.ScalarIntMemCPat:$src1, 2704 (i32 imm:$src2))))]>, 2705 EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; 2706 } 2707 } 2708 2709 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm) 2710 // fpclass(reg_vec, mem_vec, imm) 2711 // fpclass(reg_vec, broadcast(eltVt), imm) 2712 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, 2713 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2714 string mem, string broadcast>{ 2715 let ExeDomain = _.ExeDomain in { 2716 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2717 (ins _.RC:$src1, i32u8imm:$src2), 2718 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2719 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1), 2720 (i32 imm:$src2)))]>, 2721 Sched<[sched]>; 2722 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2723 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 2724 OpcodeStr##_.Suffix# 2725 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2726 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2727 (OpNode (_.VT _.RC:$src1), 2728 (i32 imm:$src2))))]>, 2729 EVEX_K, Sched<[sched]>; 2730 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2731 (ins _.MemOp:$src1, i32u8imm:$src2), 2732 OpcodeStr##_.Suffix##mem# 2733 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2734 [(set _.KRC:$dst,(OpNode 2735 (_.VT (bitconvert (_.LdFrag addr:$src1))), 2736 (i32 imm:$src2)))]>, 2737 Sched<[sched.Folded, ReadAfterLd]>; 2738 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2739 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), 2740 OpcodeStr##_.Suffix##mem# 2741 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2742 [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode 2743 (_.VT (bitconvert (_.LdFrag addr:$src1))), 2744 (i32 imm:$src2))))]>, 2745 EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; 2746 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2747 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 2748 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"## 2749 _.BroadcastStr##", $dst|$dst, ${src1}" 2750 ##_.BroadcastStr##", $src2}", 2751 [(set _.KRC:$dst,(OpNode 2752 (_.VT (X86VBroadcast 2753 (_.ScalarLdFrag addr:$src1))), 2754 (i32 imm:$src2)))]>, 2755 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 2756 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2757 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 2758 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"## 2759 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"## 2760 _.BroadcastStr##", $src2}", 2761 [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode 2762 (_.VT (X86VBroadcast 2763 (_.ScalarLdFrag addr:$src1))), 2764 (i32 imm:$src2))))]>, 2765 EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; 2766 } 2767 } 2768 2769 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _, 2770 bits<8> opc, SDNode OpNode, 2771 X86SchedWriteWidths sched, Predicate prd, 2772 string broadcast>{ 2773 let Predicates = [prd] in { 2774 defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM, 2775 _.info512, "{z}", broadcast>, EVEX_V512; 2776 } 2777 let Predicates = [prd, HasVLX] in { 2778 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM, 2779 _.info128, "{x}", broadcast>, EVEX_V128; 2780 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM, 2781 _.info256, "{y}", broadcast>, EVEX_V256; 2782 } 2783 } 2784 2785 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec, 2786 bits<8> opcScalar, SDNode VecOpNode, 2787 SDNode ScalarOpNode, X86SchedWriteWidths sched, 2788 Predicate prd> { 2789 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec, 2790 VecOpNode, sched, prd, "{l}">, 2791 EVEX_CD8<32, CD8VF>; 2792 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec, 2793 VecOpNode, sched, prd, "{q}">, 2794 EVEX_CD8<64, CD8VF> , VEX_W; 2795 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode, 2796 sched.Scl, f32x_info, prd>, 2797 EVEX_CD8<32, CD8VT1>; 2798 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode, 2799 sched.Scl, f64x_info, prd>, 2800 EVEX_CD8<64, CD8VT1>, VEX_W; 2801 } 2802 2803 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, 2804 X86Vfpclasss, SchedWriteFCmp, HasDQI>, 2805 AVX512AIi8Base, EVEX; 2806 2807 //----------------------------------------------------------------- 2808 // Mask register copy, including 2809 // - copy between mask registers 2810 // - load/store mask registers 2811 // - copy from GPR to mask register and vice versa 2812 // 2813 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk, 2814 string OpcodeStr, RegisterClass KRC, 2815 ValueType vvt, X86MemOperand x86memop> { 2816 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in 2817 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2818 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2819 Sched<[WriteMove]>; 2820 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src), 2821 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2822 [(set KRC:$dst, (vvt (load addr:$src)))]>, 2823 Sched<[WriteLoad]>; 2824 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src), 2825 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2826 [(store KRC:$src, addr:$dst)]>, 2827 Sched<[WriteStore]>; 2828 } 2829 2830 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk, 2831 string OpcodeStr, 2832 RegisterClass KRC, RegisterClass GRC> { 2833 let hasSideEffects = 0 in { 2834 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src), 2835 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2836 Sched<[WriteMove]>; 2837 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src), 2838 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2839 Sched<[WriteMove]>; 2840 } 2841 } 2842 2843 let Predicates = [HasDQI] in 2844 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>, 2845 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, 2846 VEX, PD; 2847 2848 let Predicates = [HasAVX512] in 2849 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, 2850 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, 2851 VEX, PS; 2852 2853 let Predicates = [HasBWI] in { 2854 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>, 2855 VEX, PD, VEX_W; 2856 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>, 2857 VEX, XD; 2858 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>, 2859 VEX, PS, VEX_W; 2860 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, 2861 VEX, XD, VEX_W; 2862 } 2863 2864 // GR from/to mask register 2865 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), 2866 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; 2867 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), 2868 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; 2869 2870 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), 2871 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; 2872 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), 2873 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>; 2874 2875 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2876 (KMOVWrk VK16:$src)>; 2877 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2878 (COPY_TO_REGCLASS VK16:$src, GR32)>; 2879 2880 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2881 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; 2882 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2883 (COPY_TO_REGCLASS VK8:$src, GR32)>; 2884 2885 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), 2886 (COPY_TO_REGCLASS GR32:$src, VK32)>; 2887 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), 2888 (COPY_TO_REGCLASS VK32:$src, GR32)>; 2889 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), 2890 (COPY_TO_REGCLASS GR64:$src, VK64)>; 2891 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), 2892 (COPY_TO_REGCLASS VK64:$src, GR64)>; 2893 2894 // Load/store kreg 2895 let Predicates = [HasDQI] in { 2896 def : Pat<(store VK1:$src, addr:$dst), 2897 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>; 2898 2899 def : Pat<(v1i1 (load addr:$src)), 2900 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; 2901 def : Pat<(v2i1 (load addr:$src)), 2902 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>; 2903 def : Pat<(v4i1 (load addr:$src)), 2904 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>; 2905 } 2906 2907 let Predicates = [HasAVX512] in { 2908 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), 2909 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; 2910 } 2911 2912 let Predicates = [HasAVX512] in { 2913 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> { 2914 def : Pat<(maskVT (scalar_to_vector GR32:$src)), 2915 (COPY_TO_REGCLASS GR32:$src, maskRC)>; 2916 2917 def : Pat<(maskVT (scalar_to_vector GR8:$src)), 2918 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; 2919 } 2920 2921 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>; 2922 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>; 2923 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>; 2924 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>; 2925 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>; 2926 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>; 2927 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>; 2928 2929 def : Pat<(insert_subvector (v16i1 immAllZerosV), 2930 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)), 2931 (COPY_TO_REGCLASS 2932 (KMOVWkr (AND32ri8 2933 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), 2934 (i32 1))), VK16)>; 2935 } 2936 2937 // Mask unary operation 2938 // - KNOT 2939 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr, 2940 RegisterClass KRC, SDPatternOperator OpNode, 2941 X86FoldableSchedWrite sched, Predicate prd> { 2942 let Predicates = [prd] in 2943 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2944 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2945 [(set KRC:$dst, (OpNode KRC:$src))]>, 2946 Sched<[sched]>; 2947 } 2948 2949 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr, 2950 SDPatternOperator OpNode, 2951 X86FoldableSchedWrite sched> { 2952 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 2953 sched, HasDQI>, VEX, PD; 2954 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 2955 sched, HasAVX512>, VEX, PS; 2956 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 2957 sched, HasBWI>, VEX, PD, VEX_W; 2958 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 2959 sched, HasBWI>, VEX, PS, VEX_W; 2960 } 2961 2962 // TODO - do we need a X86SchedWriteWidths::KMASK type? 2963 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>; 2964 2965 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit 2966 let Predicates = [HasAVX512, NoDQI] in 2967 def : Pat<(vnot VK8:$src), 2968 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; 2969 2970 def : Pat<(vnot VK4:$src), 2971 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>; 2972 def : Pat<(vnot VK2:$src), 2973 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>; 2974 2975 // Mask binary operation 2976 // - KAND, KANDN, KOR, KXNOR, KXOR 2977 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr, 2978 RegisterClass KRC, SDPatternOperator OpNode, 2979 X86FoldableSchedWrite sched, Predicate prd, 2980 bit IsCommutable> { 2981 let Predicates = [prd], isCommutable = IsCommutable in 2982 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), 2983 !strconcat(OpcodeStr, 2984 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2985 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>, 2986 Sched<[sched]>; 2987 } 2988 2989 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr, 2990 SDPatternOperator OpNode, 2991 X86FoldableSchedWrite sched, bit IsCommutable, 2992 Predicate prdW = HasAVX512> { 2993 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 2994 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; 2995 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 2996 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS; 2997 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 2998 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD; 2999 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3000 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; 3001 } 3002 3003 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; 3004 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; 3005 // These nodes use 'vnot' instead of 'not' to support vectors. 3006 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; 3007 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; 3008 3009 // TODO - do we need a X86SchedWriteWidths::KMASK type? 3010 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>; 3011 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>; 3012 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>; 3013 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>; 3014 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; 3015 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; 3016 3017 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode, 3018 Instruction Inst> { 3019 // With AVX512F, 8-bit mask is promoted to 16-bit mask, 3020 // for the DQI set, this type is legal and KxxxB instruction is used 3021 let Predicates = [NoDQI] in 3022 def : Pat<(VOpNode VK8:$src1, VK8:$src2), 3023 (COPY_TO_REGCLASS 3024 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), 3025 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; 3026 3027 // All types smaller than 8 bits require conversion anyway 3028 def : Pat<(OpNode VK1:$src1, VK1:$src2), 3029 (COPY_TO_REGCLASS (Inst 3030 (COPY_TO_REGCLASS VK1:$src1, VK16), 3031 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; 3032 def : Pat<(VOpNode VK2:$src1, VK2:$src2), 3033 (COPY_TO_REGCLASS (Inst 3034 (COPY_TO_REGCLASS VK2:$src1, VK16), 3035 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>; 3036 def : Pat<(VOpNode VK4:$src1, VK4:$src2), 3037 (COPY_TO_REGCLASS (Inst 3038 (COPY_TO_REGCLASS VK4:$src1, VK16), 3039 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>; 3040 } 3041 3042 defm : avx512_binop_pat<and, and, KANDWrr>; 3043 defm : avx512_binop_pat<vandn, andn, KANDNWrr>; 3044 defm : avx512_binop_pat<or, or, KORWrr>; 3045 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>; 3046 defm : avx512_binop_pat<xor, xor, KXORWrr>; 3047 3048 // Mask unpacking 3049 multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT, 3050 RegisterClass KRCSrc, X86FoldableSchedWrite sched, 3051 Predicate prd> { 3052 let Predicates = [prd] in { 3053 let hasSideEffects = 0 in 3054 def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst), 3055 (ins KRC:$src1, KRC:$src2), 3056 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 3057 VEX_4V, VEX_L, Sched<[sched]>; 3058 3059 def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)), 3060 (!cast<Instruction>(NAME##rr) 3061 (COPY_TO_REGCLASS KRCSrc:$src2, KRC), 3062 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>; 3063 } 3064 } 3065 3066 defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD; 3067 defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS; 3068 defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W; 3069 3070 // Mask bit testing 3071 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3072 SDNode OpNode, X86FoldableSchedWrite sched, 3073 Predicate prd> { 3074 let Predicates = [prd], Defs = [EFLAGS] in 3075 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2), 3076 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 3077 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>, 3078 Sched<[sched]>; 3079 } 3080 3081 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 3082 X86FoldableSchedWrite sched, 3083 Predicate prdW = HasAVX512> { 3084 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, 3085 VEX, PD; 3086 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, 3087 VEX, PS; 3088 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>, 3089 VEX, PS, VEX_W; 3090 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, 3091 VEX, PD, VEX_W; 3092 } 3093 3094 // TODO - do we need a X86SchedWriteWidths::KMASK type? 3095 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>; 3096 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>; 3097 3098 // Mask shift 3099 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3100 SDNode OpNode, X86FoldableSchedWrite sched> { 3101 let Predicates = [HasAVX512] in 3102 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), 3103 !strconcat(OpcodeStr, 3104 "\t{$imm, $src, $dst|$dst, $src, $imm}"), 3105 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>, 3106 Sched<[sched]>; 3107 } 3108 3109 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, 3110 SDNode OpNode, X86FoldableSchedWrite sched> { 3111 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3112 sched>, VEX, TAPD, VEX_W; 3113 let Predicates = [HasDQI] in 3114 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3115 sched>, VEX, TAPD; 3116 let Predicates = [HasBWI] in { 3117 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3118 sched>, VEX, TAPD, VEX_W; 3119 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3120 sched>, VEX, TAPD; 3121 } 3122 } 3123 3124 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; 3125 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; 3126 3127 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 3128 multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr, 3129 X86VectorVTInfo Narrow, 3130 X86VectorVTInfo Wide> { 3131 def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1), 3132 (Narrow.VT Narrow.RC:$src2))), 3133 (COPY_TO_REGCLASS 3134 (!cast<Instruction>(InstStr#"Zrr") 3135 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3136 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), 3137 Narrow.KRC)>; 3138 3139 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3140 (Frag (Narrow.VT Narrow.RC:$src1), 3141 (Narrow.VT Narrow.RC:$src2)))), 3142 (COPY_TO_REGCLASS 3143 (!cast<Instruction>(InstStr#"Zrrk") 3144 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3145 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3146 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), 3147 Narrow.KRC)>; 3148 } 3149 3150 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 3151 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, 3152 string InstStr, 3153 X86VectorVTInfo Narrow, 3154 X86VectorVTInfo Wide> { 3155 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3156 (Narrow.VT Narrow.RC:$src2), cond)), 3157 (COPY_TO_REGCLASS 3158 (!cast<Instruction>(InstStr##Zrri) 3159 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3160 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3161 (Frag.OperandTransform $cc)), Narrow.KRC)>; 3162 3163 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3164 (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), 3165 (Narrow.VT Narrow.RC:$src2), 3166 cond)))), 3167 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik) 3168 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3169 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3170 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3171 (Frag.OperandTransform $cc)), Narrow.KRC)>; 3172 } 3173 3174 // Same as above, but for fp types which don't use PatFrags. 3175 multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr, 3176 X86VectorVTInfo Narrow, 3177 X86VectorVTInfo Wide> { 3178 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), 3179 (Narrow.VT Narrow.RC:$src2), imm:$cc)), 3180 (COPY_TO_REGCLASS 3181 (!cast<Instruction>(InstStr##Zrri) 3182 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3183 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3184 imm:$cc), Narrow.KRC)>; 3185 3186 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3187 (OpNode (Narrow.VT Narrow.RC:$src1), 3188 (Narrow.VT Narrow.RC:$src2), imm:$cc))), 3189 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik) 3190 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3191 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3192 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), 3193 imm:$cc), Narrow.KRC)>; 3194 } 3195 3196 let Predicates = [HasAVX512, NoVLX] in { 3197 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't 3198 // increase the pattern complexity the way an immediate would. 3199 let AddedComplexity = 2 in { 3200 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>; 3201 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>; 3202 3203 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>; 3204 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>; 3205 3206 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>; 3207 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>; 3208 3209 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>; 3210 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>; 3211 } 3212 3213 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>; 3214 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>; 3215 3216 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>; 3217 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>; 3218 3219 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>; 3220 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>; 3221 3222 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>; 3223 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>; 3224 3225 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>; 3226 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>; 3227 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>; 3228 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>; 3229 } 3230 3231 let Predicates = [HasBWI, NoVLX] in { 3232 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't 3233 // increase the pattern complexity the way an immediate would. 3234 let AddedComplexity = 2 in { 3235 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>; 3236 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>; 3237 3238 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>; 3239 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>; 3240 3241 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>; 3242 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>; 3243 3244 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>; 3245 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>; 3246 } 3247 3248 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>; 3249 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>; 3250 3251 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>; 3252 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>; 3253 3254 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>; 3255 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>; 3256 3257 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>; 3258 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>; 3259 } 3260 3261 // Mask setting all 0s or 1s 3262 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> { 3263 let Predicates = [HasAVX512] in 3264 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, 3265 SchedRW = [WriteZero] in 3266 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "", 3267 [(set KRC:$dst, (VT Val))]>; 3268 } 3269 3270 multiclass avx512_mask_setop_w<PatFrag Val> { 3271 defm W : avx512_mask_setop<VK16, v16i1, Val>; 3272 defm D : avx512_mask_setop<VK32, v32i1, Val>; 3273 defm Q : avx512_mask_setop<VK64, v64i1, Val>; 3274 } 3275 3276 defm KSET0 : avx512_mask_setop_w<immAllZerosV>; 3277 defm KSET1 : avx512_mask_setop_w<immAllOnesV>; 3278 3279 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. 3280 let Predicates = [HasAVX512] in { 3281 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; 3282 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>; 3283 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>; 3284 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>; 3285 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; 3286 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; 3287 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; 3288 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>; 3289 } 3290 3291 // Patterns for kmask insert_subvector/extract_subvector to/from index=0 3292 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT, 3293 RegisterClass RC, ValueType VT> { 3294 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))), 3295 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>; 3296 3297 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))), 3298 (VT (COPY_TO_REGCLASS subRC:$src, RC))>; 3299 } 3300 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>; 3301 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>; 3302 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>; 3303 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>; 3304 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>; 3305 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>; 3306 3307 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>; 3308 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>; 3309 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>; 3310 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>; 3311 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>; 3312 3313 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>; 3314 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>; 3315 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>; 3316 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>; 3317 3318 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>; 3319 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>; 3320 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>; 3321 3322 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>; 3323 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>; 3324 3325 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>; 3326 3327 //===----------------------------------------------------------------------===// 3328 // AVX-512 - Aligned and unaligned load and store 3329 // 3330 3331 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, 3332 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, 3333 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3334 bit NoRMPattern = 0, 3335 SDPatternOperator SelectOprr = vselect> { 3336 let hasSideEffects = 0 in { 3337 let isMoveReg = 1 in 3338 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src), 3339 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], 3340 _.ExeDomain>, EVEX, Sched<[Sched.RR]>, 3341 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; 3342 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3343 (ins _.KRCWM:$mask, _.RC:$src), 3344 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", 3345 "${dst} {${mask}} {z}, $src}"), 3346 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3347 (_.VT _.RC:$src), 3348 _.ImmAllZerosV)))], _.ExeDomain>, 3349 EVEX, EVEX_KZ, Sched<[Sched.RR]>; 3350 3351 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in 3352 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src), 3353 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3354 !if(NoRMPattern, [], 3355 [(set _.RC:$dst, 3356 (_.VT (bitconvert (ld_frag addr:$src))))]), 3357 _.ExeDomain>, EVEX, Sched<[Sched.RM]>, 3358 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 3359 3360 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { 3361 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3362 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1), 3363 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3364 "${dst} {${mask}}, $src1}"), 3365 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3366 (_.VT _.RC:$src1), 3367 (_.VT _.RC:$src0))))], _.ExeDomain>, 3368 EVEX, EVEX_K, Sched<[Sched.RR]>; 3369 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3370 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1), 3371 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3372 "${dst} {${mask}}, $src1}"), 3373 [(set _.RC:$dst, (_.VT 3374 (vselect _.KRCWM:$mask, 3375 (_.VT (bitconvert (ld_frag addr:$src1))), 3376 (_.VT _.RC:$src0))))], _.ExeDomain>, 3377 EVEX, EVEX_K, Sched<[Sched.RM]>; 3378 } 3379 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3380 (ins _.KRCWM:$mask, _.MemOp:$src), 3381 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"# 3382 "${dst} {${mask}} {z}, $src}", 3383 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask, 3384 (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))], 3385 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>; 3386 } 3387 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), 3388 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; 3389 3390 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), 3391 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; 3392 3393 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), 3394 (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0, 3395 _.KRCWM:$mask, addr:$ptr)>; 3396 } 3397 3398 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr, 3399 AVX512VLVectorVTInfo _, Predicate prd, 3400 X86SchedWriteMoveLSWidths Sched, 3401 string EVEX2VEXOvrd, bit NoRMPattern = 0> { 3402 let Predicates = [prd] in 3403 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, 3404 _.info512.AlignedLdFrag, masked_load_aligned512, 3405 Sched.ZMM, "", NoRMPattern>, EVEX_V512; 3406 3407 let Predicates = [prd, HasVLX] in { 3408 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, 3409 _.info256.AlignedLdFrag, masked_load_aligned256, 3410 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256; 3411 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, 3412 _.info128.AlignedLdFrag, masked_load_aligned128, 3413 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128; 3414 } 3415 } 3416 3417 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, 3418 AVX512VLVectorVTInfo _, Predicate prd, 3419 X86SchedWriteMoveLSWidths Sched, 3420 string EVEX2VEXOvrd, bit NoRMPattern = 0, 3421 SDPatternOperator SelectOprr = vselect> { 3422 let Predicates = [prd] in 3423 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag, 3424 masked_load_unaligned, Sched.ZMM, "", 3425 NoRMPattern, SelectOprr>, EVEX_V512; 3426 3427 let Predicates = [prd, HasVLX] in { 3428 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag, 3429 masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y", 3430 NoRMPattern, SelectOprr>, EVEX_V256; 3431 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag, 3432 masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd, 3433 NoRMPattern, SelectOprr>, EVEX_V128; 3434 } 3435 } 3436 3437 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, 3438 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, 3439 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3440 bit NoMRPattern = 0> { 3441 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 3442 let isMoveReg = 1 in 3443 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src), 3444 OpcodeStr # "\t{$src, $dst|$dst, $src}", 3445 [], _.ExeDomain>, EVEX, 3446 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>, 3447 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">; 3448 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3449 (ins _.KRCWM:$mask, _.RC:$src), 3450 OpcodeStr # "\t{$src, ${dst} {${mask}}|"# 3451 "${dst} {${mask}}, $src}", 3452 [], _.ExeDomain>, EVEX, EVEX_K, 3453 FoldGenData<BaseName#_.ZSuffix#rrk>, 3454 Sched<[Sched.RR]>; 3455 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3456 (ins _.KRCWM:$mask, _.RC:$src), 3457 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" # 3458 "${dst} {${mask}} {z}, $src}", 3459 [], _.ExeDomain>, EVEX, EVEX_KZ, 3460 FoldGenData<BaseName#_.ZSuffix#rrkz>, 3461 Sched<[Sched.RR]>; 3462 } 3463 3464 let hasSideEffects = 0, mayStore = 1 in 3465 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 3466 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3467 !if(NoMRPattern, [], 3468 [(st_frag (_.VT _.RC:$src), addr:$dst)]), 3469 _.ExeDomain>, EVEX, Sched<[Sched.MR]>, 3470 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">; 3471 def mrk : AVX512PI<opc, MRMDestMem, (outs), 3472 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 3473 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3474 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>, 3475 NotMemoryFoldable; 3476 3477 def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)), 3478 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr, 3479 _.KRCWM:$mask, _.RC:$src)>; 3480 3481 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}", 3482 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV") 3483 _.RC:$dst, _.RC:$src), 0>; 3484 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3485 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV") 3486 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3487 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}", 3488 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV") 3489 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3490 } 3491 3492 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, 3493 AVX512VLVectorVTInfo _, Predicate prd, 3494 X86SchedWriteMoveLSWidths Sched, 3495 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3496 let Predicates = [prd] in 3497 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store, 3498 masked_store_unaligned, Sched.ZMM, "", 3499 NoMRPattern>, EVEX_V512; 3500 let Predicates = [prd, HasVLX] in { 3501 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store, 3502 masked_store_unaligned, Sched.YMM, 3503 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3504 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store, 3505 masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd, 3506 NoMRPattern>, EVEX_V128; 3507 } 3508 } 3509 3510 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, 3511 AVX512VLVectorVTInfo _, Predicate prd, 3512 X86SchedWriteMoveLSWidths Sched, 3513 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3514 let Predicates = [prd] in 3515 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore, 3516 masked_store_aligned512, Sched.ZMM, "", 3517 NoMRPattern>, EVEX_V512; 3518 3519 let Predicates = [prd, HasVLX] in { 3520 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore, 3521 masked_store_aligned256, Sched.YMM, 3522 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3523 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore, 3524 masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd, 3525 NoMRPattern>, EVEX_V128; 3526 } 3527 } 3528 3529 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, 3530 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3531 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, 3532 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3533 PS, EVEX_CD8<32, CD8VF>; 3534 3535 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, 3536 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3537 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, 3538 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3539 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3540 3541 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, 3542 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>, 3543 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, 3544 SchedWriteFMoveLS, "VMOVUPS">, 3545 PS, EVEX_CD8<32, CD8VF>; 3546 3547 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 3548 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>, 3549 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, 3550 SchedWriteFMoveLS, "VMOVUPD">, 3551 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3552 3553 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, 3554 HasAVX512, SchedWriteVecMoveLS, 3555 "VMOVDQA", 1>, 3556 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, 3557 HasAVX512, SchedWriteVecMoveLS, 3558 "VMOVDQA", 1>, 3559 PD, EVEX_CD8<32, CD8VF>; 3560 3561 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, 3562 HasAVX512, SchedWriteVecMoveLS, 3563 "VMOVDQA">, 3564 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, 3565 HasAVX512, SchedWriteVecMoveLS, 3566 "VMOVDQA">, 3567 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3568 3569 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3570 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3571 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3572 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3573 XD, EVEX_CD8<8, CD8VF>; 3574 3575 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3576 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3577 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3578 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3579 XD, VEX_W, EVEX_CD8<16, CD8VF>; 3580 3581 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3582 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, 3583 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3584 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3585 XS, EVEX_CD8<32, CD8VF>; 3586 3587 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3588 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, 3589 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3590 SchedWriteVecMoveLS, "VMOVDQU">, 3591 XS, VEX_W, EVEX_CD8<64, CD8VF>; 3592 3593 // Special instructions to help with spilling when we don't have VLX. We need 3594 // to load or store from a ZMM register instead. These are converted in 3595 // expandPostRAPseudos. 3596 let isReMaterializable = 1, canFoldAsLoad = 1, 3597 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in { 3598 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3599 "", []>, Sched<[WriteFLoadX]>; 3600 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3601 "", []>, Sched<[WriteFLoadY]>; 3602 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3603 "", []>, Sched<[WriteFLoadX]>; 3604 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3605 "", []>, Sched<[WriteFLoadY]>; 3606 } 3607 3608 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { 3609 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3610 "", []>, Sched<[WriteFStoreX]>; 3611 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3612 "", []>, Sched<[WriteFStoreY]>; 3613 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3614 "", []>, Sched<[WriteFStoreX]>; 3615 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3616 "", []>, Sched<[WriteFStoreY]>; 3617 } 3618 3619 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)), 3620 (v8i64 VR512:$src))), 3621 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), 3622 VK8), VR512:$src)>; 3623 3624 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), 3625 (v16i32 VR512:$src))), 3626 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; 3627 3628 // These patterns exist to prevent the above patterns from introducing a second 3629 // mask inversion when one already exists. 3630 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)), 3631 (bc_v8i64 (v16i32 immAllZerosV)), 3632 (v8i64 VR512:$src))), 3633 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>; 3634 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)), 3635 (v16i32 immAllZerosV), 3636 (v16i32 VR512:$src))), 3637 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>; 3638 3639 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow, 3640 X86VectorVTInfo Wide> { 3641 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), 3642 Narrow.RC:$src1, Narrow.RC:$src0)), 3643 (EXTRACT_SUBREG 3644 (Wide.VT 3645 (!cast<Instruction>(InstrStr#"rrk") 3646 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)), 3647 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), 3648 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), 3649 Narrow.SubRegIdx)>; 3650 3651 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), 3652 Narrow.RC:$src1, Narrow.ImmAllZerosV)), 3653 (EXTRACT_SUBREG 3654 (Wide.VT 3655 (!cast<Instruction>(InstrStr#"rrkz") 3656 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), 3657 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), 3658 Narrow.SubRegIdx)>; 3659 } 3660 3661 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't 3662 // available. Use a 512-bit operation and extract. 3663 let Predicates = [HasAVX512, NoVLX] in { 3664 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; 3665 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; 3666 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; 3667 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; 3668 3669 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; 3670 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; 3671 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; 3672 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; 3673 } 3674 3675 let Predicates = [HasBWI, NoVLX] in { 3676 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; 3677 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; 3678 3679 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>; 3680 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>; 3681 } 3682 3683 let Predicates = [HasAVX512] in { 3684 // 512-bit store. 3685 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), 3686 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3687 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), 3688 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3689 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), 3690 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3691 def : Pat<(store (v16i32 VR512:$src), addr:$dst), 3692 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3693 def : Pat<(store (v32i16 VR512:$src), addr:$dst), 3694 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3695 def : Pat<(store (v64i8 VR512:$src), addr:$dst), 3696 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3697 } 3698 3699 let Predicates = [HasVLX] in { 3700 // 128-bit store. 3701 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), 3702 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3703 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), 3704 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3705 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), 3706 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3707 def : Pat<(store (v4i32 VR128X:$src), addr:$dst), 3708 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3709 def : Pat<(store (v8i16 VR128X:$src), addr:$dst), 3710 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3711 def : Pat<(store (v16i8 VR128X:$src), addr:$dst), 3712 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3713 3714 // 256-bit store. 3715 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst), 3716 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3717 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), 3718 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3719 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), 3720 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3721 def : Pat<(store (v8i32 VR256X:$src), addr:$dst), 3722 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3723 def : Pat<(store (v16i16 VR256X:$src), addr:$dst), 3724 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3725 def : Pat<(store (v32i8 VR256X:$src), addr:$dst), 3726 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3727 } 3728 3729 multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From, 3730 X86VectorVTInfo To, X86VectorVTInfo Cast> { 3731 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, 3732 (bitconvert 3733 (To.VT (extract_subvector 3734 (From.VT From.RC:$src), (iPTR 0)))), 3735 To.RC:$src0)), 3736 (Cast.VT (!cast<Instruction>(InstrStr#"rrk") 3737 Cast.RC:$src0, Cast.KRCWM:$mask, 3738 (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>; 3739 3740 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, 3741 (bitconvert 3742 (To.VT (extract_subvector 3743 (From.VT From.RC:$src), (iPTR 0)))), 3744 Cast.ImmAllZerosV)), 3745 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz") 3746 Cast.KRCWM:$mask, 3747 (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>; 3748 } 3749 3750 3751 let Predicates = [HasVLX] in { 3752 // A masked extract from the first 128-bits of a 256-bit vector can be 3753 // implemented with masked move. 3754 defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info, v2i64x_info, v2i64x_info>; 3755 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info, v4i32x_info, v2i64x_info>; 3756 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>; 3757 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info, v16i8x_info, v2i64x_info>; 3758 defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info, v2i64x_info, v4i32x_info>; 3759 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info, v4i32x_info, v4i32x_info>; 3760 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>; 3761 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info, v16i8x_info, v4i32x_info>; 3762 defm : masked_move_for_extract<"VMOVAPDZ128", v4f64x_info, v2f64x_info, v2f64x_info>; 3763 defm : masked_move_for_extract<"VMOVAPDZ128", v8f32x_info, v4f32x_info, v2f64x_info>; 3764 defm : masked_move_for_extract<"VMOVAPSZ128", v4f64x_info, v2f64x_info, v4f32x_info>; 3765 defm : masked_move_for_extract<"VMOVAPSZ128", v8f32x_info, v4f32x_info, v4f32x_info>; 3766 3767 // A masked extract from the first 128-bits of a 512-bit vector can be 3768 // implemented with masked move. 3769 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info, v2i64x_info, v2i64x_info>; 3770 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>; 3771 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>; 3772 defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info, v16i8x_info, v2i64x_info>; 3773 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info, v2i64x_info, v4i32x_info>; 3774 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>; 3775 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>; 3776 defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info, v16i8x_info, v4i32x_info>; 3777 defm : masked_move_for_extract<"VMOVAPDZ128", v8f64_info, v2f64x_info, v2f64x_info>; 3778 defm : masked_move_for_extract<"VMOVAPDZ128", v16f32_info, v4f32x_info, v2f64x_info>; 3779 defm : masked_move_for_extract<"VMOVAPSZ128", v8f64_info, v2f64x_info, v4f32x_info>; 3780 defm : masked_move_for_extract<"VMOVAPSZ128", v16f32_info, v4f32x_info, v4f32x_info>; 3781 3782 // A masked extract from the first 256-bits of a 512-bit vector can be 3783 // implemented with masked move. 3784 defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info, v4i64x_info, v4i64x_info>; 3785 defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info, v4i64x_info>; 3786 defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>; 3787 defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info, v32i8x_info, v4i64x_info>; 3788 defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info, v4i64x_info, v8i32x_info>; 3789 defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info, v8i32x_info>; 3790 defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>; 3791 defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info, v32i8x_info, v8i32x_info>; 3792 defm : masked_move_for_extract<"VMOVAPDZ256", v8f64_info, v4f64x_info, v4f64x_info>; 3793 defm : masked_move_for_extract<"VMOVAPDZ256", v16f32_info, v8f32x_info, v4f64x_info>; 3794 defm : masked_move_for_extract<"VMOVAPSZ256", v8f64_info, v4f64x_info, v8f32x_info>; 3795 defm : masked_move_for_extract<"VMOVAPSZ256", v16f32_info, v8f32x_info, v8f32x_info>; 3796 } 3797 3798 // Move Int Doubleword to Packed Double Int 3799 // 3800 let ExeDomain = SSEPackedInt in { 3801 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 3802 "vmovd\t{$src, $dst|$dst, $src}", 3803 [(set VR128X:$dst, 3804 (v4i32 (scalar_to_vector GR32:$src)))]>, 3805 EVEX, Sched<[WriteVecMoveFromGpr]>; 3806 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), 3807 "vmovd\t{$src, $dst|$dst, $src}", 3808 [(set VR128X:$dst, 3809 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 3810 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; 3811 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 3812 "vmovq\t{$src, $dst|$dst, $src}", 3813 [(set VR128X:$dst, 3814 (v2i64 (scalar_to_vector GR64:$src)))]>, 3815 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3816 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in 3817 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), 3818 (ins i64mem:$src), 3819 "vmovq\t{$src, $dst|$dst, $src}", []>, 3820 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>; 3821 let isCodeGenOnly = 1 in { 3822 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), 3823 "vmovq\t{$src, $dst|$dst, $src}", 3824 [(set FR64X:$dst, (bitconvert GR64:$src))]>, 3825 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3826 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src), 3827 "vmovq\t{$src, $dst|$dst, $src}", 3828 [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>, 3829 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 3830 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), 3831 "vmovq\t{$src, $dst|$dst, $src}", 3832 [(set GR64:$dst, (bitconvert FR64X:$src))]>, 3833 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3834 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src), 3835 "vmovq\t{$src, $dst|$dst, $src}", 3836 [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>, 3837 EVEX, VEX_W, Sched<[WriteVecStore]>, 3838 EVEX_CD8<64, CD8VT1>; 3839 } 3840 } // ExeDomain = SSEPackedInt 3841 3842 // Move Int Doubleword to Single Scalar 3843 // 3844 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3845 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), 3846 "vmovd\t{$src, $dst|$dst, $src}", 3847 [(set FR32X:$dst, (bitconvert GR32:$src))]>, 3848 EVEX, Sched<[WriteVecMoveFromGpr]>; 3849 3850 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), 3851 "vmovd\t{$src, $dst|$dst, $src}", 3852 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>, 3853 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; 3854 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3855 3856 // Move doubleword from xmm register to r/m32 3857 // 3858 let ExeDomain = SSEPackedInt in { 3859 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 3860 "vmovd\t{$src, $dst|$dst, $src}", 3861 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), 3862 (iPTR 0)))]>, 3863 EVEX, Sched<[WriteVecMoveToGpr]>; 3864 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), 3865 (ins i32mem:$dst, VR128X:$src), 3866 "vmovd\t{$src, $dst|$dst, $src}", 3867 [(store (i32 (extractelt (v4i32 VR128X:$src), 3868 (iPTR 0))), addr:$dst)]>, 3869 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; 3870 } // ExeDomain = SSEPackedInt 3871 3872 // Move quadword from xmm1 register to r/m64 3873 // 3874 let ExeDomain = SSEPackedInt in { 3875 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 3876 "vmovq\t{$src, $dst|$dst, $src}", 3877 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), 3878 (iPTR 0)))]>, 3879 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>, 3880 Requires<[HasAVX512]>; 3881 3882 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 3883 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), 3884 "vmovq\t{$src, $dst|$dst, $src}", []>, PD, 3885 EVEX, VEX_W, Sched<[WriteVecStore]>, 3886 Requires<[HasAVX512, In64BitMode]>; 3887 3888 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), 3889 (ins i64mem:$dst, VR128X:$src), 3890 "vmovq\t{$src, $dst|$dst, $src}", 3891 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), 3892 addr:$dst)]>, 3893 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, 3894 Sched<[WriteVecStore]>, Requires<[HasAVX512]>; 3895 3896 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 3897 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), 3898 (ins VR128X:$src), 3899 "vmovq\t{$src, $dst|$dst, $src}", []>, 3900 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>; 3901 } // ExeDomain = SSEPackedInt 3902 3903 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}", 3904 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>; 3905 3906 // Move Scalar Single to Double Int 3907 // 3908 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3909 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), 3910 (ins FR32X:$src), 3911 "vmovd\t{$src, $dst|$dst, $src}", 3912 [(set GR32:$dst, (bitconvert FR32X:$src))]>, 3913 EVEX, Sched<[WriteVecMoveToGpr]>; 3914 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), 3915 (ins i32mem:$dst, FR32X:$src), 3916 "vmovd\t{$src, $dst|$dst, $src}", 3917 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>, 3918 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; 3919 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3920 3921 // Move Quadword Int to Packed Quadword Int 3922 // 3923 let ExeDomain = SSEPackedInt in { 3924 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), 3925 (ins i64mem:$src), 3926 "vmovq\t{$src, $dst|$dst, $src}", 3927 [(set VR128X:$dst, 3928 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, 3929 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 3930 } // ExeDomain = SSEPackedInt 3931 3932 // Allow "vmovd" but print "vmovq". 3933 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3934 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>; 3935 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3936 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>; 3937 3938 //===----------------------------------------------------------------------===// 3939 // AVX-512 MOVSS, MOVSD 3940 //===----------------------------------------------------------------------===// 3941 3942 multiclass avx512_move_scalar<string asm, SDNode OpNode, 3943 X86VectorVTInfo _> { 3944 let Predicates = [HasAVX512, OptForSize] in 3945 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3946 (ins _.RC:$src1, _.RC:$src2), 3947 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3948 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))], 3949 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; 3950 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3951 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3952 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", 3953 "$dst {${mask}} {z}, $src1, $src2}"), 3954 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3955 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3956 _.ImmAllZerosV)))], 3957 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; 3958 let Constraints = "$src0 = $dst" in 3959 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3960 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3961 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|", 3962 "$dst {${mask}}, $src1, $src2}"), 3963 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3964 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3965 (_.VT _.RC:$src0))))], 3966 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; 3967 let canFoldAsLoad = 1, isReMaterializable = 1 in 3968 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), 3969 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3970 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], 3971 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 3972 let mayLoad = 1, hasSideEffects = 0 in { 3973 let Constraints = "$src0 = $dst" in 3974 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 3975 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src), 3976 !strconcat(asm, "\t{$src, $dst {${mask}}|", 3977 "$dst {${mask}}, $src}"), 3978 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>; 3979 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 3980 (ins _.KRCWM:$mask, _.ScalarMemOp:$src), 3981 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|", 3982 "$dst {${mask}} {z}, $src}"), 3983 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>; 3984 } 3985 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), 3986 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3987 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>, 3988 EVEX, Sched<[WriteFStore]>; 3989 let mayStore = 1, hasSideEffects = 0 in 3990 def mrk: AVX512PI<0x11, MRMDestMem, (outs), 3991 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src), 3992 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), 3993 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>, 3994 NotMemoryFoldable; 3995 } 3996 3997 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>, 3998 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; 3999 4000 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>, 4001 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; 4002 4003 4004 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode, 4005 PatLeaf ZeroFP, X86VectorVTInfo _> { 4006 4007 def : Pat<(_.VT (OpNode _.RC:$src0, 4008 (_.VT (scalar_to_vector 4009 (_.EltVT (X86selects VK1WM:$mask, 4010 (_.EltVT _.FRC:$src1), 4011 (_.EltVT _.FRC:$src2))))))), 4012 (!cast<Instruction>(InstrStr#rrk) 4013 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)), 4014 VK1WM:$mask, 4015 (_.VT _.RC:$src0), 4016 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 4017 4018 def : Pat<(_.VT (OpNode _.RC:$src0, 4019 (_.VT (scalar_to_vector 4020 (_.EltVT (X86selects VK1WM:$mask, 4021 (_.EltVT _.FRC:$src1), 4022 (_.EltVT ZeroFP))))))), 4023 (!cast<Instruction>(InstrStr#rrkz) 4024 VK1WM:$mask, 4025 (_.VT _.RC:$src0), 4026 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 4027 } 4028 4029 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4030 dag Mask, RegisterClass MaskRC> { 4031 4032 def : Pat<(masked_store addr:$dst, Mask, 4033 (_.info512.VT (insert_subvector undef, 4034 (_.info128.VT _.info128.RC:$src), 4035 (iPTR 0)))), 4036 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4037 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4038 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; 4039 4040 } 4041 4042 multiclass avx512_store_scalar_lowering_subreg<string InstrStr, 4043 AVX512VLVectorVTInfo _, 4044 dag Mask, RegisterClass MaskRC, 4045 SubRegIndex subreg> { 4046 4047 def : Pat<(masked_store addr:$dst, Mask, 4048 (_.info512.VT (insert_subvector undef, 4049 (_.info128.VT _.info128.RC:$src), 4050 (iPTR 0)))), 4051 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4052 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4053 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; 4054 4055 } 4056 4057 // This matches the more recent codegen from clang that avoids emitting a 512 4058 // bit masked store directly. Codegen will widen 128-bit masked store to 512 4059 // bits on AVX512F only targets. 4060 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr, 4061 AVX512VLVectorVTInfo _, 4062 dag Mask512, dag Mask128, 4063 RegisterClass MaskRC, 4064 SubRegIndex subreg> { 4065 4066 // AVX512F pattern. 4067 def : Pat<(masked_store addr:$dst, Mask512, 4068 (_.info512.VT (insert_subvector undef, 4069 (_.info128.VT _.info128.RC:$src), 4070 (iPTR 0)))), 4071 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4072 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4073 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; 4074 4075 // AVX512VL pattern. 4076 def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)), 4077 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4078 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4079 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; 4080 } 4081 4082 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4083 dag Mask, RegisterClass MaskRC> { 4084 4085 def : Pat<(_.info128.VT (extract_subvector 4086 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4087 (_.info512.VT (bitconvert 4088 (v16i32 immAllZerosV))))), 4089 (iPTR 0))), 4090 (!cast<Instruction>(InstrStr#rmkz) 4091 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4092 addr:$srcAddr)>; 4093 4094 def : Pat<(_.info128.VT (extract_subvector 4095 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4096 (_.info512.VT (insert_subvector undef, 4097 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4098 (iPTR 0))))), 4099 (iPTR 0))), 4100 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4101 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4102 addr:$srcAddr)>; 4103 4104 } 4105 4106 multiclass avx512_load_scalar_lowering_subreg<string InstrStr, 4107 AVX512VLVectorVTInfo _, 4108 dag Mask, RegisterClass MaskRC, 4109 SubRegIndex subreg> { 4110 4111 def : Pat<(_.info128.VT (extract_subvector 4112 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4113 (_.info512.VT (bitconvert 4114 (v16i32 immAllZerosV))))), 4115 (iPTR 0))), 4116 (!cast<Instruction>(InstrStr#rmkz) 4117 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4118 addr:$srcAddr)>; 4119 4120 def : Pat<(_.info128.VT (extract_subvector 4121 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4122 (_.info512.VT (insert_subvector undef, 4123 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4124 (iPTR 0))))), 4125 (iPTR 0))), 4126 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4127 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4128 addr:$srcAddr)>; 4129 4130 } 4131 4132 // This matches the more recent codegen from clang that avoids emitting a 512 4133 // bit masked load directly. Codegen will widen 128-bit masked load to 512 4134 // bits on AVX512F only targets. 4135 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr, 4136 AVX512VLVectorVTInfo _, 4137 dag Mask512, dag Mask128, 4138 RegisterClass MaskRC, 4139 SubRegIndex subreg> { 4140 // AVX512F patterns. 4141 def : Pat<(_.info128.VT (extract_subvector 4142 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4143 (_.info512.VT (bitconvert 4144 (v16i32 immAllZerosV))))), 4145 (iPTR 0))), 4146 (!cast<Instruction>(InstrStr#rmkz) 4147 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4148 addr:$srcAddr)>; 4149 4150 def : Pat<(_.info128.VT (extract_subvector 4151 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4152 (_.info512.VT (insert_subvector undef, 4153 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4154 (iPTR 0))))), 4155 (iPTR 0))), 4156 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4157 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4158 addr:$srcAddr)>; 4159 4160 // AVX512Vl patterns. 4161 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, 4162 (_.info128.VT (bitconvert (v4i32 immAllZerosV))))), 4163 (!cast<Instruction>(InstrStr#rmkz) 4164 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4165 addr:$srcAddr)>; 4166 4167 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, 4168 (_.info128.VT (X86vzmovl _.info128.RC:$src)))), 4169 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4170 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4171 addr:$srcAddr)>; 4172 } 4173 4174 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>; 4175 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>; 4176 4177 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4178 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4179 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4180 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; 4181 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4182 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4183 4184 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4185 (v16i1 (insert_subvector 4186 (v16i1 immAllZerosV), 4187 (v4i1 (extract_subvector 4188 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4189 (iPTR 0))), 4190 (iPTR 0))), 4191 (v4i1 (extract_subvector 4192 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4193 (iPTR 0))), GR8, sub_8bit>; 4194 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4195 (v8i1 4196 (extract_subvector 4197 (v16i1 4198 (insert_subvector 4199 (v16i1 immAllZerosV), 4200 (v2i1 (extract_subvector 4201 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4202 (iPTR 0))), 4203 (iPTR 0))), 4204 (iPTR 0))), 4205 (v2i1 (extract_subvector 4206 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4207 (iPTR 0))), GR8, sub_8bit>; 4208 4209 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4210 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4211 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4212 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; 4213 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4214 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4215 4216 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4217 (v16i1 (insert_subvector 4218 (v16i1 immAllZerosV), 4219 (v4i1 (extract_subvector 4220 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4221 (iPTR 0))), 4222 (iPTR 0))), 4223 (v4i1 (extract_subvector 4224 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4225 (iPTR 0))), GR8, sub_8bit>; 4226 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4227 (v8i1 4228 (extract_subvector 4229 (v16i1 4230 (insert_subvector 4231 (v16i1 immAllZerosV), 4232 (v2i1 (extract_subvector 4233 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4234 (iPTR 0))), 4235 (iPTR 0))), 4236 (iPTR 0))), 4237 (v2i1 (extract_subvector 4238 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4239 (iPTR 0))), GR8, sub_8bit>; 4240 4241 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), 4242 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk 4243 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), 4244 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4245 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4246 4247 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), 4248 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4249 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4250 4251 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), 4252 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk 4253 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), 4254 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), 4255 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; 4256 4257 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)), 4258 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), 4259 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; 4260 4261 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 4262 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4263 (ins VR128X:$src1, VR128X:$src2), 4264 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4265 []>, XS, EVEX_4V, VEX_LIG, 4266 FoldGenData<"VMOVSSZrr">, 4267 Sched<[SchedWriteFShuffle.XMM]>; 4268 4269 let Constraints = "$src0 = $dst" in 4270 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4271 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, 4272 VR128X:$src1, VR128X:$src2), 4273 "vmovss\t{$src2, $src1, $dst {${mask}}|"# 4274 "$dst {${mask}}, $src1, $src2}", 4275 []>, EVEX_K, XS, EVEX_4V, VEX_LIG, 4276 FoldGenData<"VMOVSSZrrk">, 4277 Sched<[SchedWriteFShuffle.XMM]>; 4278 4279 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4280 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), 4281 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"# 4282 "$dst {${mask}} {z}, $src1, $src2}", 4283 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, 4284 FoldGenData<"VMOVSSZrrkz">, 4285 Sched<[SchedWriteFShuffle.XMM]>; 4286 4287 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4288 (ins VR128X:$src1, VR128X:$src2), 4289 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4290 []>, XD, EVEX_4V, VEX_LIG, VEX_W, 4291 FoldGenData<"VMOVSDZrr">, 4292 Sched<[SchedWriteFShuffle.XMM]>; 4293 4294 let Constraints = "$src0 = $dst" in 4295 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4296 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, 4297 VR128X:$src1, VR128X:$src2), 4298 "vmovsd\t{$src2, $src1, $dst {${mask}}|"# 4299 "$dst {${mask}}, $src1, $src2}", 4300 []>, EVEX_K, XD, EVEX_4V, VEX_LIG, 4301 VEX_W, FoldGenData<"VMOVSDZrrk">, 4302 Sched<[SchedWriteFShuffle.XMM]>; 4303 4304 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4305 (ins f64x_info.KRCWM:$mask, VR128X:$src1, 4306 VR128X:$src2), 4307 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"# 4308 "$dst {${mask}} {z}, $src1, $src2}", 4309 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, 4310 VEX_W, FoldGenData<"VMOVSDZrrkz">, 4311 Sched<[SchedWriteFShuffle.XMM]>; 4312 } 4313 4314 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4315 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4316 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"# 4317 "$dst {${mask}}, $src1, $src2}", 4318 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask, 4319 VR128X:$src1, VR128X:$src2), 0>; 4320 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4321 "$dst {${mask}} {z}, $src1, $src2}", 4322 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask, 4323 VR128X:$src1, VR128X:$src2), 0>; 4324 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4325 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4326 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# 4327 "$dst {${mask}}, $src1, $src2}", 4328 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask, 4329 VR128X:$src1, VR128X:$src2), 0>; 4330 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4331 "$dst {${mask}} {z}, $src1, $src2}", 4332 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask, 4333 VR128X:$src1, VR128X:$src2), 0>; 4334 4335 let Predicates = [HasAVX512, OptForSize] in { 4336 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))), 4337 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>; 4338 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))), 4339 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>; 4340 4341 // Move low f32 and clear high bits. 4342 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))), 4343 (SUBREG_TO_REG (i32 0), 4344 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), 4345 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>; 4346 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))), 4347 (SUBREG_TO_REG (i32 0), 4348 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), 4349 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>; 4350 4351 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))), 4352 (SUBREG_TO_REG (i32 0), 4353 (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)), 4354 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>; 4355 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))), 4356 (SUBREG_TO_REG (i32 0), 4357 (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)), 4358 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>; 4359 4360 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))), 4361 (SUBREG_TO_REG (i32 0), 4362 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), 4363 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>; 4364 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))), 4365 (SUBREG_TO_REG (i32 0), 4366 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), 4367 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>; 4368 4369 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))), 4370 (SUBREG_TO_REG (i32 0), 4371 (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)), 4372 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>; 4373 4374 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))), 4375 (SUBREG_TO_REG (i32 0), 4376 (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)), 4377 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>; 4378 4379 } 4380 4381 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than 4382 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31. 4383 let Predicates = [HasAVX512, OptForSpeed] in { 4384 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))), 4385 (SUBREG_TO_REG (i32 0), 4386 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)), 4387 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), 4388 (i8 1))), sub_xmm)>; 4389 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))), 4390 (SUBREG_TO_REG (i32 0), 4391 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)), 4392 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), 4393 (i8 3))), sub_xmm)>; 4394 4395 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))), 4396 (SUBREG_TO_REG (i32 0), 4397 (v2f64 (VBLENDPDrri (v2f64 (V_SET0)), 4398 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), 4399 (i8 1))), sub_xmm)>; 4400 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))), 4401 (SUBREG_TO_REG (i32 0), 4402 (v2i64 (VPBLENDWrri (v2i64 (V_SET0)), 4403 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), 4404 (i8 0xf))), sub_xmm)>; 4405 } 4406 4407 let Predicates = [HasAVX512] in { 4408 4409 // MOVSSrm zeros the high parts of the register; represent this 4410 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 4411 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), 4412 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; 4413 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), 4414 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; 4415 def : Pat<(v4f32 (X86vzload addr:$src)), 4416 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; 4417 4418 // MOVSDrm zeros the high parts of the register; represent this 4419 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 4420 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), 4421 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; 4422 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), 4423 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; 4424 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), 4425 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; 4426 def : Pat<(v2f64 (X86vzload addr:$src)), 4427 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; 4428 4429 // Represent the same patterns above but in the form they appear for 4430 // 256-bit types 4431 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, 4432 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), 4433 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>; 4434 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, 4435 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), 4436 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; 4437 def : Pat<(v8f32 (X86vzload addr:$src)), 4438 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; 4439 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, 4440 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), 4441 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; 4442 def : Pat<(v4f64 (X86vzload addr:$src)), 4443 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; 4444 4445 // Represent the same patterns above but in the form they appear for 4446 // 512-bit types 4447 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef, 4448 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), 4449 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>; 4450 def : Pat<(v16f32 (X86vzmovl (insert_subvector undef, 4451 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), 4452 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; 4453 def : Pat<(v16f32 (X86vzload addr:$src)), 4454 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; 4455 def : Pat<(v8f64 (X86vzmovl (insert_subvector undef, 4456 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), 4457 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; 4458 def : Pat<(v8f64 (X86vzload addr:$src)), 4459 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; 4460 4461 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, 4462 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), 4463 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>; 4464 4465 // Extract and store. 4466 def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))), 4467 addr:$dst), 4468 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>; 4469 } 4470 4471 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in { 4472 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), 4473 (ins VR128X:$src), 4474 "vmovq\t{$src, $dst|$dst, $src}", 4475 [(set VR128X:$dst, (v2i64 (X86vzmovl 4476 (v2i64 VR128X:$src))))]>, 4477 EVEX, VEX_W; 4478 } 4479 4480 let Predicates = [HasAVX512] in { 4481 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), 4482 (VMOVDI2PDIZrr GR32:$src)>; 4483 4484 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))), 4485 (VMOV64toPQIZrr GR64:$src)>; 4486 4487 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, 4488 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), 4489 (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>; 4490 4491 def : Pat<(v8i64 (X86vzmovl (insert_subvector undef, 4492 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), 4493 (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>; 4494 4495 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. 4496 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), 4497 (VMOVDI2PDIZrm addr:$src)>; 4498 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), 4499 (VMOVDI2PDIZrm addr:$src)>; 4500 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), 4501 (VMOVDI2PDIZrm addr:$src)>; 4502 def : Pat<(v4i32 (X86vzload addr:$src)), 4503 (VMOVDI2PDIZrm addr:$src)>; 4504 def : Pat<(v8i32 (X86vzload addr:$src)), 4505 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>; 4506 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), 4507 (VMOVQI2PQIZrm addr:$src)>; 4508 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))), 4509 (VMOVZPQILo2PQIZrr VR128X:$src)>; 4510 def : Pat<(v2i64 (X86vzload addr:$src)), 4511 (VMOVQI2PQIZrm addr:$src)>; 4512 def : Pat<(v4i64 (X86vzload addr:$src)), 4513 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>; 4514 4515 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. 4516 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, 4517 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), 4518 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>; 4519 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef, 4520 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), 4521 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>; 4522 4523 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext. 4524 def : Pat<(v16i32 (X86vzload addr:$src)), 4525 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>; 4526 def : Pat<(v8i64 (X86vzload addr:$src)), 4527 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>; 4528 } 4529 4530 //===----------------------------------------------------------------------===// 4531 // AVX-512 - Non-temporals 4532 //===----------------------------------------------------------------------===// 4533 4534 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), 4535 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", 4536 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>, 4537 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>; 4538 4539 let Predicates = [HasVLX] in { 4540 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), 4541 (ins i256mem:$src), 4542 "vmovntdqa\t{$src, $dst|$dst, $src}", 4543 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>, 4544 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>; 4545 4546 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), 4547 (ins i128mem:$src), 4548 "vmovntdqa\t{$src, $dst|$dst, $src}", 4549 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>, 4550 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>; 4551 } 4552 4553 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 4554 X86SchedWriteMoveLS Sched, 4555 PatFrag st_frag = alignednontemporalstore> { 4556 let SchedRW = [Sched.MR], AddedComplexity = 400 in 4557 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 4558 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4559 [(st_frag (_.VT _.RC:$src), addr:$dst)], 4560 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>; 4561 } 4562 4563 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, 4564 AVX512VLVectorVTInfo VTInfo, 4565 X86SchedWriteMoveLSWidths Sched> { 4566 let Predicates = [HasAVX512] in 4567 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512; 4568 4569 let Predicates = [HasAVX512, HasVLX] in { 4570 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256; 4571 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128; 4572 } 4573 } 4574 4575 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info, 4576 SchedWriteVecMoveLSNT>, PD; 4577 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info, 4578 SchedWriteFMoveLSNT>, PD, VEX_W; 4579 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info, 4580 SchedWriteFMoveLSNT>, PS; 4581 4582 let Predicates = [HasAVX512], AddedComplexity = 400 in { 4583 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst), 4584 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4585 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst), 4586 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4587 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst), 4588 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4589 4590 def : Pat<(v8f64 (alignednontemporalload addr:$src)), 4591 (VMOVNTDQAZrm addr:$src)>; 4592 def : Pat<(v16f32 (alignednontemporalload addr:$src)), 4593 (VMOVNTDQAZrm addr:$src)>; 4594 def : Pat<(v8i64 (alignednontemporalload addr:$src)), 4595 (VMOVNTDQAZrm addr:$src)>; 4596 } 4597 4598 let Predicates = [HasVLX], AddedComplexity = 400 in { 4599 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst), 4600 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4601 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst), 4602 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4603 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst), 4604 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4605 4606 def : Pat<(v4f64 (alignednontemporalload addr:$src)), 4607 (VMOVNTDQAZ256rm addr:$src)>; 4608 def : Pat<(v8f32 (alignednontemporalload addr:$src)), 4609 (VMOVNTDQAZ256rm addr:$src)>; 4610 def : Pat<(v4i64 (alignednontemporalload addr:$src)), 4611 (VMOVNTDQAZ256rm addr:$src)>; 4612 4613 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst), 4614 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4615 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst), 4616 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4617 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst), 4618 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4619 4620 def : Pat<(v2f64 (alignednontemporalload addr:$src)), 4621 (VMOVNTDQAZ128rm addr:$src)>; 4622 def : Pat<(v4f32 (alignednontemporalload addr:$src)), 4623 (VMOVNTDQAZ128rm addr:$src)>; 4624 def : Pat<(v2i64 (alignednontemporalload addr:$src)), 4625 (VMOVNTDQAZ128rm addr:$src)>; 4626 } 4627 4628 //===----------------------------------------------------------------------===// 4629 // AVX-512 - Integer arithmetic 4630 // 4631 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 4632 X86VectorVTInfo _, X86FoldableSchedWrite sched, 4633 bit IsCommutable = 0> { 4634 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 4635 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 4636 "$src2, $src1", "$src1, $src2", 4637 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 4638 IsCommutable>, AVX512BIBase, EVEX_4V, 4639 Sched<[sched]>; 4640 4641 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 4642 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 4643 "$src2, $src1", "$src1, $src2", 4644 (_.VT (OpNode _.RC:$src1, 4645 (bitconvert (_.LdFrag addr:$src2))))>, 4646 AVX512BIBase, EVEX_4V, 4647 Sched<[sched.Folded, ReadAfterLd]>; 4648 } 4649 4650 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, 4651 X86VectorVTInfo _, X86FoldableSchedWrite sched, 4652 bit IsCommutable = 0> : 4653 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> { 4654 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 4655 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 4656 "${src2}"##_.BroadcastStr##", $src1", 4657 "$src1, ${src2}"##_.BroadcastStr, 4658 (_.VT (OpNode _.RC:$src1, 4659 (X86VBroadcast 4660 (_.ScalarLdFrag addr:$src2))))>, 4661 AVX512BIBase, EVEX_4V, EVEX_B, 4662 Sched<[sched.Folded, ReadAfterLd]>; 4663 } 4664 4665 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 4666 AVX512VLVectorVTInfo VTInfo, 4667 X86SchedWriteWidths sched, Predicate prd, 4668 bit IsCommutable = 0> { 4669 let Predicates = [prd] in 4670 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM, 4671 IsCommutable>, EVEX_V512; 4672 4673 let Predicates = [prd, HasVLX] in { 4674 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, 4675 sched.YMM, IsCommutable>, EVEX_V256; 4676 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, 4677 sched.XMM, IsCommutable>, EVEX_V128; 4678 } 4679 } 4680 4681 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 4682 AVX512VLVectorVTInfo VTInfo, 4683 X86SchedWriteWidths sched, Predicate prd, 4684 bit IsCommutable = 0> { 4685 let Predicates = [prd] in 4686 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM, 4687 IsCommutable>, EVEX_V512; 4688 4689 let Predicates = [prd, HasVLX] in { 4690 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, 4691 sched.YMM, IsCommutable>, EVEX_V256; 4692 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, 4693 sched.XMM, IsCommutable>, EVEX_V128; 4694 } 4695 } 4696 4697 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode, 4698 X86SchedWriteWidths sched, Predicate prd, 4699 bit IsCommutable = 0> { 4700 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info, 4701 sched, prd, IsCommutable>, 4702 VEX_W, EVEX_CD8<64, CD8VF>; 4703 } 4704 4705 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode, 4706 X86SchedWriteWidths sched, Predicate prd, 4707 bit IsCommutable = 0> { 4708 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info, 4709 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>; 4710 } 4711 4712 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 4713 X86SchedWriteWidths sched, Predicate prd, 4714 bit IsCommutable = 0> { 4715 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info, 4716 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>, 4717 VEX_WIG; 4718 } 4719 4720 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode, 4721 X86SchedWriteWidths sched, Predicate prd, 4722 bit IsCommutable = 0> { 4723 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info, 4724 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>, 4725 VEX_WIG; 4726 } 4727 4728 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, 4729 SDNode OpNode, X86SchedWriteWidths sched, 4730 Predicate prd, bit IsCommutable = 0> { 4731 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd, 4732 IsCommutable>; 4733 4734 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd, 4735 IsCommutable>; 4736 } 4737 4738 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, 4739 SDNode OpNode, X86SchedWriteWidths sched, 4740 Predicate prd, bit IsCommutable = 0> { 4741 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd, 4742 IsCommutable>; 4743 4744 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd, 4745 IsCommutable>; 4746 } 4747 4748 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w, 4749 bits<8> opc_d, bits<8> opc_q, 4750 string OpcodeStr, SDNode OpNode, 4751 X86SchedWriteWidths sched, 4752 bit IsCommutable = 0> { 4753 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, 4754 sched, HasAVX512, IsCommutable>, 4755 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, 4756 sched, HasBWI, IsCommutable>; 4757 } 4758 4759 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, 4760 X86FoldableSchedWrite sched, 4761 SDNode OpNode,X86VectorVTInfo _Src, 4762 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct, 4763 bit IsCommutable = 0> { 4764 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), 4765 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, 4766 "$src2, $src1","$src1, $src2", 4767 (_Dst.VT (OpNode 4768 (_Src.VT _Src.RC:$src1), 4769 (_Src.VT _Src.RC:$src2))), 4770 IsCommutable>, 4771 AVX512BIBase, EVEX_4V, Sched<[sched]>; 4772 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4773 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, 4774 "$src2, $src1", "$src1, $src2", 4775 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), 4776 (bitconvert (_Src.LdFrag addr:$src2))))>, 4777 AVX512BIBase, EVEX_4V, 4778 Sched<[sched.Folded, ReadAfterLd]>; 4779 4780 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4781 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2), 4782 OpcodeStr, 4783 "${src2}"##_Brdct.BroadcastStr##", $src1", 4784 "$src1, ${src2}"##_Brdct.BroadcastStr, 4785 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert 4786 (_Brdct.VT (X86VBroadcast 4787 (_Brdct.ScalarLdFrag addr:$src2))))))>, 4788 AVX512BIBase, EVEX_4V, EVEX_B, 4789 Sched<[sched.Folded, ReadAfterLd]>; 4790 } 4791 4792 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, 4793 SchedWriteVecALU, 1>; 4794 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, 4795 SchedWriteVecALU, 0>; 4796 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, 4797 SchedWriteVecALU, HasBWI, 1>; 4798 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, 4799 SchedWriteVecALU, HasBWI, 0>; 4800 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, 4801 SchedWriteVecALU, HasBWI, 1>; 4802 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, 4803 SchedWriteVecALU, HasBWI, 0>; 4804 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, 4805 SchedWritePMULLD, HasAVX512, 1>, T8PD; 4806 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, 4807 SchedWriteVecIMul, HasBWI, 1>; 4808 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, 4809 SchedWriteVecIMul, HasDQI, 1>, T8PD, 4810 NotEVEX2VEXConvertible; 4811 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul, 4812 HasBWI, 1>; 4813 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul, 4814 HasBWI, 1>; 4815 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, 4816 SchedWriteVecIMul, HasBWI, 1>, T8PD; 4817 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, 4818 SchedWriteVecALU, HasBWI, 1>; 4819 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq, 4820 SchedWriteVecIMul, HasAVX512, 1>, T8PD; 4821 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq, 4822 SchedWriteVecIMul, HasAVX512, 1>; 4823 4824 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, 4825 X86SchedWriteWidths sched, 4826 AVX512VLVectorVTInfo _SrcVTInfo, 4827 AVX512VLVectorVTInfo _DstVTInfo, 4828 SDNode OpNode, Predicate prd, bit IsCommutable = 0> { 4829 let Predicates = [prd] in 4830 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode, 4831 _SrcVTInfo.info512, _DstVTInfo.info512, 4832 v8i64_info, IsCommutable>, 4833 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; 4834 let Predicates = [HasVLX, prd] in { 4835 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode, 4836 _SrcVTInfo.info256, _DstVTInfo.info256, 4837 v4i64x_info, IsCommutable>, 4838 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W; 4839 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode, 4840 _SrcVTInfo.info128, _DstVTInfo.info128, 4841 v2i64x_info, IsCommutable>, 4842 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; 4843 } 4844 } 4845 4846 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU, 4847 avx512vl_i8_info, avx512vl_i8_info, 4848 X86multishift, HasVBMI, 0>, T8PD; 4849 4850 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, 4851 X86VectorVTInfo _Src, X86VectorVTInfo _Dst, 4852 X86FoldableSchedWrite sched> { 4853 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4854 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), 4855 OpcodeStr, 4856 "${src2}"##_Src.BroadcastStr##", $src1", 4857 "$src1, ${src2}"##_Src.BroadcastStr, 4858 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert 4859 (_Src.VT (X86VBroadcast 4860 (_Src.ScalarLdFrag addr:$src2))))))>, 4861 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, 4862 Sched<[sched.Folded, ReadAfterLd]>; 4863 } 4864 4865 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr, 4866 SDNode OpNode,X86VectorVTInfo _Src, 4867 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched, 4868 bit IsCommutable = 0> { 4869 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), 4870 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, 4871 "$src2, $src1","$src1, $src2", 4872 (_Dst.VT (OpNode 4873 (_Src.VT _Src.RC:$src1), 4874 (_Src.VT _Src.RC:$src2))), 4875 IsCommutable>, 4876 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>; 4877 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4878 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, 4879 "$src2, $src1", "$src1, $src2", 4880 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), 4881 (bitconvert (_Src.LdFrag addr:$src2))))>, 4882 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>, 4883 Sched<[sched.Folded, ReadAfterLd]>; 4884 } 4885 4886 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr, 4887 SDNode OpNode> { 4888 let Predicates = [HasBWI] in 4889 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info, 4890 v32i16_info, SchedWriteShuffle.ZMM>, 4891 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info, 4892 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512; 4893 let Predicates = [HasBWI, HasVLX] in { 4894 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info, 4895 v16i16x_info, SchedWriteShuffle.YMM>, 4896 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info, 4897 v16i16x_info, SchedWriteShuffle.YMM>, 4898 EVEX_V256; 4899 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info, 4900 v8i16x_info, SchedWriteShuffle.XMM>, 4901 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info, 4902 v8i16x_info, SchedWriteShuffle.XMM>, 4903 EVEX_V128; 4904 } 4905 } 4906 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr, 4907 SDNode OpNode> { 4908 let Predicates = [HasBWI] in 4909 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info, 4910 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG; 4911 let Predicates = [HasBWI, HasVLX] in { 4912 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info, 4913 v32i8x_info, SchedWriteShuffle.YMM>, 4914 EVEX_V256, VEX_WIG; 4915 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info, 4916 v16i8x_info, SchedWriteShuffle.XMM>, 4917 EVEX_V128, VEX_WIG; 4918 } 4919 } 4920 4921 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr, 4922 SDNode OpNode, AVX512VLVectorVTInfo _Src, 4923 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> { 4924 let Predicates = [HasBWI] in 4925 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512, 4926 _Dst.info512, SchedWriteVecIMul.ZMM, 4927 IsCommutable>, EVEX_V512; 4928 let Predicates = [HasBWI, HasVLX] in { 4929 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256, 4930 _Dst.info256, SchedWriteVecIMul.YMM, 4931 IsCommutable>, EVEX_V256; 4932 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128, 4933 _Dst.info128, SchedWriteVecIMul.XMM, 4934 IsCommutable>, EVEX_V128; 4935 } 4936 } 4937 4938 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase; 4939 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase; 4940 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase; 4941 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase; 4942 4943 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, 4944 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG; 4945 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, 4946 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG; 4947 4948 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, 4949 SchedWriteVecALU, HasBWI, 1>, T8PD; 4950 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, 4951 SchedWriteVecALU, HasBWI, 1>; 4952 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax, 4953 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4954 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax, 4955 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4956 NotEVEX2VEXConvertible; 4957 4958 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, 4959 SchedWriteVecALU, HasBWI, 1>; 4960 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, 4961 SchedWriteVecALU, HasBWI, 1>, T8PD; 4962 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax, 4963 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4964 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax, 4965 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4966 NotEVEX2VEXConvertible; 4967 4968 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, 4969 SchedWriteVecALU, HasBWI, 1>, T8PD; 4970 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, 4971 SchedWriteVecALU, HasBWI, 1>; 4972 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin, 4973 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4974 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin, 4975 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4976 NotEVEX2VEXConvertible; 4977 4978 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, 4979 SchedWriteVecALU, HasBWI, 1>; 4980 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, 4981 SchedWriteVecALU, HasBWI, 1>, T8PD; 4982 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin, 4983 SchedWriteVecALU, HasAVX512, 1>, T8PD; 4984 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin, 4985 SchedWriteVecALU, HasAVX512, 1>, T8PD, 4986 NotEVEX2VEXConvertible; 4987 4988 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. 4989 let Predicates = [HasDQI, NoVLX] in { 4990 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 4991 (EXTRACT_SUBREG 4992 (VPMULLQZrr 4993 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 4994 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 4995 sub_ymm)>; 4996 4997 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 4998 (EXTRACT_SUBREG 4999 (VPMULLQZrr 5000 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 5001 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 5002 sub_xmm)>; 5003 } 5004 5005 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. 5006 let Predicates = [HasDQI, NoVLX] in { 5007 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 5008 (EXTRACT_SUBREG 5009 (VPMULLQZrr 5010 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 5011 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 5012 sub_ymm)>; 5013 5014 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 5015 (EXTRACT_SUBREG 5016 (VPMULLQZrr 5017 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 5018 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 5019 sub_xmm)>; 5020 } 5021 5022 multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> { 5023 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)), 5024 (EXTRACT_SUBREG 5025 (Instr 5026 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 5027 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 5028 sub_ymm)>; 5029 5030 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)), 5031 (EXTRACT_SUBREG 5032 (Instr 5033 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 5034 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 5035 sub_xmm)>; 5036 } 5037 5038 let Predicates = [HasAVX512, NoVLX] in { 5039 defm : avx512_min_max_lowering<VPMAXUQZrr, umax>; 5040 defm : avx512_min_max_lowering<VPMINUQZrr, umin>; 5041 defm : avx512_min_max_lowering<VPMAXSQZrr, smax>; 5042 defm : avx512_min_max_lowering<VPMINSQZrr, smin>; 5043 } 5044 5045 //===----------------------------------------------------------------------===// 5046 // AVX-512 Logical Instructions 5047 //===----------------------------------------------------------------------===// 5048 5049 // OpNodeMsk is the OpNode to use when element size is important. OpNode will 5050 // be set to null_frag for 32-bit elements. 5051 multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, 5052 SDPatternOperator OpNode, 5053 SDNode OpNodeMsk, X86FoldableSchedWrite sched, 5054 X86VectorVTInfo _, bit IsCommutable = 0> { 5055 let hasSideEffects = 0 in 5056 defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst), 5057 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5058 "$src2, $src1", "$src1, $src2", 5059 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)), 5060 (bitconvert (_.VT _.RC:$src2)))), 5061 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1, 5062 _.RC:$src2)))), 5063 IsCommutable>, AVX512BIBase, EVEX_4V, 5064 Sched<[sched]>; 5065 5066 let hasSideEffects = 0, mayLoad = 1 in 5067 defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst), 5068 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5069 "$src2, $src1", "$src1, $src2", 5070 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)), 5071 (bitconvert (_.LdFrag addr:$src2)))), 5072 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1, 5073 (bitconvert (_.LdFrag addr:$src2))))))>, 5074 AVX512BIBase, EVEX_4V, 5075 Sched<[sched.Folded, ReadAfterLd]>; 5076 } 5077 5078 // OpNodeMsk is the OpNode to use where element size is important. So use 5079 // for all of the broadcast patterns. 5080 multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, 5081 SDPatternOperator OpNode, 5082 SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _, 5083 bit IsCommutable = 0> : 5084 avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _, 5085 IsCommutable> { 5086 defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst), 5087 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5088 "${src2}"##_.BroadcastStr##", $src1", 5089 "$src1, ${src2}"##_.BroadcastStr, 5090 (_.i64VT (OpNodeMsk _.RC:$src1, 5091 (bitconvert 5092 (_.VT (X86VBroadcast 5093 (_.ScalarLdFrag addr:$src2)))))), 5094 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1, 5095 (bitconvert 5096 (_.VT (X86VBroadcast 5097 (_.ScalarLdFrag addr:$src2))))))))>, 5098 AVX512BIBase, EVEX_4V, EVEX_B, 5099 Sched<[sched.Folded, ReadAfterLd]>; 5100 } 5101 5102 multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, 5103 SDPatternOperator OpNode, 5104 SDNode OpNodeMsk, X86SchedWriteWidths sched, 5105 AVX512VLVectorVTInfo VTInfo, 5106 bit IsCommutable = 0> { 5107 let Predicates = [HasAVX512] in 5108 defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM, 5109 VTInfo.info512, IsCommutable>, EVEX_V512; 5110 5111 let Predicates = [HasAVX512, HasVLX] in { 5112 defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM, 5113 VTInfo.info256, IsCommutable>, EVEX_V256; 5114 defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM, 5115 VTInfo.info128, IsCommutable>, EVEX_V128; 5116 } 5117 } 5118 5119 multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, 5120 SDNode OpNode, X86SchedWriteWidths sched, 5121 bit IsCommutable = 0> { 5122 defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched, 5123 avx512vl_i64_info, IsCommutable>, 5124 VEX_W, EVEX_CD8<64, CD8VF>; 5125 defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched, 5126 avx512vl_i32_info, IsCommutable>, 5127 EVEX_CD8<32, CD8VF>; 5128 } 5129 5130 defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 5131 SchedWriteVecLogic, 1>; 5132 defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, 5133 SchedWriteVecLogic, 1>; 5134 defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, 5135 SchedWriteVecLogic, 1>; 5136 defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, 5137 SchedWriteVecLogic>; 5138 5139 //===----------------------------------------------------------------------===// 5140 // AVX-512 FP arithmetic 5141 //===----------------------------------------------------------------------===// 5142 5143 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5144 SDNode OpNode, SDNode VecNode, 5145 X86FoldableSchedWrite sched, bit IsCommutable> { 5146 let ExeDomain = _.ExeDomain in { 5147 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5148 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5149 "$src2, $src1", "$src1, $src2", 5150 (_.VT (VecNode _.RC:$src1, _.RC:$src2, 5151 (i32 FROUND_CURRENT)))>, 5152 Sched<[sched]>; 5153 5154 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5155 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 5156 "$src2, $src1", "$src1, $src2", 5157 (_.VT (VecNode _.RC:$src1, 5158 _.ScalarIntMemCPat:$src2, 5159 (i32 FROUND_CURRENT)))>, 5160 Sched<[sched.Folded, ReadAfterLd]>; 5161 let isCodeGenOnly = 1, Predicates = [HasAVX512] in { 5162 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5163 (ins _.FRC:$src1, _.FRC:$src2), 5164 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5165 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5166 Sched<[sched]> { 5167 let isCommutable = IsCommutable; 5168 } 5169 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5170 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5171 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5172 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5173 (_.ScalarLdFrag addr:$src2)))]>, 5174 Sched<[sched.Folded, ReadAfterLd]>; 5175 } 5176 } 5177 } 5178 5179 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5180 SDNode VecNode, X86FoldableSchedWrite sched, 5181 bit IsCommutable = 0> { 5182 let ExeDomain = _.ExeDomain in 5183 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5184 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 5185 "$rc, $src2, $src1", "$src1, $src2, $rc", 5186 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), 5187 (i32 imm:$rc)), IsCommutable>, 5188 EVEX_B, EVEX_RC, Sched<[sched]>; 5189 } 5190 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5191 SDNode OpNode, SDNode VecNode, SDNode SaeNode, 5192 X86FoldableSchedWrite sched, bit IsCommutable> { 5193 let ExeDomain = _.ExeDomain in { 5194 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5195 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5196 "$src2, $src1", "$src1, $src2", 5197 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, 5198 Sched<[sched]>; 5199 5200 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5201 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 5202 "$src2, $src1", "$src1, $src2", 5203 (_.VT (VecNode _.RC:$src1, 5204 _.ScalarIntMemCPat:$src2))>, 5205 Sched<[sched.Folded, ReadAfterLd]>; 5206 5207 let isCodeGenOnly = 1, Predicates = [HasAVX512] in { 5208 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5209 (ins _.FRC:$src1, _.FRC:$src2), 5210 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5211 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5212 Sched<[sched]> { 5213 let isCommutable = IsCommutable; 5214 } 5215 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5216 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5217 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5218 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5219 (_.ScalarLdFrag addr:$src2)))]>, 5220 Sched<[sched.Folded, ReadAfterLd]>; 5221 } 5222 5223 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5224 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5225 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5226 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), 5227 (i32 FROUND_NO_EXC))>, EVEX_B, 5228 Sched<[sched]>; 5229 } 5230 } 5231 5232 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 5233 SDNode VecNode, X86SchedWriteSizes sched, 5234 bit IsCommutable> { 5235 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, 5236 sched.PS.Scl, IsCommutable>, 5237 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode, 5238 sched.PS.Scl, IsCommutable>, 5239 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5240 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, 5241 sched.PD.Scl, IsCommutable>, 5242 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode, 5243 sched.PD.Scl, IsCommutable>, 5244 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5245 } 5246 5247 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, 5248 SDNode VecNode, SDNode SaeNode, 5249 X86SchedWriteSizes sched, bit IsCommutable> { 5250 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, 5251 VecNode, SaeNode, sched.PS.Scl, IsCommutable>, 5252 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5253 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, 5254 VecNode, SaeNode, sched.PD.Scl, IsCommutable>, 5255 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5256 } 5257 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, 5258 SchedWriteFAddSizes, 1>; 5259 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, 5260 SchedWriteFMulSizes, 1>; 5261 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, 5262 SchedWriteFAddSizes, 0>; 5263 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, 5264 SchedWriteFDivSizes, 0>; 5265 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds, 5266 SchedWriteFCmpSizes, 0>; 5267 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds, 5268 SchedWriteFCmpSizes, 0>; 5269 5270 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use 5271 // X86fminc and X86fmaxc instead of X86fmin and X86fmax 5272 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, 5273 X86VectorVTInfo _, SDNode OpNode, 5274 X86FoldableSchedWrite sched> { 5275 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { 5276 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5277 (ins _.FRC:$src1, _.FRC:$src2), 5278 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5279 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5280 Sched<[sched]> { 5281 let isCommutable = 1; 5282 } 5283 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5284 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5285 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5286 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5287 (_.ScalarLdFrag addr:$src2)))]>, 5288 Sched<[sched.Folded, ReadAfterLd]>; 5289 } 5290 } 5291 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, 5292 SchedWriteFCmp.Scl>, XS, EVEX_4V, 5293 VEX_LIG, EVEX_CD8<32, CD8VT1>; 5294 5295 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, 5296 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V, 5297 VEX_LIG, EVEX_CD8<64, CD8VT1>; 5298 5299 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, 5300 SchedWriteFCmp.Scl>, XS, EVEX_4V, 5301 VEX_LIG, EVEX_CD8<32, CD8VT1>; 5302 5303 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, 5304 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V, 5305 VEX_LIG, EVEX_CD8<64, CD8VT1>; 5306 5307 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5308 X86VectorVTInfo _, X86FoldableSchedWrite sched, 5309 bit IsCommutable, 5310 bit IsKZCommutable = IsCommutable> { 5311 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 5312 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5313 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, 5314 "$src2, $src1", "$src1, $src2", 5315 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0, 5316 IsKZCommutable>, 5317 EVEX_4V, Sched<[sched]>; 5318 let mayLoad = 1 in { 5319 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5320 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, 5321 "$src2, $src1", "$src1, $src2", 5322 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, 5323 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; 5324 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5325 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, 5326 "${src2}"##_.BroadcastStr##", $src1", 5327 "$src1, ${src2}"##_.BroadcastStr, 5328 (OpNode _.RC:$src1, (_.VT (X86VBroadcast 5329 (_.ScalarLdFrag addr:$src2))))>, 5330 EVEX_4V, EVEX_B, 5331 Sched<[sched.Folded, ReadAfterLd]>; 5332 } 5333 } 5334 } 5335 5336 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, 5337 SDPatternOperator OpNodeRnd, 5338 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5339 let ExeDomain = _.ExeDomain in 5340 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5341 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix, 5342 "$rc, $src2, $src1", "$src1, $src2, $rc", 5343 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>, 5344 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 5345 } 5346 5347 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, 5348 SDPatternOperator OpNodeRnd, 5349 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5350 let ExeDomain = _.ExeDomain in 5351 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5352 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, 5353 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5354 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>, 5355 EVEX_4V, EVEX_B, Sched<[sched]>; 5356 } 5357 5358 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5359 Predicate prd, X86SchedWriteSizes sched, 5360 bit IsCommutable = 0, 5361 bit IsPD128Commutable = IsCommutable> { 5362 let Predicates = [prd] in { 5363 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info, 5364 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, 5365 EVEX_CD8<32, CD8VF>; 5366 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info, 5367 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, 5368 EVEX_CD8<64, CD8VF>; 5369 } 5370 5371 // Define only if AVX512VL feature is present. 5372 let Predicates = [prd, HasVLX] in { 5373 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info, 5374 sched.PS.XMM, IsCommutable>, EVEX_V128, PS, 5375 EVEX_CD8<32, CD8VF>; 5376 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info, 5377 sched.PS.YMM, IsCommutable>, EVEX_V256, PS, 5378 EVEX_CD8<32, CD8VF>; 5379 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info, 5380 sched.PD.XMM, IsPD128Commutable, 5381 IsCommutable>, EVEX_V128, PD, VEX_W, 5382 EVEX_CD8<64, CD8VF>; 5383 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info, 5384 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, 5385 EVEX_CD8<64, CD8VF>; 5386 } 5387 } 5388 5389 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5390 X86SchedWriteSizes sched> { 5391 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5392 v16f32_info>, 5393 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5394 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5395 v8f64_info>, 5396 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5397 } 5398 5399 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5400 X86SchedWriteSizes sched> { 5401 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5402 v16f32_info>, 5403 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5404 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5405 v8f64_info>, 5406 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5407 } 5408 5409 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, 5410 SchedWriteFAddSizes, 1>, 5411 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; 5412 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, 5413 SchedWriteFMulSizes, 1>, 5414 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; 5415 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, 5416 SchedWriteFAddSizes>, 5417 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; 5418 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, 5419 SchedWriteFDivSizes>, 5420 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; 5421 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, 5422 SchedWriteFCmpSizes, 0>, 5423 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>; 5424 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, 5425 SchedWriteFCmpSizes, 0>, 5426 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>; 5427 let isCodeGenOnly = 1 in { 5428 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, 5429 SchedWriteFCmpSizes, 1>; 5430 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, 5431 SchedWriteFCmpSizes, 1>; 5432 } 5433 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, 5434 SchedWriteFLogicSizes, 1>; 5435 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, 5436 SchedWriteFLogicSizes, 0>; 5437 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, 5438 SchedWriteFLogicSizes, 1>; 5439 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, 5440 SchedWriteFLogicSizes, 1>; 5441 5442 // Patterns catch floating point selects with bitcasted integer logic ops. 5443 multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode, 5444 X86VectorVTInfo _, Predicate prd> { 5445 let Predicates = [prd] in { 5446 // Masked register-register logical operations. 5447 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5448 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))), 5449 _.RC:$src0)), 5450 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask, 5451 _.RC:$src1, _.RC:$src2)>; 5452 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5453 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))), 5454 _.ImmAllZerosV)), 5455 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1, 5456 _.RC:$src2)>; 5457 // Masked register-memory logical operations. 5458 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5459 (bitconvert (_.i64VT (OpNode _.RC:$src1, 5460 (load addr:$src2)))), 5461 _.RC:$src0)), 5462 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask, 5463 _.RC:$src1, addr:$src2)>; 5464 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5465 (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))), 5466 _.ImmAllZerosV)), 5467 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1, 5468 addr:$src2)>; 5469 // Register-broadcast logical operations. 5470 def : Pat<(_.i64VT (OpNode _.RC:$src1, 5471 (bitconvert (_.VT (X86VBroadcast 5472 (_.ScalarLdFrag addr:$src2)))))), 5473 (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>; 5474 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5475 (bitconvert 5476 (_.i64VT (OpNode _.RC:$src1, 5477 (bitconvert (_.VT 5478 (X86VBroadcast 5479 (_.ScalarLdFrag addr:$src2))))))), 5480 _.RC:$src0)), 5481 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask, 5482 _.RC:$src1, addr:$src2)>; 5483 def : Pat<(_.VT (vselect _.KRCWM:$mask, 5484 (bitconvert 5485 (_.i64VT (OpNode _.RC:$src1, 5486 (bitconvert (_.VT 5487 (X86VBroadcast 5488 (_.ScalarLdFrag addr:$src2))))))), 5489 _.ImmAllZerosV)), 5490 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask, 5491 _.RC:$src1, addr:$src2)>; 5492 } 5493 } 5494 5495 multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> { 5496 defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>; 5497 defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>; 5498 defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>; 5499 defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>; 5500 defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>; 5501 defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>; 5502 } 5503 5504 defm : avx512_fp_logical_lowering_sizes<"VPAND", and>; 5505 defm : avx512_fp_logical_lowering_sizes<"VPOR", or>; 5506 defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>; 5507 defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>; 5508 5509 let Predicates = [HasVLX,HasDQI] in { 5510 // Use packed logical operations for scalar ops. 5511 def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)), 5512 (COPY_TO_REGCLASS 5513 (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), 5514 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))), 5515 FR64X)>; 5516 def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)), 5517 (COPY_TO_REGCLASS 5518 (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), 5519 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))), 5520 FR64X)>; 5521 def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)), 5522 (COPY_TO_REGCLASS 5523 (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), 5524 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))), 5525 FR64X)>; 5526 def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)), 5527 (COPY_TO_REGCLASS 5528 (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), 5529 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))), 5530 FR64X)>; 5531 5532 def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)), 5533 (COPY_TO_REGCLASS 5534 (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)), 5535 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))), 5536 FR32X)>; 5537 def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)), 5538 (COPY_TO_REGCLASS 5539 (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)), 5540 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))), 5541 FR32X)>; 5542 def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)), 5543 (COPY_TO_REGCLASS 5544 (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)), 5545 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))), 5546 FR32X)>; 5547 def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)), 5548 (COPY_TO_REGCLASS 5549 (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)), 5550 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))), 5551 FR32X)>; 5552 } 5553 5554 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 5555 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5556 let ExeDomain = _.ExeDomain in { 5557 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5558 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, 5559 "$src2, $src1", "$src1, $src2", 5560 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, 5561 EVEX_4V, Sched<[sched]>; 5562 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5563 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, 5564 "$src2, $src1", "$src1, $src2", 5565 (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, 5566 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; 5567 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5568 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, 5569 "${src2}"##_.BroadcastStr##", $src1", 5570 "$src1, ${src2}"##_.BroadcastStr, 5571 (OpNode _.RC:$src1, (_.VT (X86VBroadcast 5572 (_.ScalarLdFrag addr:$src2))), 5573 (i32 FROUND_CURRENT))>, 5574 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 5575 } 5576 } 5577 5578 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 5579 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5580 let ExeDomain = _.ExeDomain in { 5581 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5582 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, 5583 "$src2, $src1", "$src1, $src2", 5584 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, 5585 Sched<[sched]>; 5586 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5587 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix, 5588 "$src2, $src1", "$src1, $src2", 5589 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2, 5590 (i32 FROUND_CURRENT))>, 5591 Sched<[sched.Folded, ReadAfterLd]>; 5592 } 5593 } 5594 5595 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, 5596 SDNode OpNode, SDNode OpNodeScal, 5597 X86SchedWriteWidths sched> { 5598 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>, 5599 avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>, 5600 EVEX_V512, EVEX_CD8<32, CD8VF>; 5601 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>, 5602 avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>, 5603 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 5604 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>, 5605 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>, 5606 EVEX_4V,EVEX_CD8<32, CD8VT1>; 5607 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>, 5608 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>, 5609 EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 5610 5611 // Define only if AVX512VL feature is present. 5612 let Predicates = [HasVLX] in { 5613 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>, 5614 EVEX_V128, EVEX_CD8<32, CD8VF>; 5615 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>, 5616 EVEX_V256, EVEX_CD8<32, CD8VF>; 5617 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>, 5618 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 5619 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>, 5620 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 5621 } 5622 } 5623 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs, 5624 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible; 5625 5626 //===----------------------------------------------------------------------===// 5627 // AVX-512 VPTESTM instructions 5628 //===----------------------------------------------------------------------===// 5629 5630 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode, 5631 X86FoldableSchedWrite sched, X86VectorVTInfo _, 5632 string Name> { 5633 let ExeDomain = _.ExeDomain in { 5634 let isCommutable = 1 in 5635 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), 5636 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5637 "$src2, $src1", "$src1, $src2", 5638 (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), 5639 _.ImmAllZerosV)>, 5640 EVEX_4V, Sched<[sched]>; 5641 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5642 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5643 "$src2, $src1", "$src1, $src2", 5644 (OpNode (bitconvert 5645 (_.i64VT (and _.RC:$src1, 5646 (bitconvert (_.LdFrag addr:$src2))))), 5647 _.ImmAllZerosV)>, 5648 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5649 Sched<[sched.Folded, ReadAfterLd]>; 5650 } 5651 5652 // Patterns for compare with 0 that just use the same source twice. 5653 def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)), 5654 (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr") 5655 _.RC:$src, _.RC:$src))>; 5656 5657 def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))), 5658 (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk") 5659 _.KRC:$mask, _.RC:$src, _.RC:$src))>; 5660 } 5661 5662 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode, 5663 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5664 let ExeDomain = _.ExeDomain in 5665 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5666 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5667 "${src2}"##_.BroadcastStr##", $src1", 5668 "$src1, ${src2}"##_.BroadcastStr, 5669 (OpNode (and _.RC:$src1, 5670 (X86VBroadcast 5671 (_.ScalarLdFrag addr:$src2))), 5672 _.ImmAllZerosV)>, 5673 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5674 Sched<[sched.Folded, ReadAfterLd]>; 5675 } 5676 5677 // Use 512bit version to implement 128/256 bit in case NoVLX. 5678 multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo, 5679 X86VectorVTInfo _, string Name> { 5680 def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), 5681 _.ImmAllZerosV)), 5682 (_.KVT (COPY_TO_REGCLASS 5683 (!cast<Instruction>(Name # "Zrr") 5684 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5685 _.RC:$src1, _.SubRegIdx), 5686 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5687 _.RC:$src2, _.SubRegIdx)), 5688 _.KRC))>; 5689 5690 def : Pat<(_.KVT (and _.KRC:$mask, 5691 (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), 5692 _.ImmAllZerosV))), 5693 (COPY_TO_REGCLASS 5694 (!cast<Instruction>(Name # "Zrrk") 5695 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC), 5696 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5697 _.RC:$src1, _.SubRegIdx), 5698 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5699 _.RC:$src2, _.SubRegIdx)), 5700 _.KRC)>; 5701 5702 def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)), 5703 (_.KVT (COPY_TO_REGCLASS 5704 (!cast<Instruction>(Name # "Zrr") 5705 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5706 _.RC:$src, _.SubRegIdx), 5707 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5708 _.RC:$src, _.SubRegIdx)), 5709 _.KRC))>; 5710 5711 def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))), 5712 (COPY_TO_REGCLASS 5713 (!cast<Instruction>(Name # "Zrrk") 5714 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC), 5715 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5716 _.RC:$src, _.SubRegIdx), 5717 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 5718 _.RC:$src, _.SubRegIdx)), 5719 _.KRC)>; 5720 } 5721 5722 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode, 5723 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 5724 let Predicates = [HasAVX512] in 5725 defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>, 5726 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 5727 5728 let Predicates = [HasAVX512, HasVLX] in { 5729 defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>, 5730 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 5731 defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>, 5732 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 5733 } 5734 let Predicates = [HasAVX512, NoVLX] in { 5735 defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>; 5736 defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>; 5737 } 5738 } 5739 5740 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode, 5741 X86SchedWriteWidths sched> { 5742 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched, 5743 avx512vl_i32_info>; 5744 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched, 5745 avx512vl_i64_info>, VEX_W; 5746 } 5747 5748 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, 5749 PatFrag OpNode, X86SchedWriteWidths sched> { 5750 let Predicates = [HasBWI] in { 5751 defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM, 5752 v32i16_info, NAME#"W">, EVEX_V512, VEX_W; 5753 defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM, 5754 v64i8_info, NAME#"B">, EVEX_V512; 5755 } 5756 let Predicates = [HasVLX, HasBWI] in { 5757 5758 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM, 5759 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W; 5760 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM, 5761 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W; 5762 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM, 5763 v32i8x_info, NAME#"B">, EVEX_V256; 5764 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM, 5765 v16i8x_info, NAME#"B">, EVEX_V128; 5766 } 5767 5768 let Predicates = [HasAVX512, NoVLX] in { 5769 defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">; 5770 defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">; 5771 defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">; 5772 defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">; 5773 } 5774 } 5775 5776 // These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm 5777 // as commutable here because we already canonicalized all zeros vectors to the 5778 // RHS during lowering. 5779 def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2), 5780 (setcc node:$src1, node:$src2, SETEQ)>; 5781 def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2), 5782 (setcc node:$src1, node:$src2, SETNE)>; 5783 5784 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, 5785 PatFrag OpNode, X86SchedWriteWidths sched> : 5786 avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>, 5787 avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>; 5788 5789 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem, 5790 SchedWriteVecLogic>, T8PD; 5791 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm, 5792 SchedWriteVecLogic>, T8XS; 5793 5794 //===----------------------------------------------------------------------===// 5795 // AVX-512 Shift instructions 5796 //===----------------------------------------------------------------------===// 5797 5798 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, 5799 string OpcodeStr, SDNode OpNode, 5800 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5801 let ExeDomain = _.ExeDomain in { 5802 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), 5803 (ins _.RC:$src1, u8imm:$src2), OpcodeStr, 5804 "$src2, $src1", "$src1, $src2", 5805 (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>, 5806 Sched<[sched]>; 5807 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5808 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, 5809 "$src2, $src1", "$src1, $src2", 5810 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 5811 (i8 imm:$src2)))>, 5812 Sched<[sched.Folded]>; 5813 } 5814 } 5815 5816 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, 5817 string OpcodeStr, SDNode OpNode, 5818 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5819 let ExeDomain = _.ExeDomain in 5820 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5821 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, 5822 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", 5823 (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>, 5824 EVEX_B, Sched<[sched.Folded]>; 5825 } 5826 5827 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5828 X86FoldableSchedWrite sched, ValueType SrcVT, 5829 PatFrag bc_frag, X86VectorVTInfo _> { 5830 // src2 is always 128-bit 5831 let ExeDomain = _.ExeDomain in { 5832 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5833 (ins _.RC:$src1, VR128X:$src2), OpcodeStr, 5834 "$src2, $src1", "$src1, $src2", 5835 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>, 5836 AVX512BIBase, EVEX_4V, Sched<[sched]>; 5837 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5838 (ins _.RC:$src1, i128mem:$src2), OpcodeStr, 5839 "$src2, $src1", "$src1, $src2", 5840 (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>, 5841 AVX512BIBase, 5842 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; 5843 } 5844 } 5845 5846 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5847 X86SchedWriteWidths sched, ValueType SrcVT, 5848 PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo, 5849 Predicate prd> { 5850 let Predicates = [prd] in 5851 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT, 5852 bc_frag, VTInfo.info512>, EVEX_V512, 5853 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; 5854 let Predicates = [prd, HasVLX] in { 5855 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT, 5856 bc_frag, VTInfo.info256>, EVEX_V256, 5857 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; 5858 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT, 5859 bc_frag, VTInfo.info128>, EVEX_V128, 5860 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; 5861 } 5862 } 5863 5864 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, 5865 string OpcodeStr, SDNode OpNode, 5866 X86SchedWriteWidths sched, 5867 bit NotEVEX2VEXConvertibleQ = 0> { 5868 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, 5869 bc_v4i32, avx512vl_i32_info, HasAVX512>; 5870 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5871 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, 5872 bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W; 5873 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, 5874 bc_v2i64, avx512vl_i16_info, HasBWI>; 5875 } 5876 5877 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 5878 string OpcodeStr, SDNode OpNode, 5879 X86SchedWriteWidths sched, 5880 AVX512VLVectorVTInfo VTInfo> { 5881 let Predicates = [HasAVX512] in 5882 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5883 sched.ZMM, VTInfo.info512>, 5884 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM, 5885 VTInfo.info512>, EVEX_V512; 5886 let Predicates = [HasAVX512, HasVLX] in { 5887 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5888 sched.YMM, VTInfo.info256>, 5889 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM, 5890 VTInfo.info256>, EVEX_V256; 5891 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5892 sched.XMM, VTInfo.info128>, 5893 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM, 5894 VTInfo.info128>, EVEX_V128; 5895 } 5896 } 5897 5898 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, 5899 string OpcodeStr, SDNode OpNode, 5900 X86SchedWriteWidths sched> { 5901 let Predicates = [HasBWI] in 5902 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5903 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG; 5904 let Predicates = [HasVLX, HasBWI] in { 5905 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5906 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG; 5907 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5908 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG; 5909 } 5910 } 5911 5912 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, 5913 Format ImmFormR, Format ImmFormM, 5914 string OpcodeStr, SDNode OpNode, 5915 X86SchedWriteWidths sched, 5916 bit NotEVEX2VEXConvertibleQ = 0> { 5917 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, 5918 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 5919 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 5920 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, 5921 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; 5922 } 5923 5924 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, 5925 SchedWriteVecShiftImm>, 5926 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, 5927 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5928 5929 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, 5930 SchedWriteVecShiftImm>, 5931 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, 5932 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5933 5934 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, 5935 SchedWriteVecShiftImm, 1>, 5936 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, 5937 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5938 5939 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, 5940 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5941 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, 5942 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 5943 5944 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, 5945 SchedWriteVecShift>; 5946 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, 5947 SchedWriteVecShift, 1>; 5948 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, 5949 SchedWriteVecShift>; 5950 5951 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. 5952 let Predicates = [HasAVX512, NoVLX] in { 5953 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), 5954 (EXTRACT_SUBREG (v8i64 5955 (VPSRAQZrr 5956 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 5957 VR128X:$src2)), sub_ymm)>; 5958 5959 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 5960 (EXTRACT_SUBREG (v8i64 5961 (VPSRAQZrr 5962 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 5963 VR128X:$src2)), sub_xmm)>; 5964 5965 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))), 5966 (EXTRACT_SUBREG (v8i64 5967 (VPSRAQZri 5968 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 5969 imm:$src2)), sub_ymm)>; 5970 5971 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))), 5972 (EXTRACT_SUBREG (v8i64 5973 (VPSRAQZri 5974 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 5975 imm:$src2)), sub_xmm)>; 5976 } 5977 5978 //===-------------------------------------------------------------------===// 5979 // Variable Bit Shifts 5980 //===-------------------------------------------------------------------===// 5981 5982 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 5983 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5984 let ExeDomain = _.ExeDomain in { 5985 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5986 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5987 "$src2, $src1", "$src1, $src2", 5988 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, 5989 AVX5128IBase, EVEX_4V, Sched<[sched]>; 5990 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5991 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5992 "$src2, $src1", "$src1, $src2", 5993 (_.VT (OpNode _.RC:$src1, 5994 (_.VT (bitconvert (_.LdFrag addr:$src2)))))>, 5995 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 5996 Sched<[sched.Folded, ReadAfterLd]>; 5997 } 5998 } 5999 6000 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, 6001 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6002 let ExeDomain = _.ExeDomain in 6003 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6004 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6005 "${src2}"##_.BroadcastStr##", $src1", 6006 "$src1, ${src2}"##_.BroadcastStr, 6007 (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast 6008 (_.ScalarLdFrag addr:$src2)))))>, 6009 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6010 Sched<[sched.Folded, ReadAfterLd]>; 6011 } 6012 6013 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6014 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 6015 let Predicates = [HasAVX512] in 6016 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 6017 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 6018 6019 let Predicates = [HasAVX512, HasVLX] in { 6020 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 6021 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 6022 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 6023 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 6024 } 6025 } 6026 6027 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, 6028 SDNode OpNode, X86SchedWriteWidths sched> { 6029 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, 6030 avx512vl_i32_info>; 6031 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, 6032 avx512vl_i64_info>, VEX_W; 6033 } 6034 6035 // Use 512bit version to implement 128/256 bit in case NoVLX. 6036 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr, 6037 SDNode OpNode, list<Predicate> p> { 6038 let Predicates = p in { 6039 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), 6040 (_.info256.VT _.info256.RC:$src2))), 6041 (EXTRACT_SUBREG 6042 (!cast<Instruction>(OpcodeStr#"Zrr") 6043 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 6044 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 6045 sub_ymm)>; 6046 6047 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), 6048 (_.info128.VT _.info128.RC:$src2))), 6049 (EXTRACT_SUBREG 6050 (!cast<Instruction>(OpcodeStr#"Zrr") 6051 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 6052 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 6053 sub_xmm)>; 6054 } 6055 } 6056 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr, 6057 SDNode OpNode, X86SchedWriteWidths sched> { 6058 let Predicates = [HasBWI] in 6059 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>, 6060 EVEX_V512, VEX_W; 6061 let Predicates = [HasVLX, HasBWI] in { 6062 6063 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>, 6064 EVEX_V256, VEX_W; 6065 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>, 6066 EVEX_V128, VEX_W; 6067 } 6068 } 6069 6070 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>, 6071 avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>; 6072 6073 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>, 6074 avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>; 6075 6076 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>, 6077 avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>; 6078 6079 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; 6080 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; 6081 6082 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>; 6083 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>; 6084 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>; 6085 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>; 6086 6087 // Special handing for handling VPSRAV intrinsics. 6088 multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _, 6089 list<Predicate> p> { 6090 let Predicates = p in { 6091 def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)), 6092 (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1, 6093 _.RC:$src2)>; 6094 def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))), 6095 (!cast<Instruction>(InstrStr#_.ZSuffix##rm) 6096 _.RC:$src1, addr:$src2)>; 6097 def : Pat<(_.VT (vselect _.KRCWM:$mask, 6098 (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)), 6099 (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0, 6100 _.KRC:$mask, _.RC:$src1, _.RC:$src2)>; 6101 def : Pat<(_.VT (vselect _.KRCWM:$mask, 6102 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))), 6103 _.RC:$src0)), 6104 (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0, 6105 _.KRC:$mask, _.RC:$src1, addr:$src2)>; 6106 def : Pat<(_.VT (vselect _.KRCWM:$mask, 6107 (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)), 6108 (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask, 6109 _.RC:$src1, _.RC:$src2)>; 6110 def : Pat<(_.VT (vselect _.KRCWM:$mask, 6111 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))), 6112 _.ImmAllZerosV)), 6113 (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask, 6114 _.RC:$src1, addr:$src2)>; 6115 } 6116 } 6117 6118 multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _, 6119 list<Predicate> p> : 6120 avx512_var_shift_int_lowering<InstrStr, _, p> { 6121 let Predicates = p in { 6122 def : Pat<(_.VT (X86vsrav _.RC:$src1, 6123 (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), 6124 (!cast<Instruction>(InstrStr#_.ZSuffix##rmb) 6125 _.RC:$src1, addr:$src2)>; 6126 def : Pat<(_.VT (vselect _.KRCWM:$mask, 6127 (X86vsrav _.RC:$src1, 6128 (X86VBroadcast (_.ScalarLdFrag addr:$src2))), 6129 _.RC:$src0)), 6130 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0, 6131 _.KRC:$mask, _.RC:$src1, addr:$src2)>; 6132 def : Pat<(_.VT (vselect _.KRCWM:$mask, 6133 (X86vsrav _.RC:$src1, 6134 (X86VBroadcast (_.ScalarLdFrag addr:$src2))), 6135 _.ImmAllZerosV)), 6136 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask, 6137 _.RC:$src1, addr:$src2)>; 6138 } 6139 } 6140 6141 defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>; 6142 defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>; 6143 defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>; 6144 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>; 6145 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>; 6146 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>; 6147 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>; 6148 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>; 6149 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>; 6150 6151 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6152 let Predicates = [HasAVX512, NoVLX] in { 6153 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6154 (EXTRACT_SUBREG (v8i64 6155 (VPROLVQZrr 6156 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6157 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6158 sub_xmm)>; 6159 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6160 (EXTRACT_SUBREG (v8i64 6161 (VPROLVQZrr 6162 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6163 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6164 sub_ymm)>; 6165 6166 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6167 (EXTRACT_SUBREG (v16i32 6168 (VPROLVDZrr 6169 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6170 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6171 sub_xmm)>; 6172 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6173 (EXTRACT_SUBREG (v16i32 6174 (VPROLVDZrr 6175 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6176 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6177 sub_ymm)>; 6178 6179 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))), 6180 (EXTRACT_SUBREG (v8i64 6181 (VPROLQZri 6182 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6183 imm:$src2)), sub_xmm)>; 6184 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))), 6185 (EXTRACT_SUBREG (v8i64 6186 (VPROLQZri 6187 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6188 imm:$src2)), sub_ymm)>; 6189 6190 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))), 6191 (EXTRACT_SUBREG (v16i32 6192 (VPROLDZri 6193 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6194 imm:$src2)), sub_xmm)>; 6195 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))), 6196 (EXTRACT_SUBREG (v16i32 6197 (VPROLDZri 6198 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6199 imm:$src2)), sub_ymm)>; 6200 } 6201 6202 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6203 let Predicates = [HasAVX512, NoVLX] in { 6204 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6205 (EXTRACT_SUBREG (v8i64 6206 (VPRORVQZrr 6207 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6208 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6209 sub_xmm)>; 6210 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6211 (EXTRACT_SUBREG (v8i64 6212 (VPRORVQZrr 6213 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6214 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6215 sub_ymm)>; 6216 6217 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6218 (EXTRACT_SUBREG (v16i32 6219 (VPRORVDZrr 6220 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6221 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6222 sub_xmm)>; 6223 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6224 (EXTRACT_SUBREG (v16i32 6225 (VPRORVDZrr 6226 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6227 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6228 sub_ymm)>; 6229 6230 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))), 6231 (EXTRACT_SUBREG (v8i64 6232 (VPRORQZri 6233 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6234 imm:$src2)), sub_xmm)>; 6235 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))), 6236 (EXTRACT_SUBREG (v8i64 6237 (VPRORQZri 6238 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6239 imm:$src2)), sub_ymm)>; 6240 6241 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))), 6242 (EXTRACT_SUBREG (v16i32 6243 (VPRORDZri 6244 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6245 imm:$src2)), sub_xmm)>; 6246 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))), 6247 (EXTRACT_SUBREG (v16i32 6248 (VPRORDZri 6249 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6250 imm:$src2)), sub_ymm)>; 6251 } 6252 6253 //===-------------------------------------------------------------------===// 6254 // 1-src variable permutation VPERMW/D/Q 6255 //===-------------------------------------------------------------------===// 6256 6257 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6258 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6259 let Predicates = [HasAVX512] in 6260 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6261 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; 6262 6263 let Predicates = [HasAVX512, HasVLX] in 6264 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6265 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; 6266 } 6267 6268 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6269 string OpcodeStr, SDNode OpNode, 6270 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { 6271 let Predicates = [HasAVX512] in 6272 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6273 sched, VTInfo.info512>, 6274 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6275 sched, VTInfo.info512>, EVEX_V512; 6276 let Predicates = [HasAVX512, HasVLX] in 6277 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6278 sched, VTInfo.info256>, 6279 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6280 sched, VTInfo.info256>, EVEX_V256; 6281 } 6282 6283 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, 6284 Predicate prd, SDNode OpNode, 6285 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6286 let Predicates = [prd] in 6287 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6288 EVEX_V512 ; 6289 let Predicates = [HasVLX, prd] in { 6290 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6291 EVEX_V256 ; 6292 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, 6293 EVEX_V128 ; 6294 } 6295 } 6296 6297 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, 6298 WriteVarShuffle256, avx512vl_i16_info>, VEX_W; 6299 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, 6300 WriteVarShuffle256, avx512vl_i8_info>; 6301 6302 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, 6303 WriteVarShuffle256, avx512vl_i32_info>; 6304 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, 6305 WriteVarShuffle256, avx512vl_i64_info>, VEX_W; 6306 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, 6307 WriteFVarShuffle256, avx512vl_f32_info>; 6308 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, 6309 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W; 6310 6311 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", 6312 X86VPermi, WriteShuffle256, avx512vl_i64_info>, 6313 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6314 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", 6315 X86VPermi, WriteFShuffle256, avx512vl_f64_info>, 6316 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6317 6318 //===----------------------------------------------------------------------===// 6319 // AVX-512 - VPERMIL 6320 //===----------------------------------------------------------------------===// 6321 6322 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, 6323 X86FoldableSchedWrite sched, X86VectorVTInfo _, 6324 X86VectorVTInfo Ctrl> { 6325 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst), 6326 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr, 6327 "$src2, $src1", "$src1, $src2", 6328 (_.VT (OpNode _.RC:$src1, 6329 (Ctrl.VT Ctrl.RC:$src2)))>, 6330 T8PD, EVEX_4V, Sched<[sched]>; 6331 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6332 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, 6333 "$src2, $src1", "$src1, $src2", 6334 (_.VT (OpNode 6335 _.RC:$src1, 6336 (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>, 6337 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6338 Sched<[sched.Folded, ReadAfterLd]>; 6339 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6340 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6341 "${src2}"##_.BroadcastStr##", $src1", 6342 "$src1, ${src2}"##_.BroadcastStr, 6343 (_.VT (OpNode 6344 _.RC:$src1, 6345 (Ctrl.VT (X86VBroadcast 6346 (Ctrl.ScalarLdFrag addr:$src2)))))>, 6347 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 6348 Sched<[sched.Folded, ReadAfterLd]>; 6349 } 6350 6351 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, 6352 X86SchedWriteWidths sched, 6353 AVX512VLVectorVTInfo _, 6354 AVX512VLVectorVTInfo Ctrl> { 6355 let Predicates = [HasAVX512] in { 6356 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM, 6357 _.info512, Ctrl.info512>, EVEX_V512; 6358 } 6359 let Predicates = [HasAVX512, HasVLX] in { 6360 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM, 6361 _.info128, Ctrl.info128>, EVEX_V128; 6362 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM, 6363 _.info256, Ctrl.info256>, EVEX_V256; 6364 } 6365 } 6366 6367 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar, 6368 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ 6369 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle, 6370 _, Ctrl>; 6371 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr, 6372 X86VPermilpi, SchedWriteFShuffle, _>, 6373 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; 6374 } 6375 6376 let ExeDomain = SSEPackedSingle in 6377 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, 6378 avx512vl_i32_info>; 6379 let ExeDomain = SSEPackedDouble in 6380 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, 6381 avx512vl_i64_info>, VEX_W1X; 6382 6383 //===----------------------------------------------------------------------===// 6384 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW 6385 //===----------------------------------------------------------------------===// 6386 6387 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", 6388 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>, 6389 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; 6390 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", 6391 X86PShufhw, SchedWriteShuffle>, 6392 EVEX, AVX512XSIi8Base; 6393 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", 6394 X86PShuflw, SchedWriteShuffle>, 6395 EVEX, AVX512XDIi8Base; 6396 6397 //===----------------------------------------------------------------------===// 6398 // AVX-512 - VPSHUFB 6399 //===----------------------------------------------------------------------===// 6400 6401 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6402 X86SchedWriteWidths sched> { 6403 let Predicates = [HasBWI] in 6404 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>, 6405 EVEX_V512; 6406 6407 let Predicates = [HasVLX, HasBWI] in { 6408 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>, 6409 EVEX_V256; 6410 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>, 6411 EVEX_V128; 6412 } 6413 } 6414 6415 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, 6416 SchedWriteVarShuffle>, VEX_WIG; 6417 6418 //===----------------------------------------------------------------------===// 6419 // Move Low to High and High to Low packed FP Instructions 6420 //===----------------------------------------------------------------------===// 6421 6422 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), 6423 (ins VR128X:$src1, VR128X:$src2), 6424 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6425 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>, 6426 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V; 6427 let isCommutable = 1 in 6428 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), 6429 (ins VR128X:$src1, VR128X:$src2), 6430 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6431 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>, 6432 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable; 6433 6434 //===----------------------------------------------------------------------===// 6435 // VMOVHPS/PD VMOVLPS Instructions 6436 // All patterns was taken from SSS implementation. 6437 //===----------------------------------------------------------------------===// 6438 6439 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, 6440 SDPatternOperator OpNode, 6441 X86VectorVTInfo _> { 6442 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in 6443 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst), 6444 (ins _.RC:$src1, f64mem:$src2), 6445 !strconcat(OpcodeStr, 6446 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6447 [(set _.RC:$dst, 6448 (OpNode _.RC:$src1, 6449 (_.VT (bitconvert 6450 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, 6451 Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V; 6452 } 6453 6454 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in 6455 // SSE1. And MOVLPS pattern is even more complex. 6456 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, 6457 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6458 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, 6459 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6460 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag, 6461 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6462 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, 6463 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6464 6465 let Predicates = [HasAVX512] in { 6466 // VMOVHPD patterns 6467 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, 6468 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), 6469 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6470 } 6471 6472 let SchedRW = [WriteFStore] in { 6473 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), 6474 (ins f64mem:$dst, VR128X:$src), 6475 "vmovhps\t{$src, $dst|$dst, $src}", 6476 [(store (f64 (extractelt 6477 (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)), 6478 (bc_v2f64 (v4f32 VR128X:$src))), 6479 (iPTR 0))), addr:$dst)]>, 6480 EVEX, EVEX_CD8<32, CD8VT2>; 6481 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), 6482 (ins f64mem:$dst, VR128X:$src), 6483 "vmovhpd\t{$src, $dst|$dst, $src}", 6484 [(store (f64 (extractelt 6485 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), 6486 (iPTR 0))), addr:$dst)]>, 6487 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6488 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), 6489 (ins f64mem:$dst, VR128X:$src), 6490 "vmovlps\t{$src, $dst|$dst, $src}", 6491 [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)), 6492 (iPTR 0))), addr:$dst)]>, 6493 EVEX, EVEX_CD8<32, CD8VT2>; 6494 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), 6495 (ins f64mem:$dst, VR128X:$src), 6496 "vmovlpd\t{$src, $dst|$dst, $src}", 6497 [(store (f64 (extractelt (v2f64 VR128X:$src), 6498 (iPTR 0))), addr:$dst)]>, 6499 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6500 } // SchedRW 6501 6502 let Predicates = [HasAVX512] in { 6503 // VMOVHPD patterns 6504 def : Pat<(store (f64 (extractelt 6505 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), 6506 (iPTR 0))), addr:$dst), 6507 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; 6508 } 6509 //===----------------------------------------------------------------------===// 6510 // FMA - Fused Multiply Operations 6511 // 6512 6513 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6514 X86FoldableSchedWrite sched, 6515 X86VectorVTInfo _, string Suff> { 6516 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 6517 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6518 (ins _.RC:$src2, _.RC:$src3), 6519 OpcodeStr, "$src3, $src2", "$src2, $src3", 6520 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, 6521 AVX512FMA3Base, Sched<[sched]>; 6522 6523 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6524 (ins _.RC:$src2, _.MemOp:$src3), 6525 OpcodeStr, "$src3, $src2", "$src2, $src3", 6526 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>, 6527 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; 6528 6529 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6530 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6531 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6532 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6533 (OpNode _.RC:$src2, 6534 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>, 6535 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 6536 } 6537 } 6538 6539 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6540 X86FoldableSchedWrite sched, 6541 X86VectorVTInfo _, string Suff> { 6542 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in 6543 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6544 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6545 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6546 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>, 6547 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6548 } 6549 6550 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6551 SDNode OpNodeRnd, X86SchedWriteWidths sched, 6552 AVX512VLVectorVTInfo _, string Suff> { 6553 let Predicates = [HasAVX512] in { 6554 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM, 6555 _.info512, Suff>, 6556 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6557 _.info512, Suff>, 6558 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6559 } 6560 let Predicates = [HasVLX, HasAVX512] in { 6561 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM, 6562 _.info256, Suff>, 6563 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6564 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM, 6565 _.info128, Suff>, 6566 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6567 } 6568 } 6569 6570 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode, 6571 SDNode OpNodeRnd> { 6572 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, 6573 SchedWriteFMA, avx512vl_f32_info, "PS">; 6574 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, 6575 SchedWriteFMA, avx512vl_f64_info, "PD">, 6576 VEX_W; 6577 } 6578 6579 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; 6580 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; 6581 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; 6582 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; 6583 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>; 6584 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; 6585 6586 6587 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6588 X86FoldableSchedWrite sched, 6589 X86VectorVTInfo _, string Suff> { 6590 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 6591 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6592 (ins _.RC:$src2, _.RC:$src3), 6593 OpcodeStr, "$src3, $src2", "$src2, $src3", 6594 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1, 6595 vselect, 1>, AVX512FMA3Base, Sched<[sched]>; 6596 6597 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6598 (ins _.RC:$src2, _.MemOp:$src3), 6599 OpcodeStr, "$src3, $src2", "$src2, $src3", 6600 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, 6601 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; 6602 6603 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6604 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6605 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", 6606 "$src2, ${src3}"##_.BroadcastStr, 6607 (_.VT (OpNode _.RC:$src2, 6608 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), 6609 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, 6610 Sched<[sched.Folded, ReadAfterLd]>; 6611 } 6612 } 6613 6614 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6615 X86FoldableSchedWrite sched, 6616 X86VectorVTInfo _, string Suff> { 6617 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in 6618 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6619 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6620 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6621 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))), 6622 1, 1, vselect, 1>, 6623 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6624 } 6625 6626 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6627 SDNode OpNodeRnd, X86SchedWriteWidths sched, 6628 AVX512VLVectorVTInfo _, string Suff> { 6629 let Predicates = [HasAVX512] in { 6630 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM, 6631 _.info512, Suff>, 6632 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6633 _.info512, Suff>, 6634 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6635 } 6636 let Predicates = [HasVLX, HasAVX512] in { 6637 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM, 6638 _.info256, Suff>, 6639 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6640 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM, 6641 _.info128, Suff>, 6642 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6643 } 6644 } 6645 6646 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode, 6647 SDNode OpNodeRnd > { 6648 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, 6649 SchedWriteFMA, avx512vl_f32_info, "PS">; 6650 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, 6651 SchedWriteFMA, avx512vl_f64_info, "PD">, 6652 VEX_W; 6653 } 6654 6655 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; 6656 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; 6657 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; 6658 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; 6659 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; 6660 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; 6661 6662 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6663 X86FoldableSchedWrite sched, 6664 X86VectorVTInfo _, string Suff> { 6665 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 6666 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6667 (ins _.RC:$src2, _.RC:$src3), 6668 OpcodeStr, "$src3, $src2", "$src2, $src3", 6669 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>, 6670 AVX512FMA3Base, Sched<[sched]>; 6671 6672 // Pattern is 312 order so that the load is in a different place from the 6673 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6674 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6675 (ins _.RC:$src2, _.MemOp:$src3), 6676 OpcodeStr, "$src3, $src2", "$src2, $src3", 6677 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, 6678 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; 6679 6680 // Pattern is 312 order so that the load is in a different place from the 6681 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6682 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 6683 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6684 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", 6685 "$src2, ${src3}"##_.BroadcastStr, 6686 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), 6687 _.RC:$src1, _.RC:$src2)), 1, 0>, 6688 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 6689 } 6690 } 6691 6692 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6693 X86FoldableSchedWrite sched, 6694 X86VectorVTInfo _, string Suff> { 6695 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in 6696 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 6697 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6698 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6699 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))), 6700 1, 1, vselect, 1>, 6701 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; 6702 } 6703 6704 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 6705 SDNode OpNodeRnd, X86SchedWriteWidths sched, 6706 AVX512VLVectorVTInfo _, string Suff> { 6707 let Predicates = [HasAVX512] in { 6708 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM, 6709 _.info512, Suff>, 6710 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6711 _.info512, Suff>, 6712 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6713 } 6714 let Predicates = [HasVLX, HasAVX512] in { 6715 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM, 6716 _.info256, Suff>, 6717 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6718 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM, 6719 _.info128, Suff>, 6720 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6721 } 6722 } 6723 6724 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode, 6725 SDNode OpNodeRnd > { 6726 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, 6727 SchedWriteFMA, avx512vl_f32_info, "PS">; 6728 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, 6729 SchedWriteFMA, avx512vl_f64_info, "PD">, 6730 VEX_W; 6731 } 6732 6733 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; 6734 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>; 6735 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>; 6736 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>; 6737 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>; 6738 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>; 6739 6740 // Scalar FMA 6741 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 6742 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> { 6743 let Constraints = "$src1 = $dst", hasSideEffects = 0 in { 6744 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 6745 (ins _.RC:$src2, _.RC:$src3), OpcodeStr, 6746 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>, 6747 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>; 6748 6749 let mayLoad = 1 in 6750 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 6751 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr, 6752 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>, 6753 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>; 6754 6755 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 6756 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6757 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>, 6758 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>; 6759 6760 let isCodeGenOnly = 1, isCommutable = 1 in { 6761 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst), 6762 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3), 6763 !strconcat(OpcodeStr, 6764 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6765 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>; 6766 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst), 6767 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3), 6768 !strconcat(OpcodeStr, 6769 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6770 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>; 6771 6772 def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst), 6773 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc), 6774 !strconcat(OpcodeStr, 6775 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6776 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC, 6777 Sched<[SchedWriteFMA.Scl]>; 6778 }// isCodeGenOnly = 1 6779 }// Constraints = "$src1 = $dst" 6780 } 6781 6782 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132, 6783 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, 6784 X86VectorVTInfo _, string SUFF> { 6785 let ExeDomain = _.ExeDomain in { 6786 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _, 6787 // Operands for intrinsic are in 123 order to preserve passthu 6788 // semantics. 6789 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, 6790 _.FRC:$src3))), 6791 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, 6792 (_.ScalarLdFrag addr:$src3)))), 6793 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1, 6794 _.FRC:$src3, (i32 imm:$rc)))), 0>; 6795 6796 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _, 6797 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3, 6798 _.FRC:$src1))), 6799 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, 6800 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 6801 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3, 6802 _.FRC:$src1, (i32 imm:$rc)))), 1>; 6803 6804 // One pattern is 312 order so that the load is in a different place from the 6805 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6806 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _, 6807 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, 6808 _.FRC:$src2))), 6809 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3), 6810 _.FRC:$src1, _.FRC:$src2))), 6811 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3, 6812 _.FRC:$src2, (i32 imm:$rc)))), 1>; 6813 } 6814 } 6815 6816 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132, 6817 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> { 6818 let Predicates = [HasAVX512] in { 6819 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 6820 OpNodeRnd, f32x_info, "SS">, 6821 EVEX_CD8<32, CD8VT1>, VEX_LIG; 6822 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 6823 OpNodeRnd, f64x_info, "SD">, 6824 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; 6825 } 6826 } 6827 6828 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>; 6829 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>; 6830 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; 6831 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; 6832 6833 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, 6834 string Suffix, SDNode Move, 6835 X86VectorVTInfo _, PatLeaf ZeroFP> { 6836 let Predicates = [HasAVX512] in { 6837 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6838 (Op _.FRC:$src2, 6839 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6840 _.FRC:$src3))))), 6841 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int") 6842 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6843 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6844 6845 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6846 (Op _.FRC:$src2, _.FRC:$src3, 6847 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6848 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int") 6849 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6850 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6851 6852 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6853 (Op _.FRC:$src2, 6854 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6855 (_.ScalarLdFrag addr:$src3)))))), 6856 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int") 6857 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6858 addr:$src3)>; 6859 6860 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6861 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6862 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))), 6863 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int") 6864 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6865 addr:$src3)>; 6866 6867 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6868 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 6869 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6870 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int") 6871 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6872 addr:$src3)>; 6873 6874 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6875 (X86selects VK1WM:$mask, 6876 (Op _.FRC:$src2, 6877 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6878 _.FRC:$src3), 6879 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6880 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk") 6881 VR128X:$src1, VK1WM:$mask, 6882 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6883 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6884 6885 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6886 (X86selects VK1WM:$mask, 6887 (Op _.FRC:$src2, 6888 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6889 (_.ScalarLdFrag addr:$src3)), 6890 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6891 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk") 6892 VR128X:$src1, VK1WM:$mask, 6893 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6894 6895 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6896 (X86selects VK1WM:$mask, 6897 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6898 (_.ScalarLdFrag addr:$src3), _.FRC:$src2), 6899 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6900 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk") 6901 VR128X:$src1, VK1WM:$mask, 6902 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6903 6904 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6905 (X86selects VK1WM:$mask, 6906 (Op _.FRC:$src2, _.FRC:$src3, 6907 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6908 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6909 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk") 6910 VR128X:$src1, VK1WM:$mask, 6911 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6912 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6913 6914 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6915 (X86selects VK1WM:$mask, 6916 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 6917 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6918 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6919 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk") 6920 VR128X:$src1, VK1WM:$mask, 6921 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6922 6923 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6924 (X86selects VK1WM:$mask, 6925 (Op _.FRC:$src2, 6926 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6927 _.FRC:$src3), 6928 (_.EltVT ZeroFP)))))), 6929 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz") 6930 VR128X:$src1, VK1WM:$mask, 6931 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6932 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6933 6934 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6935 (X86selects VK1WM:$mask, 6936 (Op _.FRC:$src2, _.FRC:$src3, 6937 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6938 (_.EltVT ZeroFP)))))), 6939 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz") 6940 VR128X:$src1, VK1WM:$mask, 6941 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6942 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 6943 6944 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6945 (X86selects VK1WM:$mask, 6946 (Op _.FRC:$src2, 6947 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6948 (_.ScalarLdFrag addr:$src3)), 6949 (_.EltVT ZeroFP)))))), 6950 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz") 6951 VR128X:$src1, VK1WM:$mask, 6952 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6953 6954 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6955 (X86selects VK1WM:$mask, 6956 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6957 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)), 6958 (_.EltVT ZeroFP)))))), 6959 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz") 6960 VR128X:$src1, VK1WM:$mask, 6961 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6962 6963 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6964 (X86selects VK1WM:$mask, 6965 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 6966 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 6967 (_.EltVT ZeroFP)))))), 6968 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz") 6969 VR128X:$src1, VK1WM:$mask, 6970 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 6971 6972 // Patterns with rounding mode. 6973 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6974 (RndOp _.FRC:$src2, 6975 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6976 _.FRC:$src3, (i32 imm:$rc)))))), 6977 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int") 6978 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6979 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>; 6980 6981 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6982 (RndOp _.FRC:$src2, _.FRC:$src3, 6983 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6984 (i32 imm:$rc)))))), 6985 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int") 6986 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6987 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>; 6988 6989 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 6990 (X86selects VK1WM:$mask, 6991 (RndOp _.FRC:$src2, 6992 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 6993 _.FRC:$src3, (i32 imm:$rc)), 6994 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 6995 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk") 6996 VR128X:$src1, VK1WM:$mask, 6997 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 6998 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>; 6999 7000 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7001 (X86selects VK1WM:$mask, 7002 (RndOp _.FRC:$src2, _.FRC:$src3, 7003 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7004 (i32 imm:$rc)), 7005 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 7006 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk") 7007 VR128X:$src1, VK1WM:$mask, 7008 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7009 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>; 7010 7011 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7012 (X86selects VK1WM:$mask, 7013 (RndOp _.FRC:$src2, 7014 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7015 _.FRC:$src3, (i32 imm:$rc)), 7016 (_.EltVT ZeroFP)))))), 7017 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz") 7018 VR128X:$src1, VK1WM:$mask, 7019 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7020 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>; 7021 7022 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7023 (X86selects VK1WM:$mask, 7024 (RndOp _.FRC:$src2, _.FRC:$src3, 7025 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7026 (i32 imm:$rc)), 7027 (_.EltVT ZeroFP)))))), 7028 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz") 7029 VR128X:$src1, VK1WM:$mask, 7030 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7031 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>; 7032 } 7033 } 7034 7035 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS", 7036 X86Movss, v4f32x_info, fp32imm0>; 7037 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS", 7038 X86Movss, v4f32x_info, fp32imm0>; 7039 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS", 7040 X86Movss, v4f32x_info, fp32imm0>; 7041 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS", 7042 X86Movss, v4f32x_info, fp32imm0>; 7043 7044 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD", 7045 X86Movsd, v2f64x_info, fp64imm0>; 7046 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD", 7047 X86Movsd, v2f64x_info, fp64imm0>; 7048 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD", 7049 X86Movsd, v2f64x_info, fp64imm0>; 7050 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD", 7051 X86Movsd, v2f64x_info, fp64imm0>; 7052 7053 //===----------------------------------------------------------------------===// 7054 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA 7055 //===----------------------------------------------------------------------===// 7056 let Constraints = "$src1 = $dst" in { 7057 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 7058 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 7059 // NOTE: The SDNode have the multiply operands first with the add last. 7060 // This enables commuted load patterns to be autogenerated by tablegen. 7061 let ExeDomain = _.ExeDomain in { 7062 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 7063 (ins _.RC:$src2, _.RC:$src3), 7064 OpcodeStr, "$src3, $src2", "$src2, $src3", 7065 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, 7066 AVX512FMA3Base, Sched<[sched]>; 7067 7068 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 7069 (ins _.RC:$src2, _.MemOp:$src3), 7070 OpcodeStr, "$src3, $src2", "$src2, $src3", 7071 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, 7072 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; 7073 7074 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 7075 (ins _.RC:$src2, _.ScalarMemOp:$src3), 7076 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 7077 !strconcat("$src2, ${src3}", _.BroadcastStr ), 7078 (OpNode _.RC:$src2, 7079 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))), 7080 _.RC:$src1)>, 7081 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 7082 } 7083 } 7084 } // Constraints = "$src1 = $dst" 7085 7086 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 7087 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 7088 let Predicates = [HasIFMA] in { 7089 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 7090 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 7091 } 7092 let Predicates = [HasVLX, HasIFMA] in { 7093 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 7094 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 7095 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 7096 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 7097 } 7098 } 7099 7100 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, 7101 SchedWriteVecIMul, avx512vl_i64_info>, 7102 VEX_W; 7103 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, 7104 SchedWriteVecIMul, avx512vl_i64_info>, 7105 VEX_W; 7106 7107 //===----------------------------------------------------------------------===// 7108 // AVX-512 Scalar convert from sign integer to float/double 7109 //===----------------------------------------------------------------------===// 7110 7111 multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched, 7112 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7113 X86MemOperand x86memop, PatFrag ld_frag, string asm> { 7114 let hasSideEffects = 0 in { 7115 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst), 7116 (ins DstVT.FRC:$src1, SrcRC:$src), 7117 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, 7118 EVEX_4V, Sched<[sched]>; 7119 let mayLoad = 1 in 7120 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst), 7121 (ins DstVT.FRC:$src1, x86memop:$src), 7122 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, 7123 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; 7124 } // hasSideEffects = 0 7125 let isCodeGenOnly = 1 in { 7126 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7127 (ins DstVT.RC:$src1, SrcRC:$src2), 7128 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7129 [(set DstVT.RC:$dst, 7130 (OpNode (DstVT.VT DstVT.RC:$src1), 7131 SrcRC:$src2, 7132 (i32 FROUND_CURRENT)))]>, 7133 EVEX_4V, Sched<[sched]>; 7134 7135 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), 7136 (ins DstVT.RC:$src1, x86memop:$src2), 7137 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7138 [(set DstVT.RC:$dst, 7139 (OpNode (DstVT.VT DstVT.RC:$src1), 7140 (ld_frag addr:$src2), 7141 (i32 FROUND_CURRENT)))]>, 7142 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; 7143 }//isCodeGenOnly = 1 7144 } 7145 7146 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, 7147 X86FoldableSchedWrite sched, RegisterClass SrcRC, 7148 X86VectorVTInfo DstVT, string asm> { 7149 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7150 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 7151 !strconcat(asm, 7152 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"), 7153 [(set DstVT.RC:$dst, 7154 (OpNode (DstVT.VT DstVT.RC:$src1), 7155 SrcRC:$src2, 7156 (i32 imm:$rc)))]>, 7157 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 7158 } 7159 7160 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, 7161 X86FoldableSchedWrite sched, 7162 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7163 X86MemOperand x86memop, PatFrag ld_frag, string asm> { 7164 defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>, 7165 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop, 7166 ld_frag, asm>, VEX_LIG; 7167 } 7168 7169 let Predicates = [HasAVX512] in { 7170 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32, 7171 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">, 7172 XS, EVEX_CD8<32, CD8VT1>; 7173 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64, 7174 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">, 7175 XS, VEX_W, EVEX_CD8<64, CD8VT1>; 7176 defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32, 7177 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">, 7178 XD, EVEX_CD8<32, CD8VT1>; 7179 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64, 7180 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">, 7181 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7182 7183 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7184 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">; 7185 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7186 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">; 7187 7188 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), 7189 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7190 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), 7191 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7192 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), 7193 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7194 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), 7195 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7196 7197 def : Pat<(f32 (sint_to_fp GR32:$src)), 7198 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7199 def : Pat<(f32 (sint_to_fp GR64:$src)), 7200 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7201 def : Pat<(f64 (sint_to_fp GR32:$src)), 7202 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7203 def : Pat<(f64 (sint_to_fp GR64:$src)), 7204 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7205 7206 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32, 7207 v4f32x_info, i32mem, loadi32, 7208 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; 7209 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64, 7210 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, 7211 XS, VEX_W, EVEX_CD8<64, CD8VT1>; 7212 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info, 7213 i32mem, loadi32, "cvtusi2sd{l}">, 7214 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7215 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64, 7216 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, 7217 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7218 7219 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7220 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">; 7221 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7222 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">; 7223 7224 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), 7225 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7226 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))), 7227 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7228 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))), 7229 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7230 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))), 7231 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7232 7233 def : Pat<(f32 (uint_to_fp GR32:$src)), 7234 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7235 def : Pat<(f32 (uint_to_fp GR64:$src)), 7236 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7237 def : Pat<(f64 (uint_to_fp GR32:$src)), 7238 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7239 def : Pat<(f64 (uint_to_fp GR64:$src)), 7240 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7241 } 7242 7243 //===----------------------------------------------------------------------===// 7244 // AVX-512 Scalar convert from float/double to integer 7245 //===----------------------------------------------------------------------===// 7246 7247 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, 7248 X86VectorVTInfo DstVT, SDNode OpNode, 7249 X86FoldableSchedWrite sched, string asm, 7250 string aliasStr, 7251 bit CodeGenOnly = 1> { 7252 let Predicates = [HasAVX512] in { 7253 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src), 7254 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7255 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>, 7256 EVEX, VEX_LIG, Sched<[sched]>; 7257 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc), 7258 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), 7259 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>, 7260 EVEX, VEX_LIG, EVEX_B, EVEX_RC, 7261 Sched<[sched]>; 7262 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in 7263 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src), 7264 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7265 [(set DstVT.RC:$dst, (OpNode 7266 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src), 7267 (i32 FROUND_CURRENT)))]>, 7268 EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; 7269 7270 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7271 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">; 7272 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}", 7273 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">; 7274 } // Predicates = [HasAVX512] 7275 } 7276 7277 multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT, 7278 X86VectorVTInfo DstVT, SDNode OpNode, 7279 X86FoldableSchedWrite sched, string asm, 7280 string aliasStr> : 7281 avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> { 7282 let Predicates = [HasAVX512] in { 7283 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7284 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst, 7285 SrcVT.IntScalarMemOp:$src), 0, "att">; 7286 } // Predicates = [HasAVX512] 7287 } 7288 7289 // Convert float/double to signed/unsigned int 32/64 7290 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, 7291 X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">, 7292 XS, EVEX_CD8<32, CD8VT1>; 7293 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, 7294 X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">, 7295 XS, VEX_W, EVEX_CD8<32, CD8VT1>; 7296 defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, 7297 X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">, 7298 XS, EVEX_CD8<32, CD8VT1>; 7299 defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, 7300 X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">, 7301 XS, VEX_W, EVEX_CD8<32, CD8VT1>; 7302 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, 7303 X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">, 7304 XD, EVEX_CD8<64, CD8VT1>; 7305 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, 7306 X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">, 7307 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7308 defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, 7309 X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">, 7310 XD, EVEX_CD8<64, CD8VT1>; 7311 defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, 7312 X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">, 7313 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7314 7315 // The SSE version of these instructions are disabled for AVX512. 7316 // Therefore, the SSE intrinsics are mapped to the AVX512 instructions. 7317 let Predicates = [HasAVX512] in { 7318 def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))), 7319 (VCVTSS2SIZrr_Int VR128X:$src)>; 7320 def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)), 7321 (VCVTSS2SIZrm_Int sse_load_f32:$src)>; 7322 def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))), 7323 (VCVTSS2SI64Zrr_Int VR128X:$src)>; 7324 def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)), 7325 (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>; 7326 def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))), 7327 (VCVTSD2SIZrr_Int VR128X:$src)>; 7328 def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)), 7329 (VCVTSD2SIZrm_Int sse_load_f64:$src)>; 7330 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))), 7331 (VCVTSD2SI64Zrr_Int VR128X:$src)>; 7332 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)), 7333 (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>; 7334 } // HasAVX512 7335 7336 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang 7337 // which produce unnecessary vmovs{s,d} instructions 7338 let Predicates = [HasAVX512] in { 7339 def : Pat<(v4f32 (X86Movss 7340 (v4f32 VR128X:$dst), 7341 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))), 7342 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>; 7343 7344 def : Pat<(v4f32 (X86Movss 7345 (v4f32 VR128X:$dst), 7346 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))), 7347 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>; 7348 7349 def : Pat<(v4f32 (X86Movss 7350 (v4f32 VR128X:$dst), 7351 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), 7352 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>; 7353 7354 def : Pat<(v4f32 (X86Movss 7355 (v4f32 VR128X:$dst), 7356 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))), 7357 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>; 7358 7359 def : Pat<(v2f64 (X86Movsd 7360 (v2f64 VR128X:$dst), 7361 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), 7362 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>; 7363 7364 def : Pat<(v2f64 (X86Movsd 7365 (v2f64 VR128X:$dst), 7366 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))), 7367 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>; 7368 7369 def : Pat<(v2f64 (X86Movsd 7370 (v2f64 VR128X:$dst), 7371 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), 7372 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>; 7373 7374 def : Pat<(v2f64 (X86Movsd 7375 (v2f64 VR128X:$dst), 7376 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))), 7377 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>; 7378 7379 def : Pat<(v4f32 (X86Movss 7380 (v4f32 VR128X:$dst), 7381 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))), 7382 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>; 7383 7384 def : Pat<(v4f32 (X86Movss 7385 (v4f32 VR128X:$dst), 7386 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))), 7387 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>; 7388 7389 def : Pat<(v4f32 (X86Movss 7390 (v4f32 VR128X:$dst), 7391 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))), 7392 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>; 7393 7394 def : Pat<(v4f32 (X86Movss 7395 (v4f32 VR128X:$dst), 7396 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))), 7397 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>; 7398 7399 def : Pat<(v2f64 (X86Movsd 7400 (v2f64 VR128X:$dst), 7401 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))), 7402 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>; 7403 7404 def : Pat<(v2f64 (X86Movsd 7405 (v2f64 VR128X:$dst), 7406 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))), 7407 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>; 7408 7409 def : Pat<(v2f64 (X86Movsd 7410 (v2f64 VR128X:$dst), 7411 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))), 7412 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>; 7413 7414 def : Pat<(v2f64 (X86Movsd 7415 (v2f64 VR128X:$dst), 7416 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))), 7417 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>; 7418 } // Predicates = [HasAVX512] 7419 7420 // Convert float/double to signed/unsigned int 32/64 with truncation 7421 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC, 7422 X86VectorVTInfo _DstRC, SDNode OpNode, 7423 SDNode OpNodeRnd, X86FoldableSchedWrite sched, 7424 string aliasStr, bit CodeGenOnly = 1>{ 7425 let Predicates = [HasAVX512] in { 7426 let isCodeGenOnly = 1 in { 7427 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src), 7428 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7429 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, 7430 EVEX, Sched<[sched]>; 7431 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src), 7432 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7433 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>, 7434 EVEX, Sched<[sched.Folded, ReadAfterLd]>; 7435 } 7436 7437 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), 7438 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7439 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src), 7440 (i32 FROUND_CURRENT)))]>, 7441 EVEX, VEX_LIG, Sched<[sched]>; 7442 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), 7443 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"), 7444 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src), 7445 (i32 FROUND_NO_EXC)))]>, 7446 EVEX,VEX_LIG , EVEX_B, Sched<[sched]>; 7447 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in 7448 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), 7449 (ins _SrcRC.IntScalarMemOp:$src), 7450 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7451 [(set _DstRC.RC:$dst, (OpNodeRnd 7452 (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src), 7453 (i32 FROUND_CURRENT)))]>, 7454 EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; 7455 7456 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7457 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; 7458 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}", 7459 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; 7460 } //HasAVX512 7461 } 7462 7463 multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm, 7464 X86VectorVTInfo _SrcRC, 7465 X86VectorVTInfo _DstRC, SDNode OpNode, 7466 SDNode OpNodeRnd, X86FoldableSchedWrite sched, 7467 string aliasStr> : 7468 avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched, 7469 aliasStr, 0> { 7470 let Predicates = [HasAVX512] in { 7471 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7472 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst, 7473 _SrcRC.IntScalarMemOp:$src), 0, "att">; 7474 } 7475 } 7476 7477 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, 7478 fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">, 7479 XS, EVEX_CD8<32, CD8VT1>; 7480 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info, 7481 fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">, 7482 VEX_W, XS, EVEX_CD8<32, CD8VT1>; 7483 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info, 7484 fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">, 7485 XD, EVEX_CD8<64, CD8VT1>; 7486 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, 7487 fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">, 7488 VEX_W, XD, EVEX_CD8<64, CD8VT1>; 7489 7490 defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info, 7491 fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">, 7492 XS, EVEX_CD8<32, CD8VT1>; 7493 defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info, 7494 fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">, 7495 XS,VEX_W, EVEX_CD8<32, CD8VT1>; 7496 defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info, 7497 fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">, 7498 XD, EVEX_CD8<64, CD8VT1>; 7499 defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info, 7500 fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">, 7501 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7502 7503 let Predicates = [HasAVX512] in { 7504 def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))), 7505 (VCVTTSS2SIZrr_Int VR128X:$src)>; 7506 def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)), 7507 (VCVTTSS2SIZrm_Int ssmem:$src)>; 7508 def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))), 7509 (VCVTTSS2SI64Zrr_Int VR128X:$src)>; 7510 def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)), 7511 (VCVTTSS2SI64Zrm_Int ssmem:$src)>; 7512 def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))), 7513 (VCVTTSD2SIZrr_Int VR128X:$src)>; 7514 def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)), 7515 (VCVTTSD2SIZrm_Int sdmem:$src)>; 7516 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))), 7517 (VCVTTSD2SI64Zrr_Int VR128X:$src)>; 7518 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)), 7519 (VCVTTSD2SI64Zrm_Int sdmem:$src)>; 7520 } // HasAVX512 7521 7522 //===----------------------------------------------------------------------===// 7523 // AVX-512 Convert form float to double and back 7524 //===----------------------------------------------------------------------===// 7525 7526 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7527 X86VectorVTInfo _Src, SDNode OpNode, 7528 X86FoldableSchedWrite sched> { 7529 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7530 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, 7531 "$src2, $src1", "$src1, $src2", 7532 (_.VT (OpNode (_.VT _.RC:$src1), 7533 (_Src.VT _Src.RC:$src2), 7534 (i32 FROUND_CURRENT)))>, 7535 EVEX_4V, VEX_LIG, Sched<[sched]>; 7536 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 7537 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr, 7538 "$src2, $src1", "$src1, $src2", 7539 (_.VT (OpNode (_.VT _.RC:$src1), 7540 (_Src.VT _Src.ScalarIntMemCPat:$src2), 7541 (i32 FROUND_CURRENT)))>, 7542 EVEX_4V, VEX_LIG, 7543 Sched<[sched.Folded, ReadAfterLd]>; 7544 7545 let isCodeGenOnly = 1, hasSideEffects = 0 in { 7546 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst), 7547 (ins _.FRC:$src1, _Src.FRC:$src2), 7548 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7549 EVEX_4V, VEX_LIG, Sched<[sched]>; 7550 let mayLoad = 1 in 7551 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst), 7552 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2), 7553 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7554 EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; 7555 } 7556 } 7557 7558 // Scalar Coversion with SAE - suppress all exceptions 7559 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7560 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7561 X86FoldableSchedWrite sched> { 7562 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7563 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, 7564 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 7565 (_.VT (OpNodeRnd (_.VT _.RC:$src1), 7566 (_Src.VT _Src.RC:$src2), 7567 (i32 FROUND_NO_EXC)))>, 7568 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; 7569 } 7570 7571 // Scalar Conversion with rounding control (RC) 7572 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7573 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7574 X86FoldableSchedWrite sched> { 7575 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7576 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr, 7577 "$rc, $src2, $src1", "$src1, $src2, $rc", 7578 (_.VT (OpNodeRnd (_.VT _.RC:$src1), 7579 (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>, 7580 EVEX_4V, VEX_LIG, Sched<[sched]>, 7581 EVEX_B, EVEX_RC; 7582 } 7583 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, 7584 SDNode OpNodeRnd, X86FoldableSchedWrite sched, 7585 X86VectorVTInfo _src, X86VectorVTInfo _dst> { 7586 let Predicates = [HasAVX512] in { 7587 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>, 7588 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, 7589 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD; 7590 } 7591 } 7592 7593 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 7594 X86FoldableSchedWrite sched, 7595 X86VectorVTInfo _src, X86VectorVTInfo _dst> { 7596 let Predicates = [HasAVX512] in { 7597 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>, 7598 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>, 7599 EVEX_CD8<32, CD8VT1>, XS; 7600 } 7601 } 7602 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", 7603 X86froundRnd, WriteCvtSD2SS, f64x_info, 7604 f32x_info>; 7605 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", 7606 X86fpextRnd, WriteCvtSS2SD, f32x_info, 7607 f64x_info>; 7608 7609 def : Pat<(f64 (fpextend FR32X:$src)), 7610 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, 7611 Requires<[HasAVX512]>; 7612 def : Pat<(f64 (fpextend (loadf32 addr:$src))), 7613 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7614 Requires<[HasAVX512, OptForSize]>; 7615 7616 def : Pat<(f64 (extloadf32 addr:$src)), 7617 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7618 Requires<[HasAVX512, OptForSize]>; 7619 7620 def : Pat<(f64 (extloadf32 addr:$src)), 7621 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, 7622 Requires<[HasAVX512, OptForSpeed]>; 7623 7624 def : Pat<(f32 (fpround FR64X:$src)), 7625 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>, 7626 Requires<[HasAVX512]>; 7627 7628 def : Pat<(v4f32 (X86Movss 7629 (v4f32 VR128X:$dst), 7630 (v4f32 (scalar_to_vector 7631 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))), 7632 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>, 7633 Requires<[HasAVX512]>; 7634 7635 def : Pat<(v2f64 (X86Movsd 7636 (v2f64 VR128X:$dst), 7637 (v2f64 (scalar_to_vector 7638 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))), 7639 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>, 7640 Requires<[HasAVX512]>; 7641 7642 //===----------------------------------------------------------------------===// 7643 // AVX-512 Vector convert from signed/unsigned integer to float/double 7644 // and from float/double to signed/unsigned integer 7645 //===----------------------------------------------------------------------===// 7646 7647 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7648 X86VectorVTInfo _Src, SDNode OpNode, 7649 X86FoldableSchedWrite sched, 7650 string Broadcast = _.BroadcastStr, 7651 string Alias = "", X86MemOperand MemOp = _Src.MemOp> { 7652 7653 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7654 (ins _Src.RC:$src), OpcodeStr, "$src", "$src", 7655 (_.VT (OpNode (_Src.VT _Src.RC:$src)))>, 7656 EVEX, Sched<[sched]>; 7657 7658 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 7659 (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src", 7660 (_.VT (OpNode (_Src.VT 7661 (bitconvert (_Src.LdFrag addr:$src)))))>, 7662 EVEX, Sched<[sched.Folded]>; 7663 7664 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 7665 (ins _Src.ScalarMemOp:$src), OpcodeStr, 7666 "${src}"##Broadcast, "${src}"##Broadcast, 7667 (_.VT (OpNode (_Src.VT 7668 (X86VBroadcast (_Src.ScalarLdFrag addr:$src))) 7669 ))>, EVEX, EVEX_B, 7670 Sched<[sched.Folded]>; 7671 } 7672 // Coversion with SAE - suppress all exceptions 7673 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7674 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7675 X86FoldableSchedWrite sched> { 7676 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7677 (ins _Src.RC:$src), OpcodeStr, 7678 "{sae}, $src", "$src, {sae}", 7679 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), 7680 (i32 FROUND_NO_EXC)))>, 7681 EVEX, EVEX_B, Sched<[sched]>; 7682 } 7683 7684 // Conversion with rounding control (RC) 7685 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7686 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7687 X86FoldableSchedWrite sched> { 7688 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7689 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr, 7690 "$rc, $src", "$src, $rc", 7691 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>, 7692 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 7693 } 7694 7695 // Extend Float to Double 7696 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr, 7697 X86SchedWriteWidths sched> { 7698 let Predicates = [HasAVX512] in { 7699 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, 7700 fpextend, sched.ZMM>, 7701 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info, 7702 X86vfpextRnd, sched.ZMM>, EVEX_V512; 7703 } 7704 let Predicates = [HasVLX] in { 7705 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info, 7706 X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128; 7707 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend, 7708 sched.YMM>, EVEX_V256; 7709 } 7710 } 7711 7712 // Truncate Double to Float 7713 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { 7714 let Predicates = [HasAVX512] in { 7715 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>, 7716 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info, 7717 X86vfproundRnd, sched.ZMM>, EVEX_V512; 7718 } 7719 let Predicates = [HasVLX] in { 7720 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info, 7721 X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128; 7722 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround, 7723 sched.YMM, "{1to4}", "{y}">, EVEX_V256; 7724 7725 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7726 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; 7727 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7728 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">; 7729 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 7730 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>; 7731 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 7732 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">; 7733 } 7734 } 7735 7736 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>, 7737 VEX_W, PD, EVEX_CD8<64, CD8VF>; 7738 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>, 7739 PS, EVEX_CD8<32, CD8VH>; 7740 7741 def : Pat<(v8f64 (extloadv8f32 addr:$src)), 7742 (VCVTPS2PDZrm addr:$src)>; 7743 7744 let Predicates = [HasVLX] in { 7745 def : Pat<(X86vzmovl (v2f64 (bitconvert 7746 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))), 7747 (VCVTPD2PSZ128rr VR128X:$src)>; 7748 def : Pat<(X86vzmovl (v2f64 (bitconvert 7749 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))), 7750 (VCVTPD2PSZ128rm addr:$src)>; 7751 def : Pat<(v2f64 (extloadv2f32 addr:$src)), 7752 (VCVTPS2PDZ128rm addr:$src)>; 7753 def : Pat<(v4f64 (extloadv4f32 addr:$src)), 7754 (VCVTPS2PDZ256rm addr:$src)>; 7755 } 7756 7757 // Convert Signed/Unsigned Doubleword to Double 7758 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, 7759 SDNode OpNode128, X86SchedWriteWidths sched> { 7760 // No rounding in this op 7761 let Predicates = [HasAVX512] in 7762 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, 7763 sched.ZMM>, EVEX_V512; 7764 7765 let Predicates = [HasVLX] in { 7766 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, 7767 OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128; 7768 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, 7769 sched.YMM>, EVEX_V256; 7770 } 7771 } 7772 7773 // Convert Signed/Unsigned Doubleword to Float 7774 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, 7775 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7776 let Predicates = [HasAVX512] in 7777 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode, 7778 sched.ZMM>, 7779 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info, 7780 OpNodeRnd, sched.ZMM>, EVEX_V512; 7781 7782 let Predicates = [HasVLX] in { 7783 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode, 7784 sched.XMM>, EVEX_V128; 7785 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode, 7786 sched.YMM>, EVEX_V256; 7787 } 7788 } 7789 7790 // Convert Float to Signed/Unsigned Doubleword with truncation 7791 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7792 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7793 let Predicates = [HasAVX512] in { 7794 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 7795 sched.ZMM>, 7796 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, 7797 OpNodeRnd, sched.ZMM>, EVEX_V512; 7798 } 7799 let Predicates = [HasVLX] in { 7800 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 7801 sched.XMM>, EVEX_V128; 7802 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 7803 sched.YMM>, EVEX_V256; 7804 } 7805 } 7806 7807 // Convert Float to Signed/Unsigned Doubleword 7808 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7809 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7810 let Predicates = [HasAVX512] in { 7811 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 7812 sched.ZMM>, 7813 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, 7814 OpNodeRnd, sched.ZMM>, EVEX_V512; 7815 } 7816 let Predicates = [HasVLX] in { 7817 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 7818 sched.XMM>, EVEX_V128; 7819 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 7820 sched.YMM>, EVEX_V256; 7821 } 7822 } 7823 7824 // Convert Double to Signed/Unsigned Doubleword with truncation 7825 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7826 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7827 let Predicates = [HasAVX512] in { 7828 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 7829 sched.ZMM>, 7830 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, 7831 OpNodeRnd, sched.ZMM>, EVEX_V512; 7832 } 7833 let Predicates = [HasVLX] in { 7834 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 7835 // memory forms of these instructions in Asm Parser. They have the same 7836 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 7837 // due to the same reason. 7838 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 7839 OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128; 7840 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 7841 sched.YMM, "{1to4}", "{y}">, EVEX_V256; 7842 7843 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7844 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; 7845 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7846 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">; 7847 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 7848 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>; 7849 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 7850 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">; 7851 } 7852 } 7853 7854 // Convert Double to Signed/Unsigned Doubleword 7855 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7856 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7857 let Predicates = [HasAVX512] in { 7858 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 7859 sched.ZMM>, 7860 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info, 7861 OpNodeRnd, sched.ZMM>, EVEX_V512; 7862 } 7863 let Predicates = [HasVLX] in { 7864 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 7865 // memory forms of these instructions in Asm Parcer. They have the same 7866 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 7867 // due to the same reason. 7868 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode, 7869 sched.XMM, "{1to2}", "{x}">, EVEX_V128; 7870 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 7871 sched.YMM, "{1to4}", "{y}">, EVEX_V256; 7872 7873 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7874 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; 7875 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7876 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">; 7877 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 7878 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>; 7879 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 7880 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">; 7881 } 7882 } 7883 7884 // Convert Double to Signed/Unsigned Quardword 7885 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7886 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7887 let Predicates = [HasDQI] in { 7888 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 7889 sched.ZMM>, 7890 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info, 7891 OpNodeRnd, sched.ZMM>, EVEX_V512; 7892 } 7893 let Predicates = [HasDQI, HasVLX] in { 7894 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 7895 sched.XMM>, EVEX_V128; 7896 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 7897 sched.YMM>, EVEX_V256; 7898 } 7899 } 7900 7901 // Convert Double to Signed/Unsigned Quardword with truncation 7902 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7903 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7904 let Predicates = [HasDQI] in { 7905 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 7906 sched.ZMM>, 7907 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, 7908 OpNodeRnd, sched.ZMM>, EVEX_V512; 7909 } 7910 let Predicates = [HasDQI, HasVLX] in { 7911 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 7912 sched.XMM>, EVEX_V128; 7913 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 7914 sched.YMM>, EVEX_V256; 7915 } 7916 } 7917 7918 // Convert Signed/Unsigned Quardword to Double 7919 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, 7920 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7921 let Predicates = [HasDQI] in { 7922 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode, 7923 sched.ZMM>, 7924 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info, 7925 OpNodeRnd, sched.ZMM>, EVEX_V512; 7926 } 7927 let Predicates = [HasDQI, HasVLX] in { 7928 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode, 7929 sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible; 7930 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode, 7931 sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible; 7932 } 7933 } 7934 7935 // Convert Float to Signed/Unsigned Quardword 7936 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7937 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7938 let Predicates = [HasDQI] in { 7939 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 7940 sched.ZMM>, 7941 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info, 7942 OpNodeRnd, sched.ZMM>, EVEX_V512; 7943 } 7944 let Predicates = [HasDQI, HasVLX] in { 7945 // Explicitly specified broadcast string, since we take only 2 elements 7946 // from v4f32x_info source 7947 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 7948 sched.XMM, "{1to2}", "", f64mem>, EVEX_V128; 7949 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 7950 sched.YMM>, EVEX_V256; 7951 } 7952 } 7953 7954 // Convert Float to Signed/Unsigned Quardword with truncation 7955 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 7956 SDNode OpNodeRnd, X86SchedWriteWidths sched> { 7957 let Predicates = [HasDQI] in { 7958 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>, 7959 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, 7960 OpNodeRnd, sched.ZMM>, EVEX_V512; 7961 } 7962 let Predicates = [HasDQI, HasVLX] in { 7963 // Explicitly specified broadcast string, since we take only 2 elements 7964 // from v4f32x_info source 7965 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 7966 sched.XMM, "{1to2}", "", f64mem>, EVEX_V128; 7967 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 7968 sched.YMM>, EVEX_V256; 7969 } 7970 } 7971 7972 // Convert Signed/Unsigned Quardword to Float 7973 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, 7974 SDNode OpNode128, SDNode OpNodeRnd, 7975 X86SchedWriteWidths sched> { 7976 let Predicates = [HasDQI] in { 7977 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode, 7978 sched.ZMM>, 7979 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info, 7980 OpNodeRnd, sched.ZMM>, EVEX_V512; 7981 } 7982 let Predicates = [HasDQI, HasVLX] in { 7983 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 7984 // memory forms of these instructions in Asm Parcer. They have the same 7985 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 7986 // due to the same reason. 7987 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128, 7988 sched.XMM, "{1to2}", "{x}">, EVEX_V128, 7989 NotEVEX2VEXConvertible; 7990 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode, 7991 sched.YMM, "{1to4}", "{y}">, EVEX_V256, 7992 NotEVEX2VEXConvertible; 7993 7994 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7995 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; 7996 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", 7997 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">; 7998 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 7999 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>; 8000 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", 8001 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">; 8002 } 8003 } 8004 8005 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP, 8006 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8007 8008 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, 8009 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8010 PS, EVEX_CD8<32, CD8VF>; 8011 8012 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si, 8013 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, 8014 XS, EVEX_CD8<32, CD8VF>; 8015 8016 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si, 8017 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, 8018 PD, VEX_W, EVEX_CD8<64, CD8VF>; 8019 8020 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui, 8021 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS, 8022 EVEX_CD8<32, CD8VF>; 8023 8024 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui, 8025 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, 8026 PS, VEX_W, EVEX_CD8<64, CD8VF>; 8027 8028 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, 8029 X86VUintToFP, SchedWriteCvtDQ2PD>, XS, 8030 EVEX_CD8<32, CD8VH>; 8031 8032 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, 8033 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD, 8034 EVEX_CD8<32, CD8VF>; 8035 8036 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, 8037 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8038 EVEX_CD8<32, CD8VF>; 8039 8040 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, 8041 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, 8042 VEX_W, EVEX_CD8<64, CD8VF>; 8043 8044 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, 8045 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8046 PS, EVEX_CD8<32, CD8VF>; 8047 8048 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, 8049 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8050 PS, EVEX_CD8<64, CD8VF>; 8051 8052 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, 8053 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8054 PD, EVEX_CD8<64, CD8VF>; 8055 8056 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, 8057 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8058 EVEX_CD8<32, CD8VH>; 8059 8060 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, 8061 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8062 PD, EVEX_CD8<64, CD8VF>; 8063 8064 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, 8065 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, 8066 EVEX_CD8<32, CD8VH>; 8067 8068 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si, 8069 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W, 8070 PD, EVEX_CD8<64, CD8VF>; 8071 8072 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si, 8073 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD, 8074 EVEX_CD8<32, CD8VH>; 8075 8076 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui, 8077 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W, 8078 PD, EVEX_CD8<64, CD8VF>; 8079 8080 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui, 8081 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD, 8082 EVEX_CD8<32, CD8VH>; 8083 8084 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, 8085 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS, 8086 EVEX_CD8<64, CD8VF>; 8087 8088 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, 8089 X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS, 8090 EVEX_CD8<64, CD8VF>; 8091 8092 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP, 8093 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS, 8094 EVEX_CD8<64, CD8VF>; 8095 8096 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP, 8097 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD, 8098 EVEX_CD8<64, CD8VF>; 8099 8100 let Predicates = [HasAVX512] in { 8101 def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))), 8102 (VCVTTPS2DQZrr VR512:$src)>; 8103 def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))), 8104 (VCVTTPS2DQZrm addr:$src)>; 8105 8106 def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))), 8107 (VCVTTPS2UDQZrr VR512:$src)>; 8108 def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))), 8109 (VCVTTPS2UDQZrm addr:$src)>; 8110 8111 def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))), 8112 (VCVTTPD2DQZrr VR512:$src)>; 8113 def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))), 8114 (VCVTTPD2DQZrm addr:$src)>; 8115 8116 def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))), 8117 (VCVTTPD2UDQZrr VR512:$src)>; 8118 def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))), 8119 (VCVTTPD2UDQZrm addr:$src)>; 8120 } 8121 8122 let Predicates = [HasVLX] in { 8123 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))), 8124 (VCVTTPS2DQZ128rr VR128X:$src)>; 8125 def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))), 8126 (VCVTTPS2DQZ128rm addr:$src)>; 8127 8128 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))), 8129 (VCVTTPS2UDQZ128rr VR128X:$src)>; 8130 def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))), 8131 (VCVTTPS2UDQZ128rm addr:$src)>; 8132 8133 def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))), 8134 (VCVTTPS2DQZ256rr VR256X:$src)>; 8135 def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))), 8136 (VCVTTPS2DQZ256rm addr:$src)>; 8137 8138 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))), 8139 (VCVTTPS2UDQZ256rr VR256X:$src)>; 8140 def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))), 8141 (VCVTTPS2UDQZ256rm addr:$src)>; 8142 8143 def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))), 8144 (VCVTTPD2DQZ256rr VR256X:$src)>; 8145 def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), 8146 (VCVTTPD2DQZ256rm addr:$src)>; 8147 8148 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))), 8149 (VCVTTPD2UDQZ256rr VR256X:$src)>; 8150 def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))), 8151 (VCVTTPD2UDQZ256rm addr:$src)>; 8152 } 8153 8154 let Predicates = [HasDQI] in { 8155 def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))), 8156 (VCVTTPS2QQZrr VR256X:$src)>; 8157 def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))), 8158 (VCVTTPS2QQZrm addr:$src)>; 8159 8160 def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))), 8161 (VCVTTPS2UQQZrr VR256X:$src)>; 8162 def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))), 8163 (VCVTTPS2UQQZrm addr:$src)>; 8164 8165 def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))), 8166 (VCVTTPD2QQZrr VR512:$src)>; 8167 def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))), 8168 (VCVTTPD2QQZrm addr:$src)>; 8169 8170 def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))), 8171 (VCVTTPD2UQQZrr VR512:$src)>; 8172 def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))), 8173 (VCVTTPD2UQQZrm addr:$src)>; 8174 } 8175 8176 let Predicates = [HasDQI, HasVLX] in { 8177 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))), 8178 (VCVTTPS2QQZ256rr VR128X:$src)>; 8179 def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))), 8180 (VCVTTPS2QQZ256rm addr:$src)>; 8181 8182 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))), 8183 (VCVTTPS2UQQZ256rr VR128X:$src)>; 8184 def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))), 8185 (VCVTTPS2UQQZ256rm addr:$src)>; 8186 8187 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))), 8188 (VCVTTPD2QQZ128rr VR128X:$src)>; 8189 def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))), 8190 (VCVTTPD2QQZ128rm addr:$src)>; 8191 8192 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))), 8193 (VCVTTPD2UQQZ128rr VR128X:$src)>; 8194 def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))), 8195 (VCVTTPD2UQQZ128rm addr:$src)>; 8196 8197 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))), 8198 (VCVTTPD2QQZ256rr VR256X:$src)>; 8199 def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))), 8200 (VCVTTPD2QQZ256rm addr:$src)>; 8201 8202 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))), 8203 (VCVTTPD2UQQZ256rr VR256X:$src)>; 8204 def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))), 8205 (VCVTTPD2UQQZ256rm addr:$src)>; 8206 } 8207 8208 let Predicates = [HasAVX512, NoVLX] in { 8209 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), 8210 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr 8211 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), 8212 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8213 8214 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))), 8215 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr 8216 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), 8217 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8218 8219 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))), 8220 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr 8221 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8222 VR256X:$src1, sub_ymm)))), sub_xmm)>; 8223 8224 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))), 8225 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr 8226 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 8227 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8228 8229 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), 8230 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr 8231 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 8232 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8233 8234 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), 8235 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr 8236 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), 8237 VR128X:$src1, sub_xmm)))), sub_ymm)>; 8238 8239 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))), 8240 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr 8241 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), 8242 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8243 } 8244 8245 let Predicates = [HasAVX512, HasVLX] in { 8246 def : Pat<(X86vzmovl (v2i64 (bitconvert 8247 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))), 8248 (VCVTPD2DQZ128rr VR128X:$src)>; 8249 def : Pat<(X86vzmovl (v2i64 (bitconvert 8250 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))), 8251 (VCVTPD2DQZ128rm addr:$src)>; 8252 def : Pat<(X86vzmovl (v2i64 (bitconvert 8253 (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))), 8254 (VCVTPD2UDQZ128rr VR128X:$src)>; 8255 def : Pat<(X86vzmovl (v2i64 (bitconvert 8256 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))), 8257 (VCVTTPD2DQZ128rr VR128X:$src)>; 8258 def : Pat<(X86vzmovl (v2i64 (bitconvert 8259 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))), 8260 (VCVTTPD2DQZ128rm addr:$src)>; 8261 def : Pat<(X86vzmovl (v2i64 (bitconvert 8262 (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))), 8263 (VCVTTPD2UDQZ128rr VR128X:$src)>; 8264 8265 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 8266 (VCVTDQ2PDZ128rm addr:$src)>; 8267 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), 8268 (VCVTDQ2PDZ128rm addr:$src)>; 8269 8270 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 8271 (VCVTUDQ2PDZ128rm addr:$src)>; 8272 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), 8273 (VCVTUDQ2PDZ128rm addr:$src)>; 8274 } 8275 8276 let Predicates = [HasAVX512] in { 8277 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))), 8278 (VCVTPD2PSZrm addr:$src)>; 8279 def : Pat<(v8f64 (extloadv8f32 addr:$src)), 8280 (VCVTPS2PDZrm addr:$src)>; 8281 } 8282 8283 let Predicates = [HasDQI, HasVLX] in { 8284 def : Pat<(X86vzmovl (v2f64 (bitconvert 8285 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))), 8286 (VCVTQQ2PSZ128rr VR128X:$src)>; 8287 def : Pat<(X86vzmovl (v2f64 (bitconvert 8288 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))), 8289 (VCVTUQQ2PSZ128rr VR128X:$src)>; 8290 } 8291 8292 let Predicates = [HasDQI, NoVLX] in { 8293 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))), 8294 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr 8295 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8296 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8297 8298 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))), 8299 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr 8300 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), 8301 VR128X:$src1, sub_xmm)))), sub_ymm)>; 8302 8303 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))), 8304 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr 8305 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8306 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8307 8308 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))), 8309 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr 8310 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8311 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8312 8313 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))), 8314 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr 8315 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), 8316 VR128X:$src1, sub_xmm)))), sub_ymm)>; 8317 8318 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))), 8319 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr 8320 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), 8321 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8322 8323 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))), 8324 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr 8325 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8326 VR256X:$src1, sub_ymm)))), sub_xmm)>; 8327 8328 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))), 8329 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr 8330 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8331 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8332 8333 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))), 8334 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr 8335 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8336 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8337 8338 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))), 8339 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr 8340 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8341 VR256X:$src1, sub_ymm)))), sub_xmm)>; 8342 8343 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))), 8344 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr 8345 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8346 VR128X:$src1, sub_xmm)))), sub_xmm)>; 8347 8348 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))), 8349 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr 8350 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 8351 VR256X:$src1, sub_ymm)))), sub_ymm)>; 8352 } 8353 8354 //===----------------------------------------------------------------------===// 8355 // Half precision conversion instructions 8356 //===----------------------------------------------------------------------===// 8357 8358 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8359 X86MemOperand x86memop, PatFrag ld_frag, 8360 X86FoldableSchedWrite sched> { 8361 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), 8362 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", 8363 (X86cvtph2ps (_src.VT _src.RC:$src))>, 8364 T8PD, Sched<[sched]>; 8365 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), 8366 (ins x86memop:$src), "vcvtph2ps", "$src", "$src", 8367 (X86cvtph2ps (_src.VT 8368 (bitconvert 8369 (ld_frag addr:$src))))>, 8370 T8PD, Sched<[sched.Folded]>; 8371 } 8372 8373 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8374 X86FoldableSchedWrite sched> { 8375 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), 8376 (ins _src.RC:$src), "vcvtph2ps", 8377 "{sae}, $src", "$src, {sae}", 8378 (X86cvtph2psRnd (_src.VT _src.RC:$src), 8379 (i32 FROUND_NO_EXC))>, 8380 T8PD, EVEX_B, Sched<[sched]>; 8381 } 8382 8383 let Predicates = [HasAVX512] in 8384 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64, 8385 WriteCvtPH2PSZ>, 8386 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, 8387 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 8388 8389 let Predicates = [HasVLX] in { 8390 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, 8391 loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256, 8392 EVEX_CD8<32, CD8VH>; 8393 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, 8394 loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128, 8395 EVEX_CD8<32, CD8VH>; 8396 8397 // Pattern match vcvtph2ps of a scalar i64 load. 8398 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))), 8399 (VCVTPH2PSZ128rm addr:$src)>; 8400 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))), 8401 (VCVTPH2PSZ128rm addr:$src)>; 8402 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert 8403 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 8404 (VCVTPH2PSZ128rm addr:$src)>; 8405 } 8406 8407 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8408 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> { 8409 defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), 8410 (ins _src.RC:$src1, i32u8imm:$src2), 8411 "vcvtps2ph", "$src2, $src1", "$src1, $src2", 8412 (X86cvtps2ph (_src.VT _src.RC:$src1), 8413 (i32 imm:$src2)), 0, 0>, 8414 AVX512AIi8Base, Sched<[RR]>; 8415 let hasSideEffects = 0, mayStore = 1 in { 8416 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), 8417 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), 8418 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8419 Sched<[MR]>; 8420 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), 8421 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8422 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, 8423 EVEX_K, Sched<[MR]>, NotMemoryFoldable; 8424 } 8425 } 8426 8427 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8428 SchedWrite Sched> { 8429 let hasSideEffects = 0 in 8430 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, 8431 (outs _dest.RC:$dst), 8432 (ins _src.RC:$src1, i32u8imm:$src2), 8433 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>, 8434 EVEX_B, AVX512AIi8Base, Sched<[Sched]>; 8435 } 8436 8437 let Predicates = [HasAVX512] in { 8438 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, 8439 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, 8440 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>, 8441 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 8442 let Predicates = [HasVLX] in { 8443 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, 8444 WriteCvtPS2PHY, WriteCvtPS2PHYSt>, 8445 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; 8446 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem, 8447 WriteCvtPS2PH, WriteCvtPS2PHSt>, 8448 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; 8449 } 8450 8451 def : Pat<(store (f64 (extractelt 8452 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), 8453 (iPTR 0))), addr:$dst), 8454 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; 8455 def : Pat<(store (i64 (extractelt 8456 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), 8457 (iPTR 0))), addr:$dst), 8458 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; 8459 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst), 8460 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>; 8461 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst), 8462 (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>; 8463 } 8464 8465 // Patterns for matching conversions from float to half-float and vice versa. 8466 let Predicates = [HasVLX] in { 8467 // Use MXCSR.RC for rounding instead of explicitly specifying the default 8468 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the 8469 // configurations we support (the default). However, falling back to MXCSR is 8470 // more consistent with other instructions, which are always controlled by it. 8471 // It's encoded as 0b100. 8472 def : Pat<(fp_to_f16 FR32X:$src), 8473 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr 8474 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>; 8475 8476 def : Pat<(f16_to_fp GR16:$src), 8477 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr 8478 (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >; 8479 8480 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))), 8481 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr 8482 (v8i16 (VCVTPS2PHZ128rr 8483 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >; 8484 } 8485 8486 // Unordered/Ordered scalar fp compare with Sea and set EFLAGS 8487 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, 8488 string OpcodeStr, X86FoldableSchedWrite sched> { 8489 let hasSideEffects = 0 in 8490 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), 8491 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>, 8492 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; 8493 } 8494 8495 let Defs = [EFLAGS], Predicates = [HasAVX512] in { 8496 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>, 8497 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 8498 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>, 8499 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 8500 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>, 8501 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 8502 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>, 8503 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 8504 } 8505 8506 let Defs = [EFLAGS], Predicates = [HasAVX512] in { 8507 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, 8508 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, 8509 EVEX_CD8<32, CD8VT1>; 8510 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, 8511 "ucomisd", WriteFCom>, PD, EVEX, 8512 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8513 let Pattern = []<dag> in { 8514 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, 8515 "comiss", WriteFCom>, PS, EVEX, VEX_LIG, 8516 EVEX_CD8<32, CD8VT1>; 8517 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, 8518 "comisd", WriteFCom>, PD, EVEX, 8519 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8520 } 8521 let isCodeGenOnly = 1 in { 8522 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, 8523 sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, 8524 EVEX_CD8<32, CD8VT1>; 8525 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, 8526 sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX, 8527 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8528 8529 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, 8530 sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG, 8531 EVEX_CD8<32, CD8VT1>; 8532 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, 8533 sse_load_f64, "comisd", WriteFCom>, PD, EVEX, 8534 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 8535 } 8536 } 8537 8538 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd 8539 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 8540 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 8541 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { 8542 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8543 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8544 "$src2, $src1", "$src1, $src2", 8545 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 8546 EVEX_4V, Sched<[sched]>; 8547 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8548 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8549 "$src2, $src1", "$src1, $src2", 8550 (OpNode (_.VT _.RC:$src1), 8551 _.ScalarIntMemCPat:$src2)>, EVEX_4V, 8552 Sched<[sched.Folded, ReadAfterLd]>; 8553 } 8554 } 8555 8556 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, 8557 f32x_info>, EVEX_CD8<32, CD8VT1>, 8558 T8PD; 8559 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, 8560 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, 8561 T8PD; 8562 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, 8563 SchedWriteFRsqrt.Scl, f32x_info>, 8564 EVEX_CD8<32, CD8VT1>, T8PD; 8565 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, 8566 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W, 8567 EVEX_CD8<64, CD8VT1>, T8PD; 8568 8569 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd 8570 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 8571 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 8572 let ExeDomain = _.ExeDomain in { 8573 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8574 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8575 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD, 8576 Sched<[sched]>; 8577 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8578 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8579 (OpNode (_.VT 8580 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD, 8581 Sched<[sched.Folded, ReadAfterLd]>; 8582 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8583 (ins _.ScalarMemOp:$src), OpcodeStr, 8584 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, 8585 (OpNode (_.VT 8586 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, 8587 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 8588 } 8589 } 8590 8591 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, 8592 X86SchedWriteWidths sched> { 8593 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM, 8594 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; 8595 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM, 8596 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 8597 8598 // Define only if AVX512VL feature is present. 8599 let Predicates = [HasVLX] in { 8600 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), 8601 OpNode, sched.XMM, v4f32x_info>, 8602 EVEX_V128, EVEX_CD8<32, CD8VF>; 8603 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), 8604 OpNode, sched.YMM, v8f32x_info>, 8605 EVEX_V256, EVEX_CD8<32, CD8VF>; 8606 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), 8607 OpNode, sched.XMM, v2f64x_info>, 8608 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 8609 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), 8610 OpNode, sched.YMM, v4f64x_info>, 8611 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 8612 } 8613 } 8614 8615 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>; 8616 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>; 8617 8618 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd 8619 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 8620 SDNode OpNode, X86FoldableSchedWrite sched> { 8621 let ExeDomain = _.ExeDomain in { 8622 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8623 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8624 "$src2, $src1", "$src1, $src2", 8625 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), 8626 (i32 FROUND_CURRENT))>, 8627 Sched<[sched]>; 8628 8629 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8630 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8631 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 8632 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), 8633 (i32 FROUND_NO_EXC))>, EVEX_B, 8634 Sched<[sched]>; 8635 8636 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8637 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8638 "$src2, $src1", "$src1, $src2", 8639 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, 8640 (i32 FROUND_CURRENT))>, 8641 Sched<[sched.Folded, ReadAfterLd]>; 8642 } 8643 } 8644 8645 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 8646 X86FoldableSchedWrite sched> { 8647 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>, 8648 EVEX_CD8<32, CD8VT1>; 8649 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>, 8650 EVEX_CD8<64, CD8VT1>, VEX_W; 8651 } 8652 8653 let Predicates = [HasERI] in { 8654 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>, 8655 T8PD, EVEX_4V; 8656 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, 8657 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V; 8658 } 8659 8660 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, 8661 SchedWriteFRnd.Scl>, T8PD, EVEX_4V; 8662 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd 8663 8664 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 8665 SDNode OpNode, X86FoldableSchedWrite sched> { 8666 let ExeDomain = _.ExeDomain in { 8667 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8668 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8669 (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>, 8670 Sched<[sched]>; 8671 8672 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8673 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8674 (OpNode (_.VT 8675 (bitconvert (_.LdFrag addr:$src))), 8676 (i32 FROUND_CURRENT))>, 8677 Sched<[sched.Folded, ReadAfterLd]>; 8678 8679 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8680 (ins _.ScalarMemOp:$src), OpcodeStr, 8681 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, 8682 (OpNode (_.VT 8683 (X86VBroadcast (_.ScalarLdFrag addr:$src))), 8684 (i32 FROUND_CURRENT))>, EVEX_B, 8685 Sched<[sched.Folded, ReadAfterLd]>; 8686 } 8687 } 8688 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 8689 SDNode OpNode, X86FoldableSchedWrite sched> { 8690 let ExeDomain = _.ExeDomain in 8691 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8692 (ins _.RC:$src), OpcodeStr, 8693 "{sae}, $src", "$src, {sae}", 8694 (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, 8695 EVEX_B, Sched<[sched]>; 8696 } 8697 8698 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode, 8699 X86SchedWriteWidths sched> { 8700 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, 8701 avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, 8702 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; 8703 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, 8704 avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, 8705 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 8706 } 8707 8708 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, 8709 SDNode OpNode, X86SchedWriteWidths sched> { 8710 // Define only if AVX512VL feature is present. 8711 let Predicates = [HasVLX] in { 8712 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>, 8713 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; 8714 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>, 8715 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; 8716 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>, 8717 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; 8718 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>, 8719 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; 8720 } 8721 } 8722 8723 let Predicates = [HasERI] in { 8724 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX; 8725 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX; 8726 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX; 8727 } 8728 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>, 8729 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd, 8730 SchedWriteFRnd>, EVEX; 8731 8732 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, 8733 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 8734 let ExeDomain = _.ExeDomain in 8735 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8736 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", 8737 (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>, 8738 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 8739 } 8740 8741 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, 8742 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 8743 let ExeDomain = _.ExeDomain in { 8744 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 8745 (ins _.RC:$src), OpcodeStr, "$src", "$src", 8746 (_.VT (fsqrt _.RC:$src))>, EVEX, 8747 Sched<[sched]>; 8748 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8749 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 8750 (fsqrt (_.VT 8751 (bitconvert (_.LdFrag addr:$src))))>, EVEX, 8752 Sched<[sched.Folded, ReadAfterLd]>; 8753 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 8754 (ins _.ScalarMemOp:$src), OpcodeStr, 8755 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, 8756 (fsqrt (_.VT 8757 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, 8758 EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 8759 } 8760 } 8761 8762 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, 8763 X86SchedWriteSizes sched> { 8764 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8765 sched.PS.ZMM, v16f32_info>, 8766 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 8767 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8768 sched.PD.ZMM, v8f64_info>, 8769 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8770 // Define only if AVX512VL feature is present. 8771 let Predicates = [HasVLX] in { 8772 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8773 sched.PS.XMM, v4f32x_info>, 8774 EVEX_V128, PS, EVEX_CD8<32, CD8VF>; 8775 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 8776 sched.PS.YMM, v8f32x_info>, 8777 EVEX_V256, PS, EVEX_CD8<32, CD8VF>; 8778 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8779 sched.PD.XMM, v2f64x_info>, 8780 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8781 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 8782 sched.PD.YMM, v4f64x_info>, 8783 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8784 } 8785 } 8786 8787 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, 8788 X86SchedWriteSizes sched> { 8789 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), 8790 sched.PS.ZMM, v16f32_info>, 8791 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 8792 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), 8793 sched.PD.ZMM, v8f64_info>, 8794 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; 8795 } 8796 8797 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 8798 X86VectorVTInfo _, string Name> { 8799 let ExeDomain = _.ExeDomain in { 8800 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8801 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 8802 "$src2, $src1", "$src1, $src2", 8803 (X86fsqrtRnds (_.VT _.RC:$src1), 8804 (_.VT _.RC:$src2), 8805 (i32 FROUND_CURRENT))>, 8806 Sched<[sched]>; 8807 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8808 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 8809 "$src2, $src1", "$src1, $src2", 8810 (X86fsqrtRnds (_.VT _.RC:$src1), 8811 _.ScalarIntMemCPat:$src2, 8812 (i32 FROUND_CURRENT))>, 8813 Sched<[sched.Folded, ReadAfterLd]>; 8814 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8815 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 8816 "$rc, $src2, $src1", "$src1, $src2, $rc", 8817 (X86fsqrtRnds (_.VT _.RC:$src1), 8818 (_.VT _.RC:$src2), 8819 (i32 imm:$rc))>, 8820 EVEX_B, EVEX_RC, Sched<[sched]>; 8821 8822 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in { 8823 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 8824 (ins _.FRC:$src1, _.FRC:$src2), 8825 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8826 Sched<[sched]>; 8827 let mayLoad = 1 in 8828 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 8829 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 8830 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8831 Sched<[sched.Folded, ReadAfterLd]>; 8832 } 8833 } 8834 8835 let Predicates = [HasAVX512] in { 8836 def : Pat<(_.EltVT (fsqrt _.FRC:$src)), 8837 (!cast<Instruction>(Name#Zr) 8838 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; 8839 } 8840 8841 let Predicates = [HasAVX512, OptForSize] in { 8842 def : Pat<(_.EltVT (fsqrt (load addr:$src))), 8843 (!cast<Instruction>(Name#Zm) 8844 (_.EltVT (IMPLICIT_DEF)), addr:$src)>; 8845 } 8846 } 8847 8848 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr, 8849 X86SchedWriteSizes sched> { 8850 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">, 8851 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; 8852 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">, 8853 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W; 8854 } 8855 8856 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, 8857 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>; 8858 8859 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG; 8860 8861 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, 8862 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 8863 let ExeDomain = _.ExeDomain in { 8864 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8865 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 8866 "$src3, $src2, $src1", "$src1, $src2, $src3", 8867 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), 8868 (i32 imm:$src3)))>, 8869 Sched<[sched]>; 8870 8871 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 8872 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 8873 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3", 8874 (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), 8875 (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B, 8876 Sched<[sched]>; 8877 8878 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 8879 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 8880 OpcodeStr, 8881 "$src3, $src2, $src1", "$src1, $src2, $src3", 8882 (_.VT (X86RndScales _.RC:$src1, 8883 _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>, 8884 Sched<[sched.Folded, ReadAfterLd]>; 8885 8886 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { 8887 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 8888 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3), 8889 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8890 []>, Sched<[sched]>; 8891 8892 let mayLoad = 1 in 8893 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 8894 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 8895 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8896 []>, Sched<[sched.Folded, ReadAfterLd]>; 8897 } 8898 } 8899 8900 let Predicates = [HasAVX512] in { 8901 def : Pat<(ffloor _.FRC:$src), 8902 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), 8903 _.FRC:$src, (i32 0x9)))>; 8904 def : Pat<(fceil _.FRC:$src), 8905 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), 8906 _.FRC:$src, (i32 0xa)))>; 8907 def : Pat<(ftrunc _.FRC:$src), 8908 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), 8909 _.FRC:$src, (i32 0xb)))>; 8910 def : Pat<(frint _.FRC:$src), 8911 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), 8912 _.FRC:$src, (i32 0x4)))>; 8913 def : Pat<(fnearbyint _.FRC:$src), 8914 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), 8915 _.FRC:$src, (i32 0xc)))>; 8916 } 8917 8918 let Predicates = [HasAVX512, OptForSize] in { 8919 def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), 8920 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), 8921 addr:$src, (i32 0x9)))>; 8922 def : Pat<(fceil (_.ScalarLdFrag addr:$src)), 8923 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), 8924 addr:$src, (i32 0xa)))>; 8925 def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), 8926 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), 8927 addr:$src, (i32 0xb)))>; 8928 def : Pat<(frint (_.ScalarLdFrag addr:$src)), 8929 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), 8930 addr:$src, (i32 0x4)))>; 8931 def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), 8932 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), 8933 addr:$src, (i32 0xc)))>; 8934 } 8935 } 8936 8937 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless", 8938 SchedWriteFRnd.Scl, f32x_info>, 8939 AVX512AIi8Base, EVEX_4V, 8940 EVEX_CD8<32, CD8VT1>; 8941 8942 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd", 8943 SchedWriteFRnd.Scl, f64x_info>, 8944 VEX_W, AVX512AIi8Base, EVEX_4V, 8945 EVEX_CD8<64, CD8VT1>; 8946 8947 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move, 8948 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP, 8949 dag OutMask, Predicate BasePredicate> { 8950 let Predicates = [BasePredicate] in { 8951 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, 8952 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 8953 (extractelt _.VT:$dst, (iPTR 0))))), 8954 (!cast<Instruction>("V"#OpcPrefix#r_Intk) 8955 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; 8956 8957 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, 8958 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 8959 ZeroFP))), 8960 (!cast<Instruction>("V"#OpcPrefix#r_Intkz) 8961 OutMask, _.VT:$src2, _.VT:$src1)>; 8962 } 8963 } 8964 8965 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss, 8966 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info, 8967 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 8968 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd, 8969 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info, 8970 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 8971 8972 multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move, 8973 X86VectorVTInfo _, PatLeaf ZeroFP, 8974 bits<8> ImmV, Predicate BasePredicate> { 8975 let Predicates = [BasePredicate] in { 8976 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask, 8977 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 8978 (extractelt _.VT:$dst, (iPTR 0))))), 8979 (!cast<Instruction>("V"#OpcPrefix#Zr_Intk) 8980 _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>; 8981 8982 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask, 8983 (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))), 8984 (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz) 8985 VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>; 8986 } 8987 } 8988 8989 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss, 8990 v4f32x_info, fp32imm0, 0x01, HasAVX512>; 8991 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss, 8992 v4f32x_info, fp32imm0, 0x02, HasAVX512>; 8993 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd, 8994 v2f64x_info, fp64imm0, 0x01, HasAVX512>; 8995 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd, 8996 v2f64x_info, fp64imm0, 0x02, HasAVX512>; 8997 8998 8999 //------------------------------------------------- 9000 // Integer truncate and extend operations 9001 //------------------------------------------------- 9002 9003 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 9004 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo, 9005 X86VectorVTInfo DestInfo, X86MemOperand x86memop> { 9006 let ExeDomain = DestInfo.ExeDomain in 9007 defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst), 9008 (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1", 9009 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>, 9010 EVEX, T8XS, Sched<[sched]>; 9011 9012 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in { 9013 def mr : AVX512XS8I<opc, MRMDestMem, (outs), 9014 (ins x86memop:$dst, SrcInfo.RC:$src), 9015 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, 9016 EVEX, Sched<[sched.Folded]>; 9017 9018 def mrk : AVX512XS8I<opc, MRMDestMem, (outs), 9019 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9020 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, 9021 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable; 9022 }//mayStore = 1, hasSideEffects = 0 9023 } 9024 9025 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo, 9026 X86VectorVTInfo DestInfo, 9027 PatFrag truncFrag, PatFrag mtruncFrag, 9028 string Name> { 9029 9030 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), 9031 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr) 9032 addr:$dst, SrcInfo.RC:$src)>; 9033 9034 def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask, 9035 (SrcInfo.VT SrcInfo.RC:$src)), 9036 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk) 9037 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; 9038 } 9039 9040 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128, 9041 SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched, 9042 AVX512VLVectorVTInfo VTSrcInfo, 9043 X86VectorVTInfo DestInfoZ128, 9044 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, 9045 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, 9046 X86MemOperand x86memopZ, PatFrag truncFrag, 9047 PatFrag mtruncFrag, Predicate prd = HasAVX512>{ 9048 9049 let Predicates = [HasVLX, prd] in { 9050 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, sched, 9051 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>, 9052 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128, 9053 truncFrag, mtruncFrag, NAME>, EVEX_V128; 9054 9055 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, sched, 9056 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>, 9057 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256, 9058 truncFrag, mtruncFrag, NAME>, EVEX_V256; 9059 } 9060 let Predicates = [prd] in 9061 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, sched, 9062 VTSrcInfo.info512, DestInfoZ, x86memopZ>, 9063 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ, 9064 truncFrag, mtruncFrag, NAME>, EVEX_V512; 9065 } 9066 9067 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9068 X86FoldableSchedWrite sched, PatFrag StoreNode, 9069 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { 9070 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched, 9071 avx512vl_i64_info, v16i8x_info, v16i8x_info, 9072 v16i8x_info, i16mem, i32mem, i64mem, StoreNode, 9073 MaskedStoreNode>, EVEX_CD8<8, CD8VO>; 9074 } 9075 9076 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9077 X86FoldableSchedWrite sched, PatFrag StoreNode, 9078 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { 9079 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched, 9080 avx512vl_i64_info, v8i16x_info, v8i16x_info, 9081 v8i16x_info, i32mem, i64mem, i128mem, StoreNode, 9082 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>; 9083 } 9084 9085 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode, 9086 X86FoldableSchedWrite sched, PatFrag StoreNode, 9087 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { 9088 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched, 9089 avx512vl_i64_info, v4i32x_info, v4i32x_info, 9090 v8i32x_info, i64mem, i128mem, i256mem, StoreNode, 9091 MaskedStoreNode>, EVEX_CD8<32, CD8VH>; 9092 } 9093 9094 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode, 9095 X86FoldableSchedWrite sched, PatFrag StoreNode, 9096 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { 9097 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched, 9098 avx512vl_i32_info, v16i8x_info, v16i8x_info, 9099 v16i8x_info, i32mem, i64mem, i128mem, StoreNode, 9100 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>; 9101 } 9102 9103 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9104 X86FoldableSchedWrite sched, PatFrag StoreNode, 9105 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { 9106 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched, 9107 avx512vl_i32_info, v8i16x_info, v8i16x_info, 9108 v16i16x_info, i64mem, i128mem, i256mem, StoreNode, 9109 MaskedStoreNode>, EVEX_CD8<16, CD8VH>; 9110 } 9111 9112 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9113 X86FoldableSchedWrite sched, PatFrag StoreNode, 9114 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { 9115 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9116 sched, avx512vl_i16_info, v16i8x_info, v16i8x_info, 9117 v32i8x_info, i64mem, i128mem, i256mem, StoreNode, 9118 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>; 9119 } 9120 9121 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256, 9122 truncstorevi8, masked_truncstorevi8, X86vtrunc>; 9123 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256, 9124 truncstore_s_vi8, masked_truncstore_s_vi8>; 9125 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256, 9126 truncstore_us_vi8, masked_truncstore_us_vi8>; 9127 9128 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256, 9129 truncstorevi16, masked_truncstorevi16, X86vtrunc>; 9130 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256, 9131 truncstore_s_vi16, masked_truncstore_s_vi16>; 9132 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256, 9133 truncstore_us_vi16, masked_truncstore_us_vi16>; 9134 9135 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256, 9136 truncstorevi32, masked_truncstorevi32, X86vtrunc>; 9137 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256, 9138 truncstore_s_vi32, masked_truncstore_s_vi32>; 9139 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256, 9140 truncstore_us_vi32, masked_truncstore_us_vi32>; 9141 9142 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256, 9143 truncstorevi8, masked_truncstorevi8, X86vtrunc>; 9144 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256, 9145 truncstore_s_vi8, masked_truncstore_s_vi8>; 9146 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256, 9147 truncstore_us_vi8, masked_truncstore_us_vi8>; 9148 9149 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256, 9150 truncstorevi16, masked_truncstorevi16, X86vtrunc>; 9151 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256, 9152 truncstore_s_vi16, masked_truncstore_s_vi16>; 9153 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256, 9154 truncstore_us_vi16, masked_truncstore_us_vi16>; 9155 9156 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256, 9157 truncstorevi8, masked_truncstorevi8, X86vtrunc>; 9158 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256, 9159 truncstore_s_vi8, masked_truncstore_s_vi8>; 9160 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256, 9161 truncstore_us_vi8, masked_truncstore_us_vi8>; 9162 9163 let Predicates = [HasAVX512, NoVLX] in { 9164 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))), 9165 (v8i16 (EXTRACT_SUBREG 9166 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 9167 VR256X:$src, sub_ymm)))), sub_xmm))>; 9168 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))), 9169 (v4i32 (EXTRACT_SUBREG 9170 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 9171 VR256X:$src, sub_ymm)))), sub_xmm))>; 9172 } 9173 9174 let Predicates = [HasBWI, NoVLX] in { 9175 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), 9176 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF), 9177 VR256X:$src, sub_ymm))), sub_xmm))>; 9178 } 9179 9180 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 9181 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, 9182 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{ 9183 let ExeDomain = DestInfo.ExeDomain in { 9184 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), 9185 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src", 9186 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>, 9187 EVEX, Sched<[sched]>; 9188 9189 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), 9190 (ins x86memop:$src), OpcodeStr ,"$src", "$src", 9191 (DestInfo.VT (LdFrag addr:$src))>, 9192 EVEX, Sched<[sched.Folded]>; 9193 } 9194 } 9195 9196 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr, 9197 SDNode OpNode, SDNode InVecNode, string ExtTy, 9198 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 9199 let Predicates = [HasVLX, HasBWI] in { 9200 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info, 9201 v16i8x_info, i64mem, LdFrag, InVecNode>, 9202 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG; 9203 9204 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info, 9205 v16i8x_info, i128mem, LdFrag, OpNode>, 9206 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG; 9207 } 9208 let Predicates = [HasBWI] in { 9209 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info, 9210 v32i8x_info, i256mem, LdFrag, OpNode>, 9211 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG; 9212 } 9213 } 9214 9215 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr, 9216 SDNode OpNode, SDNode InVecNode, string ExtTy, 9217 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 9218 let Predicates = [HasVLX, HasAVX512] in { 9219 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info, 9220 v16i8x_info, i32mem, LdFrag, InVecNode>, 9221 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; 9222 9223 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info, 9224 v16i8x_info, i64mem, LdFrag, OpNode>, 9225 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; 9226 } 9227 let Predicates = [HasAVX512] in { 9228 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info, 9229 v16i8x_info, i128mem, LdFrag, OpNode>, 9230 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; 9231 } 9232 } 9233 9234 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr, 9235 SDNode OpNode, SDNode InVecNode, string ExtTy, 9236 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { 9237 let Predicates = [HasVLX, HasAVX512] in { 9238 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 9239 v16i8x_info, i16mem, LdFrag, InVecNode>, 9240 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG; 9241 9242 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 9243 v16i8x_info, i32mem, LdFrag, OpNode>, 9244 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG; 9245 } 9246 let Predicates = [HasAVX512] in { 9247 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 9248 v16i8x_info, i64mem, LdFrag, OpNode>, 9249 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG; 9250 } 9251 } 9252 9253 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr, 9254 SDNode OpNode, SDNode InVecNode, string ExtTy, 9255 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { 9256 let Predicates = [HasVLX, HasAVX512] in { 9257 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info, 9258 v8i16x_info, i64mem, LdFrag, InVecNode>, 9259 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG; 9260 9261 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info, 9262 v8i16x_info, i128mem, LdFrag, OpNode>, 9263 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG; 9264 } 9265 let Predicates = [HasAVX512] in { 9266 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info, 9267 v16i16x_info, i256mem, LdFrag, OpNode>, 9268 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG; 9269 } 9270 } 9271 9272 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr, 9273 SDNode OpNode, SDNode InVecNode, string ExtTy, 9274 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { 9275 let Predicates = [HasVLX, HasAVX512] in { 9276 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 9277 v8i16x_info, i32mem, LdFrag, InVecNode>, 9278 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; 9279 9280 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 9281 v8i16x_info, i64mem, LdFrag, OpNode>, 9282 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; 9283 } 9284 let Predicates = [HasAVX512] in { 9285 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 9286 v8i16x_info, i128mem, LdFrag, OpNode>, 9287 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; 9288 } 9289 } 9290 9291 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr, 9292 SDNode OpNode, SDNode InVecNode, string ExtTy, 9293 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> { 9294 9295 let Predicates = [HasVLX, HasAVX512] in { 9296 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, 9297 v4i32x_info, i64mem, LdFrag, InVecNode>, 9298 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; 9299 9300 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, 9301 v4i32x_info, i128mem, LdFrag, OpNode>, 9302 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; 9303 } 9304 let Predicates = [HasAVX512] in { 9305 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, 9306 v8i32x_info, i256mem, LdFrag, OpNode>, 9307 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; 9308 } 9309 } 9310 9311 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>; 9312 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>; 9313 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>; 9314 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>; 9315 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>; 9316 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>; 9317 9318 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>; 9319 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>; 9320 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>; 9321 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>; 9322 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>; 9323 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>; 9324 9325 9326 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, 9327 SDNode InVecOp> { 9328 // 128-bit patterns 9329 let Predicates = [HasVLX, HasBWI] in { 9330 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9331 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; 9332 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 9333 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; 9334 def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))), 9335 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; 9336 def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), 9337 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; 9338 def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))), 9339 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; 9340 } 9341 let Predicates = [HasVLX] in { 9342 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 9343 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>; 9344 def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))), 9345 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>; 9346 def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), 9347 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>; 9348 def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))), 9349 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>; 9350 9351 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))), 9352 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>; 9353 def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))), 9354 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>; 9355 def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), 9356 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>; 9357 def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))), 9358 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>; 9359 9360 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9361 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; 9362 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 9363 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; 9364 def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))), 9365 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; 9366 def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), 9367 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; 9368 def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))), 9369 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; 9370 9371 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 9372 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>; 9373 def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))), 9374 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>; 9375 def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), 9376 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>; 9377 def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))), 9378 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>; 9379 9380 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9381 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; 9382 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 9383 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; 9384 def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))), 9385 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; 9386 def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))), 9387 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; 9388 def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))), 9389 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; 9390 } 9391 // 256-bit patterns 9392 let Predicates = [HasVLX, HasBWI] in { 9393 def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), 9394 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>; 9395 def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), 9396 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>; 9397 def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), 9398 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>; 9399 } 9400 let Predicates = [HasVLX] in { 9401 def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9402 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; 9403 def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), 9404 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; 9405 def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), 9406 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; 9407 def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), 9408 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; 9409 9410 def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 9411 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>; 9412 def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))), 9413 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>; 9414 def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), 9415 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>; 9416 def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), 9417 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>; 9418 9419 def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))), 9420 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>; 9421 def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))), 9422 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>; 9423 def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), 9424 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>; 9425 9426 def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9427 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; 9428 def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))), 9429 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; 9430 def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), 9431 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; 9432 def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))), 9433 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; 9434 9435 def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))), 9436 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>; 9437 def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))), 9438 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>; 9439 def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))), 9440 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>; 9441 } 9442 // 512-bit patterns 9443 let Predicates = [HasBWI] in { 9444 def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))), 9445 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>; 9446 } 9447 let Predicates = [HasAVX512] in { 9448 def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), 9449 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>; 9450 9451 def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 9452 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>; 9453 def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), 9454 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>; 9455 9456 def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))), 9457 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>; 9458 9459 def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))), 9460 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>; 9461 9462 def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))), 9463 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>; 9464 } 9465 } 9466 9467 defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>; 9468 defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>; 9469 9470 //===----------------------------------------------------------------------===// 9471 // GATHER - SCATTER Operations 9472 9473 // FIXME: Improve scheduling of gather/scatter instructions. 9474 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9475 X86MemOperand memop, PatFrag GatherNode, 9476 RegisterClass MaskRC = _.KRCWM> { 9477 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb", 9478 ExeDomain = _.ExeDomain in 9479 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb), 9480 (ins _.RC:$src1, MaskRC:$mask, memop:$src2), 9481 !strconcat(OpcodeStr#_.Suffix, 9482 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), 9483 [(set _.RC:$dst, MaskRC:$mask_wb, 9484 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask, 9485 vectoraddr:$src2))]>, EVEX, EVEX_K, 9486 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>; 9487 } 9488 9489 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc, 9490 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9491 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, 9492 vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W; 9493 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512, 9494 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W; 9495 let Predicates = [HasVLX] in { 9496 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256, 9497 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W; 9498 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256, 9499 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W; 9500 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128, 9501 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W; 9502 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128, 9503 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W; 9504 } 9505 } 9506 9507 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc, 9508 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9509 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem, 9510 mgatherv16i32>, EVEX_V512; 9511 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem, 9512 mgatherv8i64>, EVEX_V512; 9513 let Predicates = [HasVLX] in { 9514 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256, 9515 vy256xmem, mgatherv8i32>, EVEX_V256; 9516 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128, 9517 vy128xmem, mgatherv4i64>, EVEX_V256; 9518 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128, 9519 vx128xmem, mgatherv4i32>, EVEX_V128; 9520 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128, 9521 vx64xmem, mgatherv2i64, VK2WM>, 9522 EVEX_V128; 9523 } 9524 } 9525 9526 9527 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">, 9528 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">; 9529 9530 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">, 9531 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; 9532 9533 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9534 X86MemOperand memop, PatFrag ScatterNode, 9535 RegisterClass MaskRC = _.KRCWM> { 9536 9537 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in 9538 9539 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb), 9540 (ins memop:$dst, MaskRC:$mask, _.RC:$src), 9541 !strconcat(OpcodeStr#_.Suffix, 9542 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"), 9543 [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src), 9544 MaskRC:$mask, vectoraddr:$dst))]>, 9545 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, 9546 Sched<[WriteStore]>; 9547 } 9548 9549 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc, 9550 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9551 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, 9552 vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W; 9553 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512, 9554 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W; 9555 let Predicates = [HasVLX] in { 9556 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256, 9557 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W; 9558 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256, 9559 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W; 9560 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128, 9561 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W; 9562 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128, 9563 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W; 9564 } 9565 } 9566 9567 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc, 9568 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { 9569 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem, 9570 mscatterv16i32>, EVEX_V512; 9571 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem, 9572 mscatterv8i64>, EVEX_V512; 9573 let Predicates = [HasVLX] in { 9574 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256, 9575 vy256xmem, mscatterv8i32>, EVEX_V256; 9576 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128, 9577 vy128xmem, mscatterv4i64>, EVEX_V256; 9578 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128, 9579 vx128xmem, mscatterv4i32>, EVEX_V128; 9580 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128, 9581 vx64xmem, mscatterv2i64, VK2WM>, 9582 EVEX_V128; 9583 } 9584 } 9585 9586 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">, 9587 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">; 9588 9589 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">, 9590 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">; 9591 9592 // prefetch 9593 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr, 9594 RegisterClass KRC, X86MemOperand memop> { 9595 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in 9596 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src), 9597 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>, 9598 EVEX, EVEX_K, Sched<[WriteLoad]>; 9599 } 9600 9601 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps", 9602 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9603 9604 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps", 9605 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9606 9607 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd", 9608 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9609 9610 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd", 9611 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9612 9613 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps", 9614 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9615 9616 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps", 9617 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9618 9619 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd", 9620 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9621 9622 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd", 9623 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9624 9625 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps", 9626 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9627 9628 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps", 9629 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9630 9631 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd", 9632 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9633 9634 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd", 9635 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9636 9637 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps", 9638 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; 9639 9640 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps", 9641 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; 9642 9643 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd", 9644 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; 9645 9646 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", 9647 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; 9648 9649 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > { 9650 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src), 9651 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"), 9652 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>, 9653 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc? 9654 } 9655 9656 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo, 9657 string OpcodeStr, Predicate prd> { 9658 let Predicates = [prd] in 9659 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512; 9660 9661 let Predicates = [prd, HasVLX] in { 9662 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256; 9663 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128; 9664 } 9665 } 9666 9667 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>; 9668 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W; 9669 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>; 9670 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W; 9671 9672 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > { 9673 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src), 9674 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 9675 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>, 9676 EVEX, Sched<[WriteMove]>; 9677 } 9678 9679 // Use 512bit version to implement 128/256 bit in case NoVLX. 9680 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo, 9681 X86VectorVTInfo _, 9682 string Name> { 9683 9684 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))), 9685 (_.KVT (COPY_TO_REGCLASS 9686 (!cast<Instruction>(Name#"Zrr") 9687 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), 9688 _.RC:$src, _.SubRegIdx)), 9689 _.KRC))>; 9690 } 9691 9692 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr, 9693 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 9694 let Predicates = [prd] in 9695 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>, 9696 EVEX_V512; 9697 9698 let Predicates = [prd, HasVLX] in { 9699 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>, 9700 EVEX_V256; 9701 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>, 9702 EVEX_V128; 9703 } 9704 let Predicates = [prd, NoVLX] in { 9705 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>; 9706 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>; 9707 } 9708 } 9709 9710 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m", 9711 avx512vl_i8_info, HasBWI>; 9712 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m", 9713 avx512vl_i16_info, HasBWI>, VEX_W; 9714 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m", 9715 avx512vl_i32_info, HasDQI>; 9716 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", 9717 avx512vl_i64_info, HasDQI>, VEX_W; 9718 9719 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI 9720 // is available, but BWI is not. We can't handle this in lowering because 9721 // a target independent DAG combine likes to combine sext and trunc. 9722 let Predicates = [HasDQI, NoBWI] in { 9723 def : Pat<(v16i8 (sext (v16i1 VK16:$src))), 9724 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 9725 def : Pat<(v16i16 (sext (v16i1 VK16:$src))), 9726 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 9727 } 9728 9729 //===----------------------------------------------------------------------===// 9730 // AVX-512 - COMPRESS and EXPAND 9731 // 9732 9733 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _, 9734 string OpcodeStr, X86FoldableSchedWrite sched> { 9735 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst), 9736 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 9737 (_.VT (X86compress _.RC:$src1))>, AVX5128IBase, 9738 Sched<[sched]>; 9739 9740 let mayStore = 1, hasSideEffects = 0 in 9741 def mr : AVX5128I<opc, MRMDestMem, (outs), 9742 (ins _.MemOp:$dst, _.RC:$src), 9743 OpcodeStr # "\t{$src, $dst|$dst, $src}", 9744 []>, EVEX_CD8<_.EltSize, CD8VT1>, 9745 Sched<[sched.Folded]>; 9746 9747 def mrk : AVX5128I<opc, MRMDestMem, (outs), 9748 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 9749 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 9750 []>, 9751 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, 9752 Sched<[sched.Folded]>; 9753 } 9754 9755 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 9756 def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask, 9757 (_.VT _.RC:$src)), 9758 (!cast<Instruction>(Name#_.ZSuffix##mrk) 9759 addr:$dst, _.KRCWM:$mask, _.RC:$src)>; 9760 } 9761 9762 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr, 9763 X86FoldableSchedWrite sched, 9764 AVX512VLVectorVTInfo VTInfo, 9765 Predicate Pred = HasAVX512> { 9766 let Predicates = [Pred] in 9767 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>, 9768 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 9769 9770 let Predicates = [Pred, HasVLX] in { 9771 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>, 9772 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 9773 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>, 9774 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 9775 } 9776 } 9777 9778 // FIXME: Is there a better scheduler class for VPCOMPRESS? 9779 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256, 9780 avx512vl_i32_info>, EVEX, NotMemoryFoldable; 9781 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256, 9782 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable; 9783 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256, 9784 avx512vl_f32_info>, EVEX, NotMemoryFoldable; 9785 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256, 9786 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable; 9787 9788 // expand 9789 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, 9790 string OpcodeStr, X86FoldableSchedWrite sched> { 9791 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9792 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 9793 (_.VT (X86expand _.RC:$src1))>, AVX5128IBase, 9794 Sched<[sched]>; 9795 9796 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9797 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", 9798 (_.VT (X86expand (_.VT (bitconvert 9799 (_.LdFrag addr:$src1)))))>, 9800 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, 9801 Sched<[sched.Folded, ReadAfterLd]>; 9802 } 9803 9804 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 9805 9806 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)), 9807 (!cast<Instruction>(Name#_.ZSuffix##rmkz) 9808 _.KRCWM:$mask, addr:$src)>; 9809 9810 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)), 9811 (!cast<Instruction>(Name#_.ZSuffix##rmkz) 9812 _.KRCWM:$mask, addr:$src)>; 9813 9814 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, 9815 (_.VT _.RC:$src0))), 9816 (!cast<Instruction>(Name#_.ZSuffix##rmk) 9817 _.RC:$src0, _.KRCWM:$mask, addr:$src)>; 9818 } 9819 9820 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, 9821 X86FoldableSchedWrite sched, 9822 AVX512VLVectorVTInfo VTInfo, 9823 Predicate Pred = HasAVX512> { 9824 let Predicates = [Pred] in 9825 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>, 9826 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 9827 9828 let Predicates = [Pred, HasVLX] in { 9829 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>, 9830 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 9831 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>, 9832 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 9833 } 9834 } 9835 9836 // FIXME: Is there a better scheduler class for VPEXPAND? 9837 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256, 9838 avx512vl_i32_info>, EVEX; 9839 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256, 9840 avx512vl_i64_info>, EVEX, VEX_W; 9841 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256, 9842 avx512vl_f32_info>, EVEX; 9843 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256, 9844 avx512vl_f64_info>, EVEX, VEX_W; 9845 9846 //handle instruction reg_vec1 = op(reg_vec,imm) 9847 // op(mem_vec,imm) 9848 // op(broadcast(eltVt),imm) 9849 //all instruction created with FROUND_CURRENT 9850 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 9851 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9852 let ExeDomain = _.ExeDomain in { 9853 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9854 (ins _.RC:$src1, i32u8imm:$src2), 9855 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", 9856 (OpNode (_.VT _.RC:$src1), 9857 (i32 imm:$src2))>, Sched<[sched]>; 9858 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9859 (ins _.MemOp:$src1, i32u8imm:$src2), 9860 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", 9861 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 9862 (i32 imm:$src2))>, 9863 Sched<[sched.Folded, ReadAfterLd]>; 9864 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9865 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 9866 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr, 9867 "${src1}"##_.BroadcastStr##", $src2", 9868 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))), 9869 (i32 imm:$src2))>, EVEX_B, 9870 Sched<[sched.Folded, ReadAfterLd]>; 9871 } 9872 } 9873 9874 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 9875 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 9876 SDNode OpNode, X86FoldableSchedWrite sched, 9877 X86VectorVTInfo _> { 9878 let ExeDomain = _.ExeDomain in 9879 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9880 (ins _.RC:$src1, i32u8imm:$src2), 9881 OpcodeStr##_.Suffix, "$src2, {sae}, $src1", 9882 "$src1, {sae}, $src2", 9883 (OpNode (_.VT _.RC:$src1), 9884 (i32 imm:$src2), 9885 (i32 FROUND_NO_EXC))>, 9886 EVEX_B, Sched<[sched]>; 9887 } 9888 9889 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr, 9890 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, 9891 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{ 9892 let Predicates = [prd] in { 9893 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, 9894 _.info512>, 9895 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, 9896 sched.ZMM, _.info512>, EVEX_V512; 9897 } 9898 let Predicates = [prd, HasVLX] in { 9899 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, 9900 _.info128>, EVEX_V128; 9901 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, 9902 _.info256>, EVEX_V256; 9903 } 9904 } 9905 9906 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 9907 // op(reg_vec2,mem_vec,imm) 9908 // op(reg_vec2,broadcast(eltVt),imm) 9909 //all instruction created with FROUND_CURRENT 9910 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 9911 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 9912 let ExeDomain = _.ExeDomain in { 9913 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9914 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 9915 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 9916 (OpNode (_.VT _.RC:$src1), 9917 (_.VT _.RC:$src2), 9918 (i32 imm:$src3))>, 9919 Sched<[sched]>; 9920 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9921 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), 9922 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 9923 (OpNode (_.VT _.RC:$src1), 9924 (_.VT (bitconvert (_.LdFrag addr:$src2))), 9925 (i32 imm:$src3))>, 9926 Sched<[sched.Folded, ReadAfterLd]>; 9927 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9928 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 9929 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", 9930 "$src1, ${src2}"##_.BroadcastStr##", $src3", 9931 (OpNode (_.VT _.RC:$src1), 9932 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), 9933 (i32 imm:$src3))>, EVEX_B, 9934 Sched<[sched.Folded, ReadAfterLd]>; 9935 } 9936 } 9937 9938 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 9939 // op(reg_vec2,mem_vec,imm) 9940 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 9941 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, 9942 X86VectorVTInfo SrcInfo>{ 9943 let ExeDomain = DestInfo.ExeDomain in { 9944 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), 9945 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3), 9946 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 9947 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 9948 (SrcInfo.VT SrcInfo.RC:$src2), 9949 (i8 imm:$src3)))>, 9950 Sched<[sched]>; 9951 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), 9952 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3), 9953 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 9954 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 9955 (SrcInfo.VT (bitconvert 9956 (SrcInfo.LdFrag addr:$src2))), 9957 (i8 imm:$src3)))>, 9958 Sched<[sched.Folded, ReadAfterLd]>; 9959 } 9960 } 9961 9962 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 9963 // op(reg_vec2,mem_vec,imm) 9964 // op(reg_vec2,broadcast(eltVt),imm) 9965 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 9966 X86FoldableSchedWrite sched, X86VectorVTInfo _>: 9967 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{ 9968 9969 let ExeDomain = _.ExeDomain in 9970 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9971 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 9972 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", 9973 "$src1, ${src2}"##_.BroadcastStr##", $src3", 9974 (OpNode (_.VT _.RC:$src1), 9975 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), 9976 (i8 imm:$src3))>, EVEX_B, 9977 Sched<[sched.Folded, ReadAfterLd]>; 9978 } 9979 9980 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 9981 // op(reg_vec2,mem_scalar,imm) 9982 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 9983 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9984 let ExeDomain = _.ExeDomain in { 9985 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9986 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 9987 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 9988 (OpNode (_.VT _.RC:$src1), 9989 (_.VT _.RC:$src2), 9990 (i32 imm:$src3))>, 9991 Sched<[sched]>; 9992 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9993 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 9994 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 9995 (OpNode (_.VT _.RC:$src1), 9996 (_.VT (scalar_to_vector 9997 (_.ScalarLdFrag addr:$src2))), 9998 (i32 imm:$src3))>, 9999 Sched<[sched.Folded, ReadAfterLd]>; 10000 } 10001 } 10002 10003 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10004 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10005 SDNode OpNode, X86FoldableSchedWrite sched, 10006 X86VectorVTInfo _> { 10007 let ExeDomain = _.ExeDomain in 10008 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10009 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10010 OpcodeStr, "$src3, {sae}, $src2, $src1", 10011 "$src1, $src2, {sae}, $src3", 10012 (OpNode (_.VT _.RC:$src1), 10013 (_.VT _.RC:$src2), 10014 (i32 imm:$src3), 10015 (i32 FROUND_NO_EXC))>, 10016 EVEX_B, Sched<[sched]>; 10017 } 10018 10019 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10020 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10021 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10022 let ExeDomain = _.ExeDomain in 10023 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10024 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10025 OpcodeStr, "$src3, {sae}, $src2, $src1", 10026 "$src1, $src2, {sae}, $src3", 10027 (OpNode (_.VT _.RC:$src1), 10028 (_.VT _.RC:$src2), 10029 (i32 imm:$src3), 10030 (i32 FROUND_NO_EXC))>, 10031 EVEX_B, Sched<[sched]>; 10032 } 10033 10034 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr, 10035 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, 10036 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{ 10037 let Predicates = [prd] in { 10038 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10039 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>, 10040 EVEX_V512; 10041 10042 } 10043 let Predicates = [prd, HasVLX] in { 10044 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 10045 EVEX_V128; 10046 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10047 EVEX_V256; 10048 } 10049 } 10050 10051 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr, 10052 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo, 10053 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { 10054 let Predicates = [Pred] in { 10055 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512, 10056 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V; 10057 } 10058 let Predicates = [Pred, HasVLX] in { 10059 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128, 10060 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V; 10061 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256, 10062 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V; 10063 } 10064 } 10065 10066 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _, 10067 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched, 10068 Predicate Pred = HasAVX512> { 10069 let Predicates = [Pred] in { 10070 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10071 EVEX_V512; 10072 } 10073 let Predicates = [Pred, HasVLX] in { 10074 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 10075 EVEX_V128; 10076 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10077 EVEX_V256; 10078 } 10079 } 10080 10081 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr, 10082 X86VectorVTInfo _, bits<8> opc, SDNode OpNode, 10083 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> { 10084 let Predicates = [prd] in { 10085 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>, 10086 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>; 10087 } 10088 } 10089 10090 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr, 10091 bits<8> opcPs, bits<8> opcPd, SDNode OpNode, 10092 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{ 10093 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info, 10094 opcPs, OpNode, OpNodeRnd, sched, prd>, 10095 EVEX_CD8<32, CD8VF>; 10096 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info, 10097 opcPd, OpNode, OpNodeRnd, sched, prd>, 10098 EVEX_CD8<64, CD8VF>, VEX_W; 10099 } 10100 10101 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, 10102 X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>, 10103 AVX512AIi8Base, EVEX; 10104 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, 10105 X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>, 10106 AVX512AIi8Base, EVEX; 10107 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, 10108 X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>, 10109 AVX512AIi8Base, EVEX; 10110 10111 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 10112 0x50, X86VRange, X86VRangeRnd, 10113 SchedWriteFAdd, HasDQI>, 10114 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10115 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 10116 0x50, X86VRange, X86VRangeRnd, 10117 SchedWriteFAdd, HasDQI>, 10118 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10119 10120 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", 10121 f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>, 10122 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10123 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 10124 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>, 10125 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10126 10127 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, 10128 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>, 10129 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10130 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, 10131 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>, 10132 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10133 10134 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, 10135 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>, 10136 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 10137 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, 10138 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>, 10139 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 10140 10141 10142 multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> { 10143 // Register 10144 def : Pat<(_.VT (ffloor _.RC:$src)), 10145 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri") 10146 _.RC:$src, (i32 0x9))>; 10147 def : Pat<(_.VT (fnearbyint _.RC:$src)), 10148 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri") 10149 _.RC:$src, (i32 0xC))>; 10150 def : Pat<(_.VT (fceil _.RC:$src)), 10151 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri") 10152 _.RC:$src, (i32 0xA))>; 10153 def : Pat<(_.VT (frint _.RC:$src)), 10154 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri") 10155 _.RC:$src, (i32 0x4))>; 10156 def : Pat<(_.VT (ftrunc _.RC:$src)), 10157 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri") 10158 _.RC:$src, (i32 0xB))>; 10159 10160 // Merge-masking 10161 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)), 10162 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik") 10163 _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>; 10164 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)), 10165 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik") 10166 _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>; 10167 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)), 10168 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik") 10169 _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>; 10170 def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)), 10171 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik") 10172 _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>; 10173 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)), 10174 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik") 10175 _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>; 10176 10177 // Zero-masking 10178 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), 10179 _.ImmAllZerosV)), 10180 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz") 10181 _.KRCWM:$mask, _.RC:$src, (i32 0x9))>; 10182 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), 10183 _.ImmAllZerosV)), 10184 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz") 10185 _.KRCWM:$mask, _.RC:$src, (i32 0xC))>; 10186 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), 10187 _.ImmAllZerosV)), 10188 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz") 10189 _.KRCWM:$mask, _.RC:$src, (i32 0xA))>; 10190 def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), 10191 _.ImmAllZerosV)), 10192 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz") 10193 _.KRCWM:$mask, _.RC:$src, (i32 0x4))>; 10194 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), 10195 _.ImmAllZerosV)), 10196 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz") 10197 _.KRCWM:$mask, _.RC:$src, (i32 0xB))>; 10198 10199 // Load 10200 def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))), 10201 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi") 10202 addr:$src, (i32 0x9))>; 10203 def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))), 10204 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi") 10205 addr:$src, (i32 0xC))>; 10206 def : Pat<(_.VT (fceil (_.LdFrag addr:$src))), 10207 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi") 10208 addr:$src, (i32 0xA))>; 10209 def : Pat<(_.VT (frint (_.LdFrag addr:$src))), 10210 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi") 10211 addr:$src, (i32 0x4))>; 10212 def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))), 10213 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi") 10214 addr:$src, (i32 0xB))>; 10215 10216 // Merge-masking + load 10217 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)), 10218 _.RC:$dst)), 10219 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik") 10220 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>; 10221 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)), 10222 _.RC:$dst)), 10223 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik") 10224 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>; 10225 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)), 10226 _.RC:$dst)), 10227 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik") 10228 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>; 10229 def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)), 10230 _.RC:$dst)), 10231 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik") 10232 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>; 10233 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)), 10234 _.RC:$dst)), 10235 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik") 10236 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>; 10237 10238 // Zero-masking + load 10239 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)), 10240 _.ImmAllZerosV)), 10241 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz") 10242 _.KRCWM:$mask, addr:$src, (i32 0x9))>; 10243 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)), 10244 _.ImmAllZerosV)), 10245 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz") 10246 _.KRCWM:$mask, addr:$src, (i32 0xC))>; 10247 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)), 10248 _.ImmAllZerosV)), 10249 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz") 10250 _.KRCWM:$mask, addr:$src, (i32 0xA))>; 10251 def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)), 10252 _.ImmAllZerosV)), 10253 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz") 10254 _.KRCWM:$mask, addr:$src, (i32 0x4))>; 10255 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)), 10256 _.ImmAllZerosV)), 10257 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz") 10258 _.KRCWM:$mask, addr:$src, (i32 0xB))>; 10259 10260 // Broadcast load 10261 def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))), 10262 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi") 10263 addr:$src, (i32 0x9))>; 10264 def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))), 10265 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi") 10266 addr:$src, (i32 0xC))>; 10267 def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))), 10268 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi") 10269 addr:$src, (i32 0xA))>; 10270 def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))), 10271 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi") 10272 addr:$src, (i32 0x4))>; 10273 def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))), 10274 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi") 10275 addr:$src, (i32 0xB))>; 10276 10277 // Merge-masking + broadcast load 10278 def : Pat<(_.VT (vselect _.KRCWM:$mask, 10279 (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))), 10280 _.RC:$dst)), 10281 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik") 10282 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>; 10283 def : Pat<(_.VT (vselect _.KRCWM:$mask, 10284 (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))), 10285 _.RC:$dst)), 10286 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik") 10287 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>; 10288 def : Pat<(_.VT (vselect _.KRCWM:$mask, 10289 (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))), 10290 _.RC:$dst)), 10291 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik") 10292 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>; 10293 def : Pat<(_.VT (vselect _.KRCWM:$mask, 10294 (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))), 10295 _.RC:$dst)), 10296 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik") 10297 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>; 10298 def : Pat<(_.VT (vselect _.KRCWM:$mask, 10299 (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))), 10300 _.RC:$dst)), 10301 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik") 10302 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>; 10303 10304 // Zero-masking + broadcast load 10305 def : Pat<(_.VT (vselect _.KRCWM:$mask, 10306 (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))), 10307 _.ImmAllZerosV)), 10308 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz") 10309 _.KRCWM:$mask, addr:$src, (i32 0x9))>; 10310 def : Pat<(_.VT (vselect _.KRCWM:$mask, 10311 (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))), 10312 _.ImmAllZerosV)), 10313 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz") 10314 _.KRCWM:$mask, addr:$src, (i32 0xC))>; 10315 def : Pat<(_.VT (vselect _.KRCWM:$mask, 10316 (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))), 10317 _.ImmAllZerosV)), 10318 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz") 10319 _.KRCWM:$mask, addr:$src, (i32 0xA))>; 10320 def : Pat<(_.VT (vselect _.KRCWM:$mask, 10321 (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))), 10322 _.ImmAllZerosV)), 10323 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz") 10324 _.KRCWM:$mask, addr:$src, (i32 0x4))>; 10325 def : Pat<(_.VT (vselect _.KRCWM:$mask, 10326 (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))), 10327 _.ImmAllZerosV)), 10328 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz") 10329 _.KRCWM:$mask, addr:$src, (i32 0xB))>; 10330 } 10331 10332 let Predicates = [HasAVX512] in { 10333 defm : AVX512_rndscale_lowering<v16f32_info, "PS">; 10334 defm : AVX512_rndscale_lowering<v8f64_info, "PD">; 10335 } 10336 10337 let Predicates = [HasVLX] in { 10338 defm : AVX512_rndscale_lowering<v8f32x_info, "PS">; 10339 defm : AVX512_rndscale_lowering<v4f64x_info, "PD">; 10340 defm : AVX512_rndscale_lowering<v4f32x_info, "PS">; 10341 defm : AVX512_rndscale_lowering<v2f64x_info, "PD">; 10342 } 10343 10344 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, 10345 X86FoldableSchedWrite sched, 10346 X86VectorVTInfo _, 10347 X86VectorVTInfo CastInfo, 10348 string EVEX2VEXOvrd> { 10349 let ExeDomain = _.ExeDomain in { 10350 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10351 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 10352 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10353 (_.VT (bitconvert 10354 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, 10355 (i8 imm:$src3)))))>, 10356 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; 10357 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10358 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 10359 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10360 (_.VT 10361 (bitconvert 10362 (CastInfo.VT (X86Shuf128 _.RC:$src1, 10363 (bitconvert (_.LdFrag addr:$src2)), 10364 (i8 imm:$src3)))))>, 10365 Sched<[sched.Folded, ReadAfterLd]>, 10366 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 10367 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10368 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10369 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", 10370 "$src1, ${src2}"##_.BroadcastStr##", $src3", 10371 (_.VT 10372 (bitconvert 10373 (CastInfo.VT 10374 (X86Shuf128 _.RC:$src1, 10375 (X86VBroadcast (_.ScalarLdFrag addr:$src2)), 10376 (i8 imm:$src3)))))>, EVEX_B, 10377 Sched<[sched.Folded, ReadAfterLd]>; 10378 } 10379 } 10380 10381 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched, 10382 AVX512VLVectorVTInfo _, 10383 AVX512VLVectorVTInfo CastInfo, bits<8> opc, 10384 string EVEX2VEXOvrd>{ 10385 let Predicates = [HasAVX512] in 10386 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 10387 _.info512, CastInfo.info512, "">, EVEX_V512; 10388 10389 let Predicates = [HasAVX512, HasVLX] in 10390 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 10391 _.info256, CastInfo.info256, 10392 EVEX2VEXOvrd>, EVEX_V256; 10393 } 10394 10395 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, 10396 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10397 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, 10398 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10399 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, 10400 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 10401 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, 10402 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 10403 10404 let Predicates = [HasAVX512] in { 10405 // Provide fallback in case the load node that is used in the broadcast 10406 // patterns above is used by additional users, which prevents the pattern 10407 // selection. 10408 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))), 10409 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10410 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10411 0)>; 10412 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))), 10413 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10414 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10415 0)>; 10416 10417 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))), 10418 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10419 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10420 0)>; 10421 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))), 10422 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10423 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10424 0)>; 10425 10426 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))), 10427 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10428 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10429 0)>; 10430 10431 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))), 10432 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10433 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 10434 0)>; 10435 } 10436 10437 multiclass avx512_valign<bits<8> opc, string OpcodeStr, 10438 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 10439 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the 10440 // instantiation of this class. 10441 let ExeDomain = _.ExeDomain in { 10442 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10443 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 10444 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10445 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>, 10446 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; 10447 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10448 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 10449 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10450 (_.VT (X86VAlign _.RC:$src1, 10451 (bitconvert (_.LdFrag addr:$src2)), 10452 (i8 imm:$src3)))>, 10453 Sched<[sched.Folded, ReadAfterLd]>, 10454 EVEX2VEXOverride<"VPALIGNRrmi">; 10455 10456 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10457 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10458 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", 10459 "$src1, ${src2}"##_.BroadcastStr##", $src3", 10460 (X86VAlign _.RC:$src1, 10461 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), 10462 (i8 imm:$src3))>, EVEX_B, 10463 Sched<[sched.Folded, ReadAfterLd]>; 10464 } 10465 } 10466 10467 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched, 10468 AVX512VLVectorVTInfo _> { 10469 let Predicates = [HasAVX512] in { 10470 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>, 10471 AVX512AIi8Base, EVEX_4V, EVEX_V512; 10472 } 10473 let Predicates = [HasAVX512, HasVLX] in { 10474 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>, 10475 AVX512AIi8Base, EVEX_4V, EVEX_V128; 10476 // We can't really override the 256-bit version so change it back to unset. 10477 let EVEX2VEXOverride = ? in 10478 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>, 10479 AVX512AIi8Base, EVEX_4V, EVEX_V256; 10480 } 10481 } 10482 10483 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle, 10484 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 10485 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle, 10486 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, 10487 VEX_W; 10488 10489 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", 10490 SchedWriteShuffle, avx512vl_i8_info, 10491 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; 10492 10493 // Fragments to help convert valignq into masked valignd. Or valignq/valignd 10494 // into vpalignr. 10495 def ValignqImm32XForm : SDNodeXForm<imm, [{ 10496 return getI8Imm(N->getZExtValue() * 2, SDLoc(N)); 10497 }]>; 10498 def ValignqImm8XForm : SDNodeXForm<imm, [{ 10499 return getI8Imm(N->getZExtValue() * 8, SDLoc(N)); 10500 }]>; 10501 def ValigndImm8XForm : SDNodeXForm<imm, [{ 10502 return getI8Imm(N->getZExtValue() * 4, SDLoc(N)); 10503 }]>; 10504 10505 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode, 10506 X86VectorVTInfo From, X86VectorVTInfo To, 10507 SDNodeXForm ImmXForm> { 10508 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10509 (bitconvert 10510 (From.VT (OpNode From.RC:$src1, From.RC:$src2, 10511 imm:$src3))), 10512 To.RC:$src0)), 10513 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, 10514 To.RC:$src1, To.RC:$src2, 10515 (ImmXForm imm:$src3))>; 10516 10517 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10518 (bitconvert 10519 (From.VT (OpNode From.RC:$src1, From.RC:$src2, 10520 imm:$src3))), 10521 To.ImmAllZerosV)), 10522 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask, 10523 To.RC:$src1, To.RC:$src2, 10524 (ImmXForm imm:$src3))>; 10525 10526 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10527 (bitconvert 10528 (From.VT (OpNode From.RC:$src1, 10529 (bitconvert (To.LdFrag addr:$src2)), 10530 imm:$src3))), 10531 To.RC:$src0)), 10532 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, 10533 To.RC:$src1, addr:$src2, 10534 (ImmXForm imm:$src3))>; 10535 10536 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10537 (bitconvert 10538 (From.VT (OpNode From.RC:$src1, 10539 (bitconvert (To.LdFrag addr:$src2)), 10540 imm:$src3))), 10541 To.ImmAllZerosV)), 10542 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask, 10543 To.RC:$src1, addr:$src2, 10544 (ImmXForm imm:$src3))>; 10545 } 10546 10547 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode, 10548 X86VectorVTInfo From, 10549 X86VectorVTInfo To, 10550 SDNodeXForm ImmXForm> : 10551 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> { 10552 def : Pat<(From.VT (OpNode From.RC:$src1, 10553 (bitconvert (To.VT (X86VBroadcast 10554 (To.ScalarLdFrag addr:$src2)))), 10555 imm:$src3)), 10556 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, 10557 (ImmXForm imm:$src3))>; 10558 10559 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10560 (bitconvert 10561 (From.VT (OpNode From.RC:$src1, 10562 (bitconvert 10563 (To.VT (X86VBroadcast 10564 (To.ScalarLdFrag addr:$src2)))), 10565 imm:$src3))), 10566 To.RC:$src0)), 10567 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, 10568 To.RC:$src1, addr:$src2, 10569 (ImmXForm imm:$src3))>; 10570 10571 def : Pat<(To.VT (vselect To.KRCWM:$mask, 10572 (bitconvert 10573 (From.VT (OpNode From.RC:$src1, 10574 (bitconvert 10575 (To.VT (X86VBroadcast 10576 (To.ScalarLdFrag addr:$src2)))), 10577 imm:$src3))), 10578 To.ImmAllZerosV)), 10579 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask, 10580 To.RC:$src1, addr:$src2, 10581 (ImmXForm imm:$src3))>; 10582 } 10583 10584 let Predicates = [HasAVX512] in { 10585 // For 512-bit we lower to the widest element type we can. So we only need 10586 // to handle converting valignq to valignd. 10587 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info, 10588 v16i32_info, ValignqImm32XForm>; 10589 } 10590 10591 let Predicates = [HasVLX] in { 10592 // For 128-bit we lower to the widest element type we can. So we only need 10593 // to handle converting valignq to valignd. 10594 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info, 10595 v4i32x_info, ValignqImm32XForm>; 10596 // For 256-bit we lower to the widest element type we can. So we only need 10597 // to handle converting valignq to valignd. 10598 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info, 10599 v8i32x_info, ValignqImm32XForm>; 10600 } 10601 10602 let Predicates = [HasVLX, HasBWI] in { 10603 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR. 10604 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info, 10605 v16i8x_info, ValignqImm8XForm>; 10606 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info, 10607 v16i8x_info, ValigndImm8XForm>; 10608 } 10609 10610 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", 10611 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>, 10612 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible; 10613 10614 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10615 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10616 let ExeDomain = _.ExeDomain in { 10617 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10618 (ins _.RC:$src1), OpcodeStr, 10619 "$src1", "$src1", 10620 (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase, 10621 Sched<[sched]>; 10622 10623 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10624 (ins _.MemOp:$src1), OpcodeStr, 10625 "$src1", "$src1", 10626 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>, 10627 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, 10628 Sched<[sched.Folded]>; 10629 } 10630 } 10631 10632 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, 10633 X86FoldableSchedWrite sched, X86VectorVTInfo _> : 10634 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> { 10635 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10636 (ins _.ScalarMemOp:$src1), OpcodeStr, 10637 "${src1}"##_.BroadcastStr, 10638 "${src1}"##_.BroadcastStr, 10639 (_.VT (OpNode (X86VBroadcast 10640 (_.ScalarLdFrag addr:$src1))))>, 10641 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 10642 Sched<[sched.Folded]>; 10643 } 10644 10645 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 10646 X86SchedWriteWidths sched, 10647 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 10648 let Predicates = [prd] in 10649 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>, 10650 EVEX_V512; 10651 10652 let Predicates = [prd, HasVLX] in { 10653 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>, 10654 EVEX_V256; 10655 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>, 10656 EVEX_V128; 10657 } 10658 } 10659 10660 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 10661 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, 10662 Predicate prd> { 10663 let Predicates = [prd] in 10664 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>, 10665 EVEX_V512; 10666 10667 let Predicates = [prd, HasVLX] in { 10668 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>, 10669 EVEX_V256; 10670 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>, 10671 EVEX_V128; 10672 } 10673 } 10674 10675 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, 10676 SDNode OpNode, X86SchedWriteWidths sched, 10677 Predicate prd> { 10678 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched, 10679 avx512vl_i64_info, prd>, VEX_W; 10680 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched, 10681 avx512vl_i32_info, prd>; 10682 } 10683 10684 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, 10685 SDNode OpNode, X86SchedWriteWidths sched, 10686 Predicate prd> { 10687 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched, 10688 avx512vl_i16_info, prd>, VEX_WIG; 10689 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched, 10690 avx512vl_i8_info, prd>, VEX_WIG; 10691 } 10692 10693 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w, 10694 bits<8> opc_d, bits<8> opc_q, 10695 string OpcodeStr, SDNode OpNode, 10696 X86SchedWriteWidths sched> { 10697 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched, 10698 HasAVX512>, 10699 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched, 10700 HasBWI>; 10701 } 10702 10703 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, 10704 SchedWriteVecALU>; 10705 10706 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. 10707 let Predicates = [HasAVX512, NoVLX] in { 10708 def : Pat<(v4i64 (abs VR256X:$src)), 10709 (EXTRACT_SUBREG 10710 (VPABSQZrr 10711 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), 10712 sub_ymm)>; 10713 def : Pat<(v2i64 (abs VR128X:$src)), 10714 (EXTRACT_SUBREG 10715 (VPABSQZrr 10716 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), 10717 sub_xmm)>; 10718 } 10719 10720 // Use 512bit version to implement 128/256 bit. 10721 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode, 10722 AVX512VLVectorVTInfo _, Predicate prd> { 10723 let Predicates = [prd, NoVLX] in { 10724 def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)), 10725 (EXTRACT_SUBREG 10726 (!cast<Instruction>(InstrStr # "Zrr") 10727 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), 10728 _.info256.RC:$src1, 10729 _.info256.SubRegIdx)), 10730 _.info256.SubRegIdx)>; 10731 10732 def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), 10733 (EXTRACT_SUBREG 10734 (!cast<Instruction>(InstrStr # "Zrr") 10735 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), 10736 _.info128.RC:$src1, 10737 _.info128.SubRegIdx)), 10738 _.info128.SubRegIdx)>; 10739 } 10740 } 10741 10742 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz, 10743 SchedWriteVecIMul, HasCDI>; 10744 10745 // FIXME: Is there a better scheduler class for VPCONFLICT? 10746 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, 10747 SchedWriteVecALU, HasCDI>; 10748 10749 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. 10750 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>; 10751 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>; 10752 10753 //===---------------------------------------------------------------------===// 10754 // Counts number of ones - VPOPCNTD and VPOPCNTQ 10755 //===---------------------------------------------------------------------===// 10756 10757 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ? 10758 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop, 10759 SchedWriteVecALU, HasVPOPCNTDQ>; 10760 10761 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>; 10762 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>; 10763 10764 //===---------------------------------------------------------------------===// 10765 // Replicate Single FP - MOVSHDUP and MOVSLDUP 10766 //===---------------------------------------------------------------------===// 10767 10768 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode, 10769 X86SchedWriteWidths sched> { 10770 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched, 10771 avx512vl_f32_info, HasAVX512>, XS; 10772 } 10773 10774 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, 10775 SchedWriteFShuffle>; 10776 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, 10777 SchedWriteFShuffle>; 10778 10779 //===----------------------------------------------------------------------===// 10780 // AVX-512 - MOVDDUP 10781 //===----------------------------------------------------------------------===// 10782 10783 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode, 10784 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10785 let ExeDomain = _.ExeDomain in { 10786 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10787 (ins _.RC:$src), OpcodeStr, "$src", "$src", 10788 (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX, 10789 Sched<[sched]>; 10790 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10791 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src", 10792 (_.VT (OpNode (_.VT (scalar_to_vector 10793 (_.ScalarLdFrag addr:$src)))))>, 10794 EVEX, EVEX_CD8<_.EltSize, CD8VH>, 10795 Sched<[sched.Folded]>; 10796 } 10797 } 10798 10799 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 10800 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { 10801 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM, 10802 VTInfo.info512>, EVEX_V512; 10803 10804 let Predicates = [HasAVX512, HasVLX] in { 10805 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM, 10806 VTInfo.info256>, EVEX_V256; 10807 defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM, 10808 VTInfo.info128>, EVEX_V128; 10809 } 10810 } 10811 10812 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode, 10813 X86SchedWriteWidths sched> { 10814 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched, 10815 avx512vl_f64_info>, XD, VEX_W; 10816 } 10817 10818 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>; 10819 10820 let Predicates = [HasVLX] in { 10821 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), 10822 (VMOVDDUPZ128rm addr:$src)>; 10823 def : Pat<(v2f64 (X86VBroadcast f64:$src)), 10824 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; 10825 def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))), 10826 (VMOVDDUPZ128rm addr:$src)>; 10827 10828 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), 10829 (v2f64 VR128X:$src0)), 10830 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask, 10831 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; 10832 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), 10833 (bitconvert (v4i32 immAllZerosV))), 10834 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; 10835 10836 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), 10837 (v2f64 VR128X:$src0)), 10838 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 10839 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), 10840 (bitconvert (v4i32 immAllZerosV))), 10841 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; 10842 10843 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))), 10844 (v2f64 VR128X:$src0)), 10845 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 10846 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))), 10847 (bitconvert (v4i32 immAllZerosV))), 10848 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; 10849 } 10850 10851 //===----------------------------------------------------------------------===// 10852 // AVX-512 - Unpack Instructions 10853 //===----------------------------------------------------------------------===// 10854 10855 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512, 10856 SchedWriteFShuffleSizes, 0, 1>; 10857 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512, 10858 SchedWriteFShuffleSizes>; 10859 10860 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, 10861 SchedWriteShuffle, HasBWI>; 10862 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, 10863 SchedWriteShuffle, HasBWI>; 10864 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl, 10865 SchedWriteShuffle, HasBWI>; 10866 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, 10867 SchedWriteShuffle, HasBWI>; 10868 10869 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, 10870 SchedWriteShuffle, HasAVX512>; 10871 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, 10872 SchedWriteShuffle, HasAVX512>; 10873 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, 10874 SchedWriteShuffle, HasAVX512>; 10875 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, 10876 SchedWriteShuffle, HasAVX512>; 10877 10878 //===----------------------------------------------------------------------===// 10879 // AVX-512 - Extract & Insert Integer Instructions 10880 //===----------------------------------------------------------------------===// 10881 10882 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode, 10883 X86VectorVTInfo _> { 10884 def mr : AVX512Ii8<opc, MRMDestMem, (outs), 10885 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), 10886 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 10887 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))), 10888 addr:$dst)]>, 10889 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>; 10890 } 10891 10892 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> { 10893 let Predicates = [HasBWI] in { 10894 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst), 10895 (ins _.RC:$src1, u8imm:$src2), 10896 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 10897 [(set GR32orGR64:$dst, 10898 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>, 10899 EVEX, TAPD, Sched<[WriteVecExtract]>; 10900 10901 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; 10902 } 10903 } 10904 10905 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> { 10906 let Predicates = [HasBWI] in { 10907 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), 10908 (ins _.RC:$src1, u8imm:$src2), 10909 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 10910 [(set GR32orGR64:$dst, 10911 (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>, 10912 EVEX, PD, Sched<[WriteVecExtract]>; 10913 10914 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 10915 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), 10916 (ins _.RC:$src1, u8imm:$src2), 10917 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 10918 EVEX, TAPD, FoldGenData<NAME#rr>, 10919 Sched<[WriteVecExtract]>; 10920 10921 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; 10922 } 10923 } 10924 10925 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _, 10926 RegisterClass GRC> { 10927 let Predicates = [HasDQI] in { 10928 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst), 10929 (ins _.RC:$src1, u8imm:$src2), 10930 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 10931 [(set GRC:$dst, 10932 (extractelt (_.VT _.RC:$src1), imm:$src2))]>, 10933 EVEX, TAPD, Sched<[WriteVecExtract]>; 10934 10935 def mr : AVX512Ii8<0x16, MRMDestMem, (outs), 10936 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), 10937 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 10938 [(store (extractelt (_.VT _.RC:$src1), 10939 imm:$src2),addr:$dst)]>, 10940 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD, 10941 Sched<[WriteVecExtractSt]>; 10942 } 10943 } 10944 10945 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG; 10946 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG; 10947 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>; 10948 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W; 10949 10950 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode, 10951 X86VectorVTInfo _, PatFrag LdFrag> { 10952 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst), 10953 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10954 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 10955 [(set _.RC:$dst, 10956 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>, 10957 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>; 10958 } 10959 10960 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode, 10961 X86VectorVTInfo _, PatFrag LdFrag> { 10962 let Predicates = [HasBWI] in { 10963 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst), 10964 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3), 10965 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 10966 [(set _.RC:$dst, 10967 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V, 10968 Sched<[WriteVecInsert]>; 10969 10970 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>; 10971 } 10972 } 10973 10974 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr, 10975 X86VectorVTInfo _, RegisterClass GRC> { 10976 let Predicates = [HasDQI] in { 10977 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst), 10978 (ins _.RC:$src1, GRC:$src2, u8imm:$src3), 10979 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 10980 [(set _.RC:$dst, 10981 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>, 10982 EVEX_4V, TAPD, Sched<[WriteVecInsert]>; 10983 10984 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _, 10985 _.ScalarLdFrag>, TAPD; 10986 } 10987 } 10988 10989 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info, 10990 extloadi8>, TAPD, VEX_WIG; 10991 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, 10992 extloadi16>, PD, VEX_WIG; 10993 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; 10994 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; 10995 10996 //===----------------------------------------------------------------------===// 10997 // VSHUFPS - VSHUFPD Operations 10998 //===----------------------------------------------------------------------===// 10999 11000 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I, 11001 AVX512VLVectorVTInfo VTInfo_FP>{ 11002 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp, 11003 SchedWriteFShuffle>, 11004 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>, 11005 AVX512AIi8Base, EVEX_4V; 11006 } 11007 11008 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS; 11009 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W; 11010 11011 //===----------------------------------------------------------------------===// 11012 // AVX-512 - Byte shift Left/Right 11013 //===----------------------------------------------------------------------===// 11014 11015 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well? 11016 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr, 11017 Format MRMm, string OpcodeStr, 11018 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 11019 def rr : AVX512<opc, MRMr, 11020 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2), 11021 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11022 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>, 11023 Sched<[sched]>; 11024 def rm : AVX512<opc, MRMm, 11025 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), 11026 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11027 [(set _.RC:$dst,(_.VT (OpNode 11028 (_.VT (bitconvert (_.LdFrag addr:$src1))), 11029 (i8 imm:$src2))))]>, 11030 Sched<[sched.Folded, ReadAfterLd]>; 11031 } 11032 11033 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr, 11034 Format MRMm, string OpcodeStr, 11035 X86SchedWriteWidths sched, Predicate prd>{ 11036 let Predicates = [prd] in 11037 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, 11038 sched.ZMM, v64i8_info>, EVEX_V512; 11039 let Predicates = [prd, HasVLX] in { 11040 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, 11041 sched.YMM, v32i8x_info>, EVEX_V256; 11042 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, 11043 sched.XMM, v16i8x_info>, EVEX_V128; 11044 } 11045 } 11046 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", 11047 SchedWriteShuffle, HasBWI>, 11048 AVX512PDIi8Base, EVEX_4V, VEX_WIG; 11049 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", 11050 SchedWriteShuffle, HasBWI>, 11051 AVX512PDIi8Base, EVEX_4V, VEX_WIG; 11052 11053 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode, 11054 string OpcodeStr, X86FoldableSchedWrite sched, 11055 X86VectorVTInfo _dst, X86VectorVTInfo _src> { 11056 def rr : AVX512BI<opc, MRMSrcReg, 11057 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2), 11058 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11059 [(set _dst.RC:$dst,(_dst.VT 11060 (OpNode (_src.VT _src.RC:$src1), 11061 (_src.VT _src.RC:$src2))))]>, 11062 Sched<[sched]>; 11063 def rm : AVX512BI<opc, MRMSrcMem, 11064 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2), 11065 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11066 [(set _dst.RC:$dst,(_dst.VT 11067 (OpNode (_src.VT _src.RC:$src1), 11068 (_src.VT (bitconvert 11069 (_src.LdFrag addr:$src2))))))]>, 11070 Sched<[sched.Folded, ReadAfterLd]>; 11071 } 11072 11073 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode, 11074 string OpcodeStr, X86SchedWriteWidths sched, 11075 Predicate prd> { 11076 let Predicates = [prd] in 11077 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM, 11078 v8i64_info, v64i8_info>, EVEX_V512; 11079 let Predicates = [prd, HasVLX] in { 11080 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM, 11081 v4i64x_info, v32i8x_info>, EVEX_V256; 11082 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM, 11083 v2i64x_info, v16i8x_info>, EVEX_V128; 11084 } 11085 } 11086 11087 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", 11088 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG; 11089 11090 // Transforms to swizzle an immediate to enable better matching when 11091 // memory operand isn't in the right place. 11092 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{ 11093 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2. 11094 uint8_t Imm = N->getZExtValue(); 11095 // Swap bits 1/4 and 3/6. 11096 uint8_t NewImm = Imm & 0xa5; 11097 if (Imm & 0x02) NewImm |= 0x10; 11098 if (Imm & 0x10) NewImm |= 0x02; 11099 if (Imm & 0x08) NewImm |= 0x40; 11100 if (Imm & 0x40) NewImm |= 0x08; 11101 return getI8Imm(NewImm, SDLoc(N)); 11102 }]>; 11103 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{ 11104 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11105 uint8_t Imm = N->getZExtValue(); 11106 // Swap bits 2/4 and 3/5. 11107 uint8_t NewImm = Imm & 0xc3; 11108 if (Imm & 0x04) NewImm |= 0x10; 11109 if (Imm & 0x10) NewImm |= 0x04; 11110 if (Imm & 0x08) NewImm |= 0x20; 11111 if (Imm & 0x20) NewImm |= 0x08; 11112 return getI8Imm(NewImm, SDLoc(N)); 11113 }]>; 11114 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{ 11115 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11116 uint8_t Imm = N->getZExtValue(); 11117 // Swap bits 1/2 and 5/6. 11118 uint8_t NewImm = Imm & 0x99; 11119 if (Imm & 0x02) NewImm |= 0x04; 11120 if (Imm & 0x04) NewImm |= 0x02; 11121 if (Imm & 0x20) NewImm |= 0x40; 11122 if (Imm & 0x40) NewImm |= 0x20; 11123 return getI8Imm(NewImm, SDLoc(N)); 11124 }]>; 11125 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{ 11126 // Convert a VPTERNLOG immediate by moving operand 1 to the end. 11127 uint8_t Imm = N->getZExtValue(); 11128 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 11129 uint8_t NewImm = Imm & 0x81; 11130 if (Imm & 0x02) NewImm |= 0x04; 11131 if (Imm & 0x04) NewImm |= 0x10; 11132 if (Imm & 0x08) NewImm |= 0x40; 11133 if (Imm & 0x10) NewImm |= 0x02; 11134 if (Imm & 0x20) NewImm |= 0x08; 11135 if (Imm & 0x40) NewImm |= 0x20; 11136 return getI8Imm(NewImm, SDLoc(N)); 11137 }]>; 11138 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{ 11139 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning. 11140 uint8_t Imm = N->getZExtValue(); 11141 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3 11142 uint8_t NewImm = Imm & 0x81; 11143 if (Imm & 0x02) NewImm |= 0x10; 11144 if (Imm & 0x04) NewImm |= 0x02; 11145 if (Imm & 0x08) NewImm |= 0x20; 11146 if (Imm & 0x10) NewImm |= 0x04; 11147 if (Imm & 0x20) NewImm |= 0x40; 11148 if (Imm & 0x40) NewImm |= 0x08; 11149 return getI8Imm(NewImm, SDLoc(N)); 11150 }]>; 11151 11152 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, 11153 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11154 string Name>{ 11155 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 11156 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11157 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4), 11158 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4", 11159 (OpNode (_.VT _.RC:$src1), 11160 (_.VT _.RC:$src2), 11161 (_.VT _.RC:$src3), 11162 (i8 imm:$src4)), 1, 1>, 11163 AVX512AIi8Base, EVEX_4V, Sched<[sched]>; 11164 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11165 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4), 11166 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4", 11167 (OpNode (_.VT _.RC:$src1), 11168 (_.VT _.RC:$src2), 11169 (_.VT (bitconvert (_.LdFrag addr:$src3))), 11170 (i8 imm:$src4)), 1, 0>, 11171 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 11172 Sched<[sched.Folded, ReadAfterLd]>; 11173 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11174 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4), 11175 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2", 11176 "$src2, ${src3}"##_.BroadcastStr##", $src4", 11177 (OpNode (_.VT _.RC:$src1), 11178 (_.VT _.RC:$src2), 11179 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), 11180 (i8 imm:$src4)), 1, 0>, EVEX_B, 11181 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 11182 Sched<[sched.Folded, ReadAfterLd]>; 11183 }// Constraints = "$src1 = $dst" 11184 11185 // Additional patterns for matching passthru operand in other positions. 11186 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11187 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), 11188 _.RC:$src1)), 11189 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11190 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>; 11191 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11192 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)), 11193 _.RC:$src1)), 11194 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11195 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>; 11196 11197 // Additional patterns for matching loads in other positions. 11198 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)), 11199 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), 11200 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, 11201 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; 11202 def : Pat<(_.VT (OpNode _.RC:$src1, 11203 (bitconvert (_.LdFrag addr:$src3)), 11204 _.RC:$src2, (i8 imm:$src4))), 11205 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, 11206 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; 11207 11208 // Additional patterns for matching zero masking with loads in other 11209 // positions. 11210 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11211 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11212 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), 11213 _.ImmAllZerosV)), 11214 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11215 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; 11216 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11217 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11218 _.RC:$src2, (i8 imm:$src4)), 11219 _.ImmAllZerosV)), 11220 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11221 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; 11222 11223 // Additional patterns for matching masked loads with different 11224 // operand orders. 11225 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11226 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11227 _.RC:$src2, (i8 imm:$src4)), 11228 _.RC:$src1)), 11229 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11230 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; 11231 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11232 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11233 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), 11234 _.RC:$src1)), 11235 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11236 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; 11237 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11238 (OpNode _.RC:$src2, _.RC:$src1, 11239 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)), 11240 _.RC:$src1)), 11241 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11242 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; 11243 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11244 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), 11245 _.RC:$src1, (i8 imm:$src4)), 11246 _.RC:$src1)), 11247 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11248 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; 11249 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11250 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11251 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), 11252 _.RC:$src1)), 11253 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11254 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; 11255 11256 // Additional patterns for matching broadcasts in other positions. 11257 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11258 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), 11259 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, 11260 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; 11261 def : Pat<(_.VT (OpNode _.RC:$src1, 11262 (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11263 _.RC:$src2, (i8 imm:$src4))), 11264 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, 11265 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; 11266 11267 // Additional patterns for matching zero masking with broadcasts in other 11268 // positions. 11269 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11270 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11271 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), 11272 _.ImmAllZerosV)), 11273 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11274 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11275 (VPTERNLOG321_imm8 imm:$src4))>; 11276 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11277 (OpNode _.RC:$src1, 11278 (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11279 _.RC:$src2, (i8 imm:$src4)), 11280 _.ImmAllZerosV)), 11281 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11282 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11283 (VPTERNLOG132_imm8 imm:$src4))>; 11284 11285 // Additional patterns for matching masked broadcasts with different 11286 // operand orders. 11287 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11288 (OpNode _.RC:$src1, 11289 (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11290 _.RC:$src2, (i8 imm:$src4)), 11291 _.RC:$src1)), 11292 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11293 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; 11294 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11295 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11296 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), 11297 _.RC:$src1)), 11298 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11299 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; 11300 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11301 (OpNode _.RC:$src2, _.RC:$src1, 11302 (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11303 (i8 imm:$src4)), _.RC:$src1)), 11304 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11305 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; 11306 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11307 (OpNode _.RC:$src2, 11308 (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11309 _.RC:$src1, (i8 imm:$src4)), 11310 _.RC:$src1)), 11311 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11312 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; 11313 def : Pat<(_.VT (vselect _.KRCWM:$mask, 11314 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), 11315 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), 11316 _.RC:$src1)), 11317 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11318 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; 11319 } 11320 11321 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched, 11322 AVX512VLVectorVTInfo _> { 11323 let Predicates = [HasAVX512] in 11324 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM, 11325 _.info512, NAME>, EVEX_V512; 11326 let Predicates = [HasAVX512, HasVLX] in { 11327 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM, 11328 _.info128, NAME>, EVEX_V128; 11329 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM, 11330 _.info256, NAME>, EVEX_V256; 11331 } 11332 } 11333 11334 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, 11335 avx512vl_i32_info>; 11336 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, 11337 avx512vl_i64_info>, VEX_W; 11338 11339 // Patterns to implement vnot using vpternlog instead of creating all ones 11340 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen 11341 // so that the result is only dependent on src0. But we use the same source 11342 // for all operands to prevent a false dependency. 11343 // TODO: We should maybe have a more generalized algorithm for folding to 11344 // vpternlog. 11345 let Predicates = [HasAVX512] in { 11346 def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))), 11347 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11348 } 11349 11350 let Predicates = [HasAVX512, NoVLX] in { 11351 def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))), 11352 (EXTRACT_SUBREG 11353 (VPTERNLOGQZrri 11354 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11355 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11356 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11357 (i8 15)), sub_xmm)>; 11358 def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))), 11359 (EXTRACT_SUBREG 11360 (VPTERNLOGQZrri 11361 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11362 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11363 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11364 (i8 15)), sub_ymm)>; 11365 } 11366 11367 let Predicates = [HasVLX] in { 11368 def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))), 11369 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11370 def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))), 11371 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11372 } 11373 11374 //===----------------------------------------------------------------------===// 11375 // AVX-512 - FixupImm 11376 //===----------------------------------------------------------------------===// 11377 11378 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, 11379 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11380 X86VectorVTInfo TblVT>{ 11381 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 11382 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11383 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11384 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11385 (OpNode (_.VT _.RC:$src1), 11386 (_.VT _.RC:$src2), 11387 (TblVT.VT _.RC:$src3), 11388 (i32 imm:$src4), 11389 (i32 FROUND_CURRENT))>, Sched<[sched]>; 11390 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11391 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), 11392 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11393 (OpNode (_.VT _.RC:$src1), 11394 (_.VT _.RC:$src2), 11395 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), 11396 (i32 imm:$src4), 11397 (i32 FROUND_CURRENT))>, 11398 Sched<[sched.Folded, ReadAfterLd]>; 11399 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11400 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 11401 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2", 11402 "$src2, ${src3}"##_.BroadcastStr##", $src4", 11403 (OpNode (_.VT _.RC:$src1), 11404 (_.VT _.RC:$src2), 11405 (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))), 11406 (i32 imm:$src4), 11407 (i32 FROUND_CURRENT))>, 11408 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 11409 } // Constraints = "$src1 = $dst" 11410 } 11411 11412 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, 11413 SDNode OpNode, X86FoldableSchedWrite sched, 11414 X86VectorVTInfo _, X86VectorVTInfo TblVT>{ 11415 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 11416 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11417 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11418 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2", 11419 "$src2, $src3, {sae}, $src4", 11420 (OpNode (_.VT _.RC:$src1), 11421 (_.VT _.RC:$src2), 11422 (TblVT.VT _.RC:$src3), 11423 (i32 imm:$src4), 11424 (i32 FROUND_NO_EXC))>, 11425 EVEX_B, Sched<[sched]>; 11426 } 11427 } 11428 11429 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 11430 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11431 X86VectorVTInfo _src3VT> { 11432 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], 11433 ExeDomain = _.ExeDomain in { 11434 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 11435 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11436 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11437 (OpNode (_.VT _.RC:$src1), 11438 (_.VT _.RC:$src2), 11439 (_src3VT.VT _src3VT.RC:$src3), 11440 (i32 imm:$src4), 11441 (i32 FROUND_CURRENT))>, Sched<[sched]>; 11442 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 11443 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11444 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2", 11445 "$src2, $src3, {sae}, $src4", 11446 (OpNode (_.VT _.RC:$src1), 11447 (_.VT _.RC:$src2), 11448 (_src3VT.VT _src3VT.RC:$src3), 11449 (i32 imm:$src4), 11450 (i32 FROUND_NO_EXC))>, 11451 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; 11452 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 11453 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 11454 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11455 (OpNode (_.VT _.RC:$src1), 11456 (_.VT _.RC:$src2), 11457 (_src3VT.VT (scalar_to_vector 11458 (_src3VT.ScalarLdFrag addr:$src3))), 11459 (i32 imm:$src4), 11460 (i32 FROUND_CURRENT))>, 11461 Sched<[sched.Folded, ReadAfterLd]>; 11462 } 11463 } 11464 11465 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, 11466 AVX512VLVectorVTInfo _Vec, 11467 AVX512VLVectorVTInfo _Tbl> { 11468 let Predicates = [HasAVX512] in 11469 defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM, 11470 _Vec.info512, _Tbl.info512>, 11471 avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM, 11472 _Vec.info512, _Tbl.info512>, AVX512AIi8Base, 11473 EVEX_4V, EVEX_V512; 11474 let Predicates = [HasAVX512, HasVLX] in { 11475 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM, 11476 _Vec.info128, _Tbl.info128>, AVX512AIi8Base, 11477 EVEX_4V, EVEX_V128; 11478 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM, 11479 _Vec.info256, _Tbl.info256>, AVX512AIi8Base, 11480 EVEX_4V, EVEX_V256; 11481 } 11482 } 11483 11484 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, 11485 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>, 11486 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 11487 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, 11488 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, 11489 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 11490 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info, 11491 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 11492 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info, 11493 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; 11494 11495 // Patterns used to select SSE scalar fp arithmetic instructions from 11496 // either: 11497 // 11498 // (1) a scalar fp operation followed by a blend 11499 // 11500 // The effect is that the backend no longer emits unnecessary vector 11501 // insert instructions immediately after SSE scalar fp instructions 11502 // like addss or mulss. 11503 // 11504 // For example, given the following code: 11505 // __m128 foo(__m128 A, __m128 B) { 11506 // A[0] += B[0]; 11507 // return A; 11508 // } 11509 // 11510 // Previously we generated: 11511 // addss %xmm0, %xmm1 11512 // movss %xmm1, %xmm0 11513 // 11514 // We now generate: 11515 // addss %xmm1, %xmm0 11516 // 11517 // (2) a vector packed single/double fp operation followed by a vector insert 11518 // 11519 // The effect is that the backend converts the packed fp instruction 11520 // followed by a vector insert into a single SSE scalar fp instruction. 11521 // 11522 // For example, given the following code: 11523 // __m128 foo(__m128 A, __m128 B) { 11524 // __m128 C = A + B; 11525 // return (__m128) {c[0], a[1], a[2], a[3]}; 11526 // } 11527 // 11528 // Previously we generated: 11529 // addps %xmm0, %xmm1 11530 // movss %xmm1, %xmm0 11531 // 11532 // We now generate: 11533 // addss %xmm1, %xmm0 11534 11535 // TODO: Some canonicalization in lowering would simplify the number of 11536 // patterns we have to try to match. 11537 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode, 11538 X86VectorVTInfo _, PatLeaf ZeroFP> { 11539 let Predicates = [HasAVX512] in { 11540 // extracted scalar math op with insert via movss 11541 def : Pat<(MoveNode 11542 (_.VT VR128X:$dst), 11543 (_.VT (scalar_to_vector 11544 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 11545 _.FRC:$src)))), 11546 (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst, 11547 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>; 11548 11549 // extracted masked scalar math op with insert via movss 11550 def : Pat<(MoveNode (_.VT VR128X:$src1), 11551 (scalar_to_vector 11552 (X86selects VK1WM:$mask, 11553 (Op (_.EltVT 11554 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 11555 _.FRC:$src2), 11556 _.FRC:$src0))), 11557 (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk) 11558 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 11559 VK1WM:$mask, _.VT:$src1, 11560 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 11561 11562 // extracted masked scalar math op with insert via movss 11563 def : Pat<(MoveNode (_.VT VR128X:$src1), 11564 (scalar_to_vector 11565 (X86selects VK1WM:$mask, 11566 (Op (_.EltVT 11567 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 11568 _.FRC:$src2), (_.EltVT ZeroFP)))), 11569 (!cast<I>("V"#OpcPrefix#Zrr_Intkz) 11570 VK1WM:$mask, _.VT:$src1, 11571 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 11572 } 11573 } 11574 11575 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>; 11576 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>; 11577 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>; 11578 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>; 11579 11580 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>; 11581 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>; 11582 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>; 11583 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>; 11584 11585 multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, 11586 SDNode Move, X86VectorVTInfo _> { 11587 let Predicates = [HasAVX512] in { 11588 def : Pat<(_.VT (Move _.VT:$dst, 11589 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))), 11590 (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>; 11591 } 11592 } 11593 11594 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>; 11595 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>; 11596 11597 multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix, 11598 SDNode Move, X86VectorVTInfo _, 11599 bits<8> ImmV> { 11600 let Predicates = [HasAVX512] in { 11601 def : Pat<(_.VT (Move _.VT:$dst, 11602 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))), 11603 (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src, 11604 (i32 ImmV))>; 11605 } 11606 } 11607 11608 defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss, 11609 v4f32x_info, 0x01>; 11610 defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss, 11611 v4f32x_info, 0x02>; 11612 defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd, 11613 v2f64x_info, 0x01>; 11614 defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd, 11615 v2f64x_info, 0x02>; 11616 11617 //===----------------------------------------------------------------------===// 11618 // AES instructions 11619 //===----------------------------------------------------------------------===// 11620 11621 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> { 11622 let Predicates = [HasVLX, HasVAES] in { 11623 defm Z128 : AESI_binop_rm_int<Op, OpStr, 11624 !cast<Intrinsic>(IntPrefix), 11625 loadv2i64, 0, VR128X, i128mem>, 11626 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG; 11627 defm Z256 : AESI_binop_rm_int<Op, OpStr, 11628 !cast<Intrinsic>(IntPrefix##"_256"), 11629 loadv4i64, 0, VR256X, i256mem>, 11630 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG; 11631 } 11632 let Predicates = [HasAVX512, HasVAES] in 11633 defm Z : AESI_binop_rm_int<Op, OpStr, 11634 !cast<Intrinsic>(IntPrefix##"_512"), 11635 loadv8i64, 0, VR512, i512mem>, 11636 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG; 11637 } 11638 11639 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">; 11640 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">; 11641 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">; 11642 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">; 11643 11644 //===----------------------------------------------------------------------===// 11645 // PCLMUL instructions - Carry less multiplication 11646 //===----------------------------------------------------------------------===// 11647 11648 let Predicates = [HasAVX512, HasVPCLMULQDQ] in 11649 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>, 11650 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG; 11651 11652 let Predicates = [HasVLX, HasVPCLMULQDQ] in { 11653 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>, 11654 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG; 11655 11656 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64, 11657 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256, 11658 EVEX_CD8<64, CD8VF>, VEX_WIG; 11659 } 11660 11661 // Aliases 11662 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>; 11663 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>; 11664 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; 11665 11666 //===----------------------------------------------------------------------===// 11667 // VBMI2 11668 //===----------------------------------------------------------------------===// 11669 11670 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, 11671 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 11672 let Constraints = "$src1 = $dst", 11673 ExeDomain = VTI.ExeDomain in { 11674 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 11675 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 11676 "$src3, $src2", "$src2, $src3", 11677 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, 11678 AVX512FMA3Base, Sched<[sched]>; 11679 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11680 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 11681 "$src3, $src2", "$src2, $src3", 11682 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 11683 (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>, 11684 AVX512FMA3Base, 11685 Sched<[sched.Folded, ReadAfterLd]>; 11686 } 11687 } 11688 11689 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, 11690 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> 11691 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> { 11692 let Constraints = "$src1 = $dst", 11693 ExeDomain = VTI.ExeDomain in 11694 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11695 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr, 11696 "${src3}"##VTI.BroadcastStr##", $src2", 11697 "$src2, ${src3}"##VTI.BroadcastStr, 11698 (OpNode VTI.RC:$src1, VTI.RC:$src2, 11699 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>, 11700 AVX512FMA3Base, EVEX_B, 11701 Sched<[sched.Folded, ReadAfterLd]>; 11702 } 11703 11704 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, 11705 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11706 let Predicates = [HasVBMI2] in 11707 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 11708 EVEX_V512; 11709 let Predicates = [HasVBMI2, HasVLX] in { 11710 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 11711 EVEX_V256; 11712 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 11713 EVEX_V128; 11714 } 11715 } 11716 11717 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, 11718 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11719 let Predicates = [HasVBMI2] in 11720 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 11721 EVEX_V512; 11722 let Predicates = [HasVBMI2, HasVLX] in { 11723 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 11724 EVEX_V256; 11725 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 11726 EVEX_V128; 11727 } 11728 } 11729 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix, 11730 SDNode OpNode, X86SchedWriteWidths sched> { 11731 defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched, 11732 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; 11733 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched, 11734 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 11735 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched, 11736 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 11737 } 11738 11739 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix, 11740 SDNode OpNode, X86SchedWriteWidths sched> { 11741 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched, 11742 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>, 11743 VEX_W, EVEX_CD8<16, CD8VF>; 11744 defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp, 11745 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 11746 defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode, 11747 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 11748 } 11749 11750 // Concat & Shift 11751 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>; 11752 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>; 11753 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>; 11754 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>; 11755 11756 // Compress 11757 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256, 11758 avx512vl_i8_info, HasVBMI2>, EVEX, 11759 NotMemoryFoldable; 11760 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256, 11761 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W, 11762 NotMemoryFoldable; 11763 // Expand 11764 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256, 11765 avx512vl_i8_info, HasVBMI2>, EVEX; 11766 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, 11767 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; 11768 11769 //===----------------------------------------------------------------------===// 11770 // VNNI 11771 //===----------------------------------------------------------------------===// 11772 11773 let Constraints = "$src1 = $dst" in 11774 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, 11775 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 11776 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 11777 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 11778 "$src3, $src2", "$src2, $src3", 11779 (VTI.VT (OpNode VTI.RC:$src1, 11780 VTI.RC:$src2, VTI.RC:$src3))>, 11781 EVEX_4V, T8PD, Sched<[sched]>; 11782 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11783 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 11784 "$src3, $src2", "$src2, $src3", 11785 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 11786 (VTI.VT (bitconvert 11787 (VTI.LdFrag addr:$src3)))))>, 11788 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, 11789 Sched<[sched.Folded, ReadAfterLd]>; 11790 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11791 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), 11792 OpStr, "${src3}"##VTI.BroadcastStr##", $src2", 11793 "$src2, ${src3}"##VTI.BroadcastStr, 11794 (OpNode VTI.RC:$src1, VTI.RC:$src2, 11795 (VTI.VT (X86VBroadcast 11796 (VTI.ScalarLdFrag addr:$src3))))>, 11797 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, 11798 T8PD, Sched<[sched.Folded, ReadAfterLd]>; 11799 } 11800 11801 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, 11802 X86SchedWriteWidths sched> { 11803 let Predicates = [HasVNNI] in 11804 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512; 11805 let Predicates = [HasVNNI, HasVLX] in { 11806 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256; 11807 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128; 11808 } 11809 } 11810 11811 // FIXME: Is there a better scheduler class for VPDP? 11812 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>; 11813 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>; 11814 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>; 11815 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>; 11816 11817 //===----------------------------------------------------------------------===// 11818 // Bit Algorithms 11819 //===----------------------------------------------------------------------===// 11820 11821 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW? 11822 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU, 11823 avx512vl_i8_info, HasBITALG>; 11824 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU, 11825 avx512vl_i16_info, HasBITALG>, VEX_W; 11826 11827 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; 11828 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>; 11829 11830 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 11831 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst), 11832 (ins VTI.RC:$src1, VTI.RC:$src2), 11833 "vpshufbitqmb", 11834 "$src2, $src1", "$src1, $src2", 11835 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 11836 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD, 11837 Sched<[sched]>; 11838 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), 11839 (ins VTI.RC:$src1, VTI.MemOp:$src2), 11840 "vpshufbitqmb", 11841 "$src2, $src1", "$src1, $src2", 11842 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 11843 (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>, 11844 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, 11845 Sched<[sched.Folded, ReadAfterLd]>; 11846 } 11847 11848 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 11849 let Predicates = [HasBITALG] in 11850 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512; 11851 let Predicates = [HasBITALG, HasVLX] in { 11852 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256; 11853 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128; 11854 } 11855 } 11856 11857 // FIXME: Is there a better scheduler class for VPSHUFBITQMB? 11858 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>; 11859 11860 //===----------------------------------------------------------------------===// 11861 // GFNI 11862 //===----------------------------------------------------------------------===// 11863 11864 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 11865 X86SchedWriteWidths sched> { 11866 let Predicates = [HasGFNI, HasAVX512, HasBWI] in 11867 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>, 11868 EVEX_V512; 11869 let Predicates = [HasGFNI, HasVLX, HasBWI] in { 11870 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>, 11871 EVEX_V256; 11872 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>, 11873 EVEX_V128; 11874 } 11875 } 11876 11877 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb, 11878 SchedWriteVecALU>, 11879 EVEX_CD8<8, CD8VF>, T8PD; 11880 11881 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, 11882 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 11883 X86VectorVTInfo BcstVTI> 11884 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> { 11885 let ExeDomain = VTI.ExeDomain in 11886 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 11887 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3), 11888 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1", 11889 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", 11890 (OpNode (VTI.VT VTI.RC:$src1), 11891 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), 11892 (i8 imm:$src3))>, EVEX_B, 11893 Sched<[sched.Folded, ReadAfterLd]>; 11894 } 11895 11896 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 11897 X86SchedWriteWidths sched> { 11898 let Predicates = [HasGFNI, HasAVX512, HasBWI] in 11899 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM, 11900 v64i8_info, v8i64_info>, EVEX_V512; 11901 let Predicates = [HasGFNI, HasVLX, HasBWI] in { 11902 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM, 11903 v32i8x_info, v4i64x_info>, EVEX_V256; 11904 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM, 11905 v16i8x_info, v2i64x_info>, EVEX_V128; 11906 } 11907 } 11908 11909 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", 11910 X86GF2P8affineinvqb, SchedWriteVecIMul>, 11911 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; 11912 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", 11913 X86GF2P8affineqb, SchedWriteVecIMul>, 11914 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; 11915 11916 11917 //===----------------------------------------------------------------------===// 11918 // AVX5124FMAPS 11919 //===----------------------------------------------------------------------===// 11920 11921 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, 11922 Constraints = "$src1 = $dst" in { 11923 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, 11924 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 11925 "v4fmaddps", "$src3, $src2", "$src2, $src3", 11926 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 11927 Sched<[SchedWriteFMA.ZMM.Folded]>; 11928 11929 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info, 11930 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 11931 "v4fnmaddps", "$src3, $src2", "$src2, $src3", 11932 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 11933 Sched<[SchedWriteFMA.ZMM.Folded]>; 11934 11935 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info, 11936 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 11937 "v4fmaddss", "$src3, $src2", "$src2, $src3", 11938 []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, 11939 Sched<[SchedWriteFMA.Scl.Folded]>; 11940 11941 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info, 11942 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 11943 "v4fnmaddss", "$src3, $src2", "$src2, $src3", 11944 []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, 11945 Sched<[SchedWriteFMA.Scl.Folded]>; 11946 } 11947 11948 //===----------------------------------------------------------------------===// 11949 // AVX5124VNNIW 11950 //===----------------------------------------------------------------------===// 11951 11952 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt, 11953 Constraints = "$src1 = $dst" in { 11954 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info, 11955 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 11956 "vp4dpwssd", "$src3, $src2", "$src2, $src3", 11957 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 11958 Sched<[SchedWriteFMA.ZMM.Folded]>; 11959 11960 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, 11961 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 11962 "vp4dpwssds", "$src3, $src2", "$src2, $src3", 11963 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 11964 Sched<[SchedWriteFMA.ZMM.Folded]>; 11965 } 11966 11967